/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tree-gimple.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
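
/* Illustrative sketch, not part of the original file: MODE_INDEX is how the
   per-mode entries of the cost tables below are selected.  The helper name
   and the `mult_init' field access are assumptions made for this example;
   real consumers of the tables (e.g. ix86_rtx_costs) follow the same
   pattern.  */
static inline int
example_mult_start_cost (const struct processor_costs *costs,
                         enum machine_mode mode)
{
  /* mult_init[] holds the cost of starting a multiply, indexed
     QImode/HImode/SImode/DImode/other via MODE_INDEX.  */
  return costs->mult_init[MODE_INDEX (mode)];
}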
/* Processor costs (relative to an add) */
struct processor_costs size_cost = {  /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of loading integer registers */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {  /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of loading integer registers */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
struct processor_costs i486_cost = {  /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of loading integer registers */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of loading integer registers */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of loading integer registers */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of loading integer registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of loading integer registers */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of loading integer registers */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of loading integer registers */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of loading integer registers */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
const struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_NOCONA (1<<PROCESSOR_NOCONA)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
/* Branch hints were put in P4 based on simulation results.  But after P4
   was made, no performance benefit was observed with branch hints.  It
   also increases the code size.  As a result, icc never generates branch
   hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_decompose_lea = m_PENT4 | m_NOCONA;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just the
   lower part of scalar values in the proper format, leaving the upper part
   undefined.  */
const int x86_sse_partial_regs = m_ATHLON_K8;
/* Athlon optimizes the partial-register FPS special case, thus avoiding the
   need for extra instructions beforehand.  */
const int x86_sse_partial_regs_for_cvtsd2ss = 0;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
const int x86_inter_unit_moves = ~(m_ATHLON_K8);
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
/* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16 byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K8 | m_PENT;
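
/* Illustrative sketch, not part of the original file: the x86_* masks above
   are normally consumed by TARGET_* macros in i386.h, which AND them with a
   mask built from the selected CPU.  The helper below just spells that test
   out; its name is made up for this example.  */
static inline int
example_tune_has_feature (int feature_mask, enum processor_type tune)
{
  /* Nonzero when TUNE's bit is set in FEATURE_MASK, e.g.
     example_tune_has_feature (x86_use_leave, PROCESSOR_K6) is nonzero.  */
  return (feature_mask & (1 << tune)) != 0;
}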
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  AREG, DREG, CREG, BREG,
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};
static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
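
/* Worked example, not in the original file: with the hard register numbering
   used here (0 = ax, 1 = dx, 2 = cx, 3 = bx, 4 = si, 5 = di), the third
   x86-64 integer argument lives in x86_64_int_parameter_registers[2] == 1,
   i.e. %rdx, matching the psABI order RDI, RSI, RDX, RCX, R8, R9.  */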
/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,	/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
655 /* Define the register numbers to be used in Dwarf debugging information.
656 The SVR4 reference port C compiler uses the following register numbers
657 in its Dwarf output code:
658 0 for %eax (gcc regno = 0)
659 1 for %ecx (gcc regno = 2)
660 2 for %edx (gcc regno = 1)
661 3 for %ebx (gcc regno = 3)
662 4 for %esp (gcc regno = 7)
663 5 for %ebp (gcc regno = 6)
664 6 for %esi (gcc regno = 4)
665 7 for %edi (gcc regno = 5)
666 The following three DWARF register numbers are never generated by
667 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
668 believes these numbers have these meanings.
669 8 for %eip (no gcc equivalent)
670 9 for %eflags (gcc regno = 17)
671 10 for %trapno (no gcc equivalent)
672 It is not at all clear how we should number the FP stack registers
673 for the x86 architecture. If the version of SDB on x86/svr4 were
674 a bit less brain dead with respect to floating-point then we would
675 have a precedent to follow with respect to DWARF register numbers
676 for x86 FP registers, but the SDB on x86/svr4 is so completely
677 broken with respect to FP registers that it is hardly worth thinking
678 of it as something to strive for compatibility with.
679 The version of x86/svr4 SDB I have at the moment does (partially)
680 seem to believe that DWARF register number 11 is associated with
681 the x86 register %st(0), but that's about all. Higher DWARF
682 register numbers don't seem to be associated with anything in
683 particular, and even for DWARF regno 11, SDB only seems to under-
684 stand that it should say that a variable lives in %st(0) (when
685 asked via an `=' command) if we said it was in DWARF regno 11,
686 but SDB still prints garbage when asked for the value of the
687 variable in question (via a `/' command).
688 (Also note that the labels SDB prints for various FP stack regs
689 when doing an `x' command are all wrong.)
690 Note that these problems generally don't affect the native SVR4
691 C compiler because it doesn't allow the use of -O with -g and
692 because when it is *not* optimizing, it allocates a memory
693 location for each floating-point variable, and the memory
694 location is what gets described in the DWARF AT_location
695 attribute for the variable in question.
696 Regardless of the severe mental illness of the x86/svr4 SDB, we
697 do something sensible here and we use the following DWARF
698 register numbers. Note that these are all stack-top-relative
700 11 for %st(0) (gcc regno = 8)
701 12 for %st(1) (gcc regno = 9)
702 13 for %st(2) (gcc regno = 10)
703 14 for %st(3) (gcc regno = 11)
704 15 for %st(4) (gcc regno = 12)
705 16 for %st(5) (gcc regno = 13)
706 17 for %st(6) (gcc regno = 14)
707 18 for %st(7) (gcc regno = 15)
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
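
/* Illustrative sketch, not part of the original file: DBX_REGISTER_NUMBER in
   i386.h indexes one of the maps above with a GCC hard register number.  For
   instance, %esp is GCC regno 7 and svr4_dbx_register_map[7] == 4, the DWARF
   number used by the SVR4 tools.  The helper name is made up for this
   example.  */
static inline int
example_svr4_dwarf_regno (int gcc_regno)
{
  return svr4_dbx_register_map[gcc_regno];
}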
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 3

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
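
/* Worked example, not in the original file: with the usual 64-bit values
   REGPARM_MAX == 6, UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8, the
   varargs register save area is 6*8 + 8*16 = 176 bytes: six GPR slots
   followed by eight 16-byte SSE slots.  */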
/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())

  struct stack_local_entry *next;
740 /* Structure describing stack frame layout.
741 Stack grows downward:
747 saved frame pointer if frame_pointer_needed
748 <- HARD_FRAME_POINTER
754 > to_allocate <- FRAME_POINTER
  int outgoing_arguments_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;

/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
enum cmodel ix86_cmodel;

const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;

const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_tune_string;		/* for -mtune=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
840 static void output_pic_addr_const (FILE *, rtx
, int);
841 static void put_condition_code (enum rtx_code
, enum machine_mode
,
843 static const char *get_some_local_dynamic_name (void);
844 static int get_some_local_dynamic_name_1 (rtx
*, void *);
845 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
846 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
848 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
849 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
851 static rtx
get_thread_pointer (int);
852 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
853 static void get_pc_thunk_name (char [32], unsigned int);
854 static rtx
gen_push (rtx
);
855 static int ix86_flags_dependant (rtx
, rtx
, enum attr_type
);
856 static int ix86_agi_dependant (rtx
, rtx
, enum attr_type
);
857 static struct machine_function
* ix86_init_machine_status (void);
858 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
859 static int ix86_nsaved_regs (void);
860 static void ix86_emit_save_regs (void);
861 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
862 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
863 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
864 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
865 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
866 static rtx
ix86_expand_aligntest (rtx
, int);
867 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
868 static int ix86_issue_rate (void);
869 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
870 static int ia32_multipass_dfa_lookahead (void);
871 static void ix86_init_mmx_sse_builtins (void);
872 static rtx
x86_this_parameter (tree
);
873 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
874 HOST_WIDE_INT
, tree
);
875 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
876 static void x86_file_start (void);
877 static void ix86_reorg (void);
878 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
879 static tree
ix86_build_builtin_va_list (void);
880 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
882 static tree
ix86_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
883 static bool ix86_vector_mode_supported_p (enum machine_mode
);
885 static int ix86_address_cost (rtx
);
886 static bool ix86_cannot_force_const_mem (rtx
);
887 static rtx
ix86_delegitimize_address (rtx
);
889 struct builtin_description
;
890 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
892 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
894 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
895 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
896 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
897 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
898 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
899 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
900 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
901 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
902 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
903 static int ix86_fp_comparison_cost (enum rtx_code code
);
904 static unsigned int ix86_select_alt_pic_regnum (void);
905 static int ix86_save_reg (unsigned int, int);
906 static void ix86_compute_frame_layout (struct ix86_frame
*);
907 static int ix86_comp_type_attributes (tree
, tree
);
908 static int ix86_function_regparm (tree
, tree
);
909 const struct attribute_spec ix86_attribute_table
[];
910 static bool ix86_function_ok_for_sibcall (tree
, tree
);
911 static tree
ix86_handle_cdecl_attribute (tree
*, tree
, tree
, int, bool *);
912 static tree
ix86_handle_regparm_attribute (tree
*, tree
, tree
, int, bool *);
913 static int ix86_value_regno (enum machine_mode
);
914 static bool contains_128bit_aligned_vector_p (tree
);
915 static rtx
ix86_struct_value_rtx (tree
, int);
916 static bool ix86_ms_bitfield_layout_p (tree
);
917 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
918 static int extended_reg_mentioned_1 (rtx
*, void *);
919 static bool ix86_rtx_costs (rtx
, int, int, int *);
920 static int min_insn_size (rtx
);
921 static tree
ix86_md_asm_clobbers (tree clobbers
);
922 static bool ix86_must_pass_in_stack (enum machine_mode mode
, tree type
);
923 static bool ix86_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
926 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
927 static void ix86_svr3_asm_out_constructor (rtx
, int);
/* Register class used for passing a given 64bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class except
   that gcc will use SFmode or DFmode moves instead of DImode moves to
   avoid reformatting penalties.

   Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half does contain padding).  */
938 enum x86_64_reg_class
941 X86_64_INTEGER_CLASS
,
942 X86_64_INTEGERSI_CLASS
,
951 static const char * const x86_64_reg_class_name
[] =
952 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
954 #define MAX_CLASSES 4
955 static int classify_argument (enum machine_mode
, tree
,
956 enum x86_64_reg_class
[MAX_CLASSES
], int);
957 static int examine_argument (enum machine_mode
, tree
, int, int *, int *);
958 static rtx
construct_container (enum machine_mode
, tree
, int, int, int,
960 static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class
,
961 enum x86_64_reg_class
);
963 /* Table of constants used by fldpi, fldln2, etc.... */
964 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
965 static bool ext_80387_constants_init
= 0;
966 static void init_ext_80387_constants (void);
968 /* Initialize the GCC target structure. */
969 #undef TARGET_ATTRIBUTE_TABLE
970 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
971 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
972 # undef TARGET_MERGE_DECL_ATTRIBUTES
973 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
976 #undef TARGET_COMP_TYPE_ATTRIBUTES
977 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
979 #undef TARGET_INIT_BUILTINS
980 #define TARGET_INIT_BUILTINS ix86_init_builtins
982 #undef TARGET_EXPAND_BUILTIN
983 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
985 #undef TARGET_ASM_FUNCTION_EPILOGUE
986 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
988 #undef TARGET_ASM_OPEN_PAREN
989 #define TARGET_ASM_OPEN_PAREN ""
990 #undef TARGET_ASM_CLOSE_PAREN
991 #define TARGET_ASM_CLOSE_PAREN ""
993 #undef TARGET_ASM_ALIGNED_HI_OP
994 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
995 #undef TARGET_ASM_ALIGNED_SI_OP
996 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
998 #undef TARGET_ASM_ALIGNED_DI_OP
999 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1002 #undef TARGET_ASM_UNALIGNED_HI_OP
1003 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1004 #undef TARGET_ASM_UNALIGNED_SI_OP
1005 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1006 #undef TARGET_ASM_UNALIGNED_DI_OP
1007 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1009 #undef TARGET_SCHED_ADJUST_COST
1010 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1011 #undef TARGET_SCHED_ISSUE_RATE
1012 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1013 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1014 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1015 ia32_multipass_dfa_lookahead
1017 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1018 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1021 #undef TARGET_HAVE_TLS
1022 #define TARGET_HAVE_TLS true
1024 #undef TARGET_CANNOT_FORCE_CONST_MEM
1025 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1027 #undef TARGET_DELEGITIMIZE_ADDRESS
1028 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1030 #undef TARGET_MS_BITFIELD_LAYOUT_P
1031 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1033 #undef TARGET_ASM_OUTPUT_MI_THUNK
1034 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1035 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1036 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1038 #undef TARGET_ASM_FILE_START
1039 #define TARGET_ASM_FILE_START x86_file_start
1041 #undef TARGET_RTX_COSTS
1042 #define TARGET_RTX_COSTS ix86_rtx_costs
1043 #undef TARGET_ADDRESS_COST
1044 #define TARGET_ADDRESS_COST ix86_address_cost
1046 #undef TARGET_FIXED_CONDITION_CODE_REGS
1047 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1048 #undef TARGET_CC_MODES_COMPATIBLE
1049 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1051 #undef TARGET_MACHINE_DEPENDENT_REORG
1052 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1054 #undef TARGET_BUILD_BUILTIN_VA_LIST
1055 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1057 #undef TARGET_MD_ASM_CLOBBERS
1058 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1060 #undef TARGET_PROMOTE_PROTOTYPES
1061 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1062 #undef TARGET_STRUCT_VALUE_RTX
1063 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1064 #undef TARGET_SETUP_INCOMING_VARARGS
1065 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1066 #undef TARGET_MUST_PASS_IN_STACK
1067 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1068 #undef TARGET_PASS_BY_REFERENCE
1069 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1071 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1072 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1074 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1075 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1077 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1078 #undef TARGET_INSERT_ATTRIBUTES
1079 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1082 struct gcc_target targetm
= TARGET_INITIALIZER
;
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
1087 #ifndef DEFAULT_PCC_STRUCT_RETURN
1088 #define DEFAULT_PCC_STRUCT_RETURN 1
1091 /* Sometimes certain combinations of command options do not make
1092 sense on a particular target machine. You can define a macro
1093 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1094 defined, is executed once just after all the command options have
1097 Don't use this macro to turn on various extra optimizations for
1098 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1101 override_options (void)
1104 int ix86_tune_defaulted
= 0;
1106 /* Comes from final.c -- no real reason to change it. */
1107 #define MAX_CODE_ALIGN 16
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
      const processor_target_table[PROCESSOR_max] =
	{
	  {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
	  {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
	  {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
	  {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
	  {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
	  {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
	  {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
	  {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
	  {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
	};
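      /* Reading example, not in the original file: the i486 row above,
	 {&i486_cost, 0, 0, 16, 15, 16, 15, 16}, means: use i486_cost, enable
	 and disable no extra target flags, align loops and jumps to 16 bytes
	 while skipping at most 15 bytes of padding, and align functions to
	 16 bytes.  */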
  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1136 const char *const name
; /* processor name or nickname. */
1137 const enum processor_type processor
;
1138 const enum pta_flags
1144 PTA_PREFETCH_SSE
= 16,
1150 const processor_alias_table
[] =
1152 {"i386", PROCESSOR_I386
, 0},
1153 {"i486", PROCESSOR_I486
, 0},
1154 {"i586", PROCESSOR_PENTIUM
, 0},
1155 {"pentium", PROCESSOR_PENTIUM
, 0},
1156 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1157 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1158 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1159 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1160 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1161 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1162 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1163 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1164 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1165 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1166 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1167 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1168 | PTA_MMX
| PTA_PREFETCH_SSE
},
1169 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1170 | PTA_MMX
| PTA_PREFETCH_SSE
},
1171 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1172 | PTA_MMX
| PTA_PREFETCH_SSE
},
1173 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1174 | PTA_MMX
| PTA_PREFETCH_SSE
},
1175 {"k6", PROCESSOR_K6
, PTA_MMX
},
1176 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1177 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1178 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1180 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1181 | PTA_3DNOW
| PTA_3DNOW_A
},
1182 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1183 | PTA_3DNOW_A
| PTA_SSE
},
1184 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1185 | PTA_3DNOW_A
| PTA_SSE
},
1186 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1187 | PTA_3DNOW_A
| PTA_SSE
},
1188 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1189 | PTA_SSE
| PTA_SSE2
},
1190 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1191 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1192 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1193 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1194 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1195 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1196 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1197 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1200 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1202 /* Set the default values for switches whose default depends on TARGET_64BIT
1203 in case they weren't overwritten by command line options. */
1206 if (flag_omit_frame_pointer
== 2)
1207 flag_omit_frame_pointer
= 1;
1208 if (flag_asynchronous_unwind_tables
== 2)
1209 flag_asynchronous_unwind_tables
= 1;
1210 if (flag_pcc_struct_return
== 2)
1211 flag_pcc_struct_return
= 0;
1215 if (flag_omit_frame_pointer
== 2)
1216 flag_omit_frame_pointer
= 0;
1217 if (flag_asynchronous_unwind_tables
== 2)
1218 flag_asynchronous_unwind_tables
= 0;
1219 if (flag_pcc_struct_return
== 2)
1220 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1223 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1224 SUBTARGET_OVERRIDE_OPTIONS
;
1227 if (!ix86_tune_string
&& ix86_arch_string
)
1228 ix86_tune_string
= ix86_arch_string
;
1229 if (!ix86_tune_string
)
1231 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1232 ix86_tune_defaulted
= 1;
1234 if (!ix86_arch_string
)
1235 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
1237 if (ix86_cmodel_string
!= 0)
1239 if (!strcmp (ix86_cmodel_string
, "small"))
1240 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1242 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
1243 else if (!strcmp (ix86_cmodel_string
, "32"))
1244 ix86_cmodel
= CM_32
;
1245 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1246 ix86_cmodel
= CM_KERNEL
;
1247 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
1248 ix86_cmodel
= CM_MEDIUM
;
1249 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
1250 ix86_cmodel
= CM_LARGE
;
1252 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1256 ix86_cmodel
= CM_32
;
1258 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1260 if (ix86_asm_string
!= 0)
1262 if (!strcmp (ix86_asm_string
, "intel"))
1263 ix86_asm_dialect
= ASM_INTEL
;
1264 else if (!strcmp (ix86_asm_string
, "att"))
1265 ix86_asm_dialect
= ASM_ATT
;
1267 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1269 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1270 error ("code model `%s' not supported in the %s bit mode",
1271 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1272 if (ix86_cmodel
== CM_LARGE
)
1273 sorry ("code model `large' not supported yet");
1274 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1275 sorry ("%i-bit mode not compiled in",
1276 (target_flags
& MASK_64BIT
) ? 64 : 32);
1278 for (i
= 0; i
< pta_size
; i
++)
1279 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1281 ix86_arch
= processor_alias_table
[i
].processor
;
1282 /* Default cpu tuning to the architecture. */
1283 ix86_tune
= ix86_arch
;
1284 if (processor_alias_table
[i
].flags
& PTA_MMX
1285 && !(target_flags_explicit
& MASK_MMX
))
1286 target_flags
|= MASK_MMX
;
1287 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1288 && !(target_flags_explicit
& MASK_3DNOW
))
1289 target_flags
|= MASK_3DNOW
;
1290 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1291 && !(target_flags_explicit
& MASK_3DNOW_A
))
1292 target_flags
|= MASK_3DNOW_A
;
1293 if (processor_alias_table
[i
].flags
& PTA_SSE
1294 && !(target_flags_explicit
& MASK_SSE
))
1295 target_flags
|= MASK_SSE
;
1296 if (processor_alias_table
[i
].flags
& PTA_SSE2
1297 && !(target_flags_explicit
& MASK_SSE2
))
1298 target_flags
|= MASK_SSE2
;
1299 if (processor_alias_table
[i
].flags
& PTA_SSE3
1300 && !(target_flags_explicit
& MASK_SSE3
))
1301 target_flags
|= MASK_SSE3
;
1302 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1303 x86_prefetch_sse
= true;
1304 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1306 if (ix86_tune_defaulted
)
1308 ix86_tune_string
= "x86-64";
1309 for (i
= 0; i
< pta_size
; i
++)
1310 if (! strcmp (ix86_tune_string
,
1311 processor_alias_table
[i
].name
))
1313 ix86_tune
= processor_alias_table
[i
].processor
;
1316 error ("CPU you selected does not support x86-64 "
1323 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1325 for (i
= 0; i
< pta_size
; i
++)
1326 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
1328 ix86_tune
= processor_alias_table
[i
].processor
;
1329 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1330 error ("CPU you selected does not support x86-64 instruction set");
1332 /* Intel CPUs have always interpreted SSE prefetch instructions as
1333 NOPs; so, we can enable SSE prefetch instructions even when
1334 -mtune (rather than -march) points us to a processor that has them.
1335 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1336 higher processors. */
1337 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
1338 x86_prefetch_sse
= true;
1342 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1345 ix86_cost
= &size_cost
;
1347 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
1348 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
1349 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
1351 /* Arrange to set up i386_stack_locals for all functions. */
1352 init_machine_status
= ix86_init_machine_status
;
1354 /* Validate -mregparm= value. */
1355 if (ix86_regparm_string
)
1357 i
= atoi (ix86_regparm_string
);
1358 if (i
< 0 || i
> REGPARM_MAX
)
1359 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1365 ix86_regparm
= REGPARM_MAX
;
1367 /* If the user has provided any of the -malign-* options,
1368 warn and use that value only if -falign-* is not set.
1369 Remove this code in GCC 3.2 or later. */
1370 if (ix86_align_loops_string
)
1372 warning ("-malign-loops is obsolete, use -falign-loops");
1373 if (align_loops
== 0)
1375 i
= atoi (ix86_align_loops_string
);
1376 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1377 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1379 align_loops
= 1 << i
;
1383 if (ix86_align_jumps_string
)
1385 warning ("-malign-jumps is obsolete, use -falign-jumps");
1386 if (align_jumps
== 0)
1388 i
= atoi (ix86_align_jumps_string
);
1389 if (i
< 0 || i
> MAX_CODE_ALIGN
)
	    error ("-malign-jumps=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1392 align_jumps
= 1 << i
;
1396 if (ix86_align_funcs_string
)
1398 warning ("-malign-functions is obsolete, use -falign-functions");
1399 if (align_functions
== 0)
1401 i
= atoi (ix86_align_funcs_string
);
1402 if (i
< 0 || i
> MAX_CODE_ALIGN
)
	    error ("-malign-functions=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1405 align_functions
= 1 << i
;
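	  /* Example, not in the original file: -malign-functions=4 yields
	     align_functions = 1 << 4 = 16, i.e. the same 16-byte alignment
	     that -falign-functions=16 would request.  */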
1409 /* Default align_* from the processor table. */
1410 if (align_loops
== 0)
1412 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
1413 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
1415 if (align_jumps
== 0)
1417 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
1418 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
1420 if (align_functions
== 0)
1422 align_functions
= processor_target_table
[ix86_tune
].align_func
;
1425 /* Validate -mpreferred-stack-boundary= value, or provide default.
1426 The default of 128 bits is for Pentium III's SSE __m128, but we
1427 don't want additional code to keep the stack aligned when
1428 optimizing for code size. */
1429 ix86_preferred_stack_boundary
= (optimize_size
1430 ? TARGET_64BIT
? 128 : 32
1432 if (ix86_preferred_stack_boundary_string
)
1434 i
= atoi (ix86_preferred_stack_boundary_string
);
1435 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
1436 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1437 TARGET_64BIT
? 4 : 2);
1439 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
1442 /* Validate -mbranch-cost= value, or provide default. */
1443 ix86_branch_cost
= processor_target_table
[ix86_tune
].cost
->branch_cost
;
1444 if (ix86_branch_cost_string
)
1446 i
= atoi (ix86_branch_cost_string
);
1448 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1450 ix86_branch_cost
= i
;
1453 if (ix86_tls_dialect_string
)
1455 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1456 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1457 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1458 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1460 error ("bad value (%s) for -mtls-dialect= switch",
1461 ix86_tls_dialect_string
);
1464 /* Keep nonleaf frame pointers. */
1465 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1466 flag_omit_frame_pointer
= 1;
1468 /* If we're doing fast math, we don't care about comparison order
1469 wrt NaNs. This lets us use a shorter comparison sequence. */
1470 if (flag_unsafe_math_optimizations
)
1471 target_flags
&= ~MASK_IEEE_FP
;
1473 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1474 since the insns won't need emulation. */
1475 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1476 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1478 /* Turn on SSE2 builtins for -msse3. */
1480 target_flags
|= MASK_SSE2
;
1482 /* Turn on SSE builtins for -msse2. */
1484 target_flags
|= MASK_SSE
;
1488 if (TARGET_ALIGN_DOUBLE
)
1489 error ("-malign-double makes no sense in the 64bit mode");
1491 error ("-mrtd calling convention not supported in the 64bit mode");
1492 /* Enable by default the SSE and MMX builtins. */
1493 target_flags
|= (MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
);
1494 ix86_fpmath
= FPMATH_SSE
;
1498 ix86_fpmath
= FPMATH_387
;
      /* The i386 ABI does not specify a red zone.  It still makes sense to
	 use one when the programmer takes care to keep the stack from being
	 destroyed.  */
1501 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
1502 target_flags
|= MASK_NO_RED_ZONE
;
1505 if (ix86_fpmath_string
!= 0)
1507 if (! strcmp (ix86_fpmath_string
, "387"))
1508 ix86_fpmath
= FPMATH_387
;
1509 else if (! strcmp (ix86_fpmath_string
, "sse"))
1513 warning ("SSE instruction set disabled, using 387 arithmetics");
1514 ix86_fpmath
= FPMATH_387
;
1517 ix86_fpmath
= FPMATH_SSE
;
1519 else if (! strcmp (ix86_fpmath_string
, "387,sse")
1520 || ! strcmp (ix86_fpmath_string
, "sse,387"))
1524 warning ("SSE instruction set disabled, using 387 arithmetics");
1525 ix86_fpmath
= FPMATH_387
;
1527 else if (!TARGET_80387
)
1529 warning ("387 instruction set disabled, using SSE arithmetics");
1530 ix86_fpmath
= FPMATH_SSE
;
1533 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
1536 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
1539 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1543 target_flags
|= MASK_MMX
;
1544 x86_prefetch_sse
= true;
1547 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1550 target_flags
|= MASK_MMX
;
1551 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1552 extensions it adds. */
1553 if (x86_3dnow_a
& (1 << ix86_arch
))
1554 target_flags
|= MASK_3DNOW_A
;
1556 if ((x86_accumulate_outgoing_args
& TUNEMASK
)
1557 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
1559 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1561 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1564 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1565 p
= strchr (internal_label_prefix
, 'X');
1566 internal_label_prefix_len
= p
- internal_label_prefix
;
  /* When the scheduling description is not available, disable the scheduler
     pass so it won't slow down compilation and make x87 code slower.  */
1571 if (!TARGET_SCHEDULE
)
1572 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
1576 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1578   /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
1579      make the problem with not enough registers even worse.  */
1580 #ifdef INSN_SCHEDULING
1582     flag_schedule_insns = 0;
1585   /* The default values of these switches depend on TARGET_64BIT,
1586      which is not known at this moment.  Mark these values with 2 and
1587      let the user override them.  In case there is no command line option
1588      specifying them, we will set the defaults in override_options.  */
1590     flag_omit_frame_pointer = 2;
1591     flag_pcc_struct_return = 2;
1592     flag_asynchronous_unwind_tables = 2;
1595 /* Table of valid machine attributes.  */
1596 const struct attribute_spec ix86_attribute_table[] =
1598   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1599   /* Stdcall attribute says callee is responsible for popping arguments
1600      if they are not variable.  */
1601   { "stdcall",   0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
1602   /* Fastcall attribute says callee is responsible for popping arguments
1603      if they are not variable.  */
1604   { "fastcall",  0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
1605   /* Cdecl attribute says the callee is a normal C declaration */
1606   { "cdecl",     0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
1607   /* Regparm attribute specifies how many integer arguments are to be
1608      passed in registers.  */
1609   { "regparm",   1, 1, false, true,  true,  ix86_handle_regparm_attribute },
1610 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1611   { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1612   { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1613   { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
1615   { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1616   { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1617 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1618   SUBTARGET_ATTRIBUTE_TABLE,
1620   { NULL,        0, 0, false, false, false, NULL }
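/* Illustrative note (not part of the original source): the calling-convention
   attributes registered above are applied to function types, e.g.

       int __attribute__((stdcall))  wnd_proc (int msg);          callee pops args
       int __attribute__((fastcall)) hot_path (int a, int b);     args in ECX/EDX
       int __attribute__((regparm(3))) helper (int a, int b, int c);

   The handlers below only validate placement and attribute combinations;
   the conventions themselves take effect in ix86_function_regparm,
   ix86_return_pops_args and the argument-passing code further down.  */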
1623 /* Decide whether we can make a sibling call to a function.  DECL is the
1624    declaration of the function being targeted by the call and EXP is the
1625    CALL_EXPR representing the call.  */
1628 ix86_function_ok_for_sibcall (tree decl, tree exp)
1630   /* If we are generating position-independent code, we cannot sibcall
1631      optimize any indirect call, or a direct call to a global function,
1632      as the PLT requires %ebx be live.  */
1633   if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1636   /* If we are returning floats on the 80387 register stack, we cannot
1637      make a sibcall from a function that doesn't return a float to a
1638      function that does or, conversely, from a function that does return
1639      a float to a function that doesn't; the necessary stack adjustment
1640      would not be executed.  */
1641   if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1642       != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1645   /* If this call is indirect, we'll need to be able to use a call-clobbered
1646      register for the address of the target function.  Make sure that all
1647      such registers are not used for passing parameters.  */
1648   if (!decl && !TARGET_64BIT)
1652       /* We're looking at the CALL_EXPR, we need the type of the function.  */
1653       type = TREE_OPERAND (exp, 0);	/* pointer expression */
1654       type = TREE_TYPE (type);		/* pointer type */
1655       type = TREE_TYPE (type);		/* function type */
1657       if (ix86_function_regparm (type, NULL) >= 3)
1659	   /* ??? Need to count the actual number of registers to be used,
1660	      not the possible number of registers.  Fix later.  */
1665   /* Otherwise okay.  That also includes certain types of indirect calls.  */
1669 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1670    arguments as in struct attribute_spec.handler.  */
1672 ix86_handle_cdecl_attribute (tree *node, tree name,
1673			      tree args ATTRIBUTE_UNUSED,
1674			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1676   if (TREE_CODE (*node) != FUNCTION_TYPE
1677       && TREE_CODE (*node) != METHOD_TYPE
1678       && TREE_CODE (*node) != FIELD_DECL
1679       && TREE_CODE (*node) != TYPE_DECL)
1681       warning ("`%s' attribute only applies to functions",
1682		IDENTIFIER_POINTER (name));
1683       *no_add_attrs = true;
1687       if (is_attribute_p ("fastcall", name))
1689	   if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1691	       error ("fastcall and stdcall attributes are not compatible");
1693	   else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1695	       error ("fastcall and regparm attributes are not compatible");
1698       else if (is_attribute_p ("stdcall", name))
1700	   if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1702	       error ("fastcall and stdcall attributes are not compatible");
1709       warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1710       *no_add_attrs = true;
1716 /* Handle a "regparm" attribute;
1717    arguments as in struct attribute_spec.handler.  */
1719 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1720				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1722   if (TREE_CODE (*node) != FUNCTION_TYPE
1723       && TREE_CODE (*node) != METHOD_TYPE
1724       && TREE_CODE (*node) != FIELD_DECL
1725       && TREE_CODE (*node) != TYPE_DECL)
1727       warning ("`%s' attribute only applies to functions",
1728		IDENTIFIER_POINTER (name));
1729       *no_add_attrs = true;
1735       cst = TREE_VALUE (args);
1736       if (TREE_CODE (cst) != INTEGER_CST)
1738	   warning ("`%s' attribute requires an integer constant argument",
1739		    IDENTIFIER_POINTER (name));
1740	   *no_add_attrs = true;
1742       else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1744	   warning ("argument to `%s' attribute larger than %d",
1745		    IDENTIFIER_POINTER (name), REGPARM_MAX);
1746	   *no_add_attrs = true;
1749       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1751	   error ("fastcall and regparm attributes are not compatible");
1758 /* Return 0 if the attributes for two types are incompatible, 1 if they
1759    are compatible, and 2 if they are nearly compatible (which causes a
1760    warning to be generated).  */
1763 ix86_comp_type_attributes (tree type1, tree type2)
1765   /* Check for mismatch of non-default calling convention.  */
1766   const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1768   if (TREE_CODE (type1) != FUNCTION_TYPE)
1771   /* Check for mismatched fastcall types */
1772   if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1773       != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1776   /* Check for mismatched return types (cdecl vs stdcall).  */
1777   if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1778       != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1780   if (ix86_function_regparm (type1, NULL)
1781       != ix86_function_regparm (type2, NULL))
1786 /* Return the regparm value for a function with the indicated TYPE and DECL.
1787    DECL may be NULL when calling function indirectly
1788    or considering a libcall.  */
1791 ix86_function_regparm (tree type, tree decl)
1794   int regparm = ix86_regparm;
1795   bool user_convention = false;
1799       attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1802	   regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1803	   user_convention = true;
1806       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1809	   user_convention = true;
1812   /* Use register calling convention for local functions when possible.  */
1813   if (!TARGET_64BIT && !user_convention && decl
1814       && flag_unit_at_a_time && !profile_flag)
1816       struct cgraph_local_info *i = cgraph_local_info (decl);
1819	   /* We can't use regparm(3) for nested functions as these use
1820	      static chain pointer in third argument.  */
1821	   if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
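/* Illustrative effect (not part of the original source): with -funit-at-a-time,
   a local (static, non-escaping) function gets regparm(3) here even without an
   attribute, so its first three integer arguments arrive in EAX, EDX and ECX;
   nested functions are excluded because the static chain already occupies the
   third argument register.  */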
1831 /* Return true if EAX is live at the start of the function.  Used by
1832    ix86_expand_prologue to determine if we need special help before
1833    calling allocate_stack_worker.  */
1836 ix86_eax_live_at_start_p (void)
1838   /* Cheat.  Don't bother working forward from ix86_function_regparm
1839      to the function type to whether an actual argument is located in
1840      eax.  Instead just look at cfg info, which is still close enough
1841      to correct at this point.  This gives false positives for broken
1842      functions that might use uninitialized data that happens to be
1843      allocated in eax, but who cares?  */
1844   return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1847 /* Value is the number of bytes of arguments automatically
1848    popped when returning from a subroutine call.
1849    FUNDECL is the declaration node of the function (as a tree),
1850    FUNTYPE is the data type of the function (as a tree),
1851    or for a library call it is an identifier node for the subroutine name.
1852    SIZE is the number of bytes of arguments passed on the stack.
1854    On the 80386, the RTD insn may be used to pop them if the number
1855    of args is fixed, but if the number is variable then the caller
1856    must pop them all.  RTD can't be used for library calls now
1857    because the library is compiled with the Unix compiler.
1858    Use of RTD is a selectable option, since it is incompatible with
1859    standard Unix calling sequences.  If the option is not selected,
1860    the caller must always pop the args.
1862    The attribute stdcall is equivalent to RTD on a per module basis.  */
1865 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1867   int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1869   /* Cdecl functions override -mrtd, and never pop the stack.  */
1870   if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1872     /* Stdcall and fastcall functions will pop the stack if not
1874     if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1875	 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1879	 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1880	     || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1881		 == void_type_node)))
1885   /* Lose any fake structure return argument if it is passed on the stack.  */
1886   if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1888       && !KEEP_AGGREGATE_RETURN_POINTER)
1890       int nregs = ix86_function_regparm (funtype, fundecl);
1893	 return GET_MODE_SIZE (Pmode);
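/* Illustrative example (not part of the original source): for
       void __attribute__((stdcall)) f (int a, int b);
   a fixed-argument stdcall function, this returns 8, so the epilogue can use
   `ret 8' and the caller pops nothing.  For a cdecl function, or a stdcall
   function whose argument list ends in `...', it returns 0 and the caller
   remains responsible for those 8 bytes.  */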
1899 /* Argument support functions.  */
1901 /* Return true when register may be used to pass function parameters.  */
1903 ix86_function_arg_regno_p (int regno)
1907     return (regno < REGPARM_MAX
1908	     || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1909   if (SSE_REGNO_P (regno) && TARGET_SSE)
1911   /* RAX is used as hidden argument to va_arg functions.  */
1914   for (i = 0; i < REGPARM_MAX; i++)
1915     if (regno == x86_64_int_parameter_registers[i])
1920 /* Return if we do not know how to pass TYPE solely in registers.  */
1923 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1925   if (must_pass_in_stack_var_size_or_pad (mode, type))
1927   return (!TARGET_64BIT && type && mode == TImode);
1930 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1931    for a call to a function whose data type is FNTYPE.
1932    For a library call, FNTYPE is 0.  */
1935 init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
1936		       tree fntype,	/* tree ptr for function decl */
1937		       rtx libname,	/* SYMBOL_REF of library name or 0 */
1940   static CUMULATIVE_ARGS zero_cum;
1941   tree param, next_param;
1943   if (TARGET_DEBUG_ARG)
1945       fprintf (stderr, "\ninit_cumulative_args (");
1947	 fprintf (stderr, "fntype code = %s, ret code = %s",
1948		  tree_code_name[(int) TREE_CODE (fntype)],
1949		  tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1951	 fprintf (stderr, "no fntype");
1954	 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1959   /* Set up the number of registers to use for passing arguments.  */
1961     cum->nregs = ix86_function_regparm (fntype, fndecl);
1963     cum->nregs = ix86_regparm;
1965     cum->sse_nregs = SSE_REGPARM_MAX;
1967     cum->mmx_nregs = MMX_REGPARM_MAX;
1968   cum->warn_sse = true;
1969   cum->warn_mmx = true;
1970   cum->maybe_vaarg = false;
1972   /* Use ecx and edx registers if function has fastcall attribute */
1973   if (fntype && !TARGET_64BIT)
1975       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1982   /* Determine if this function has variable arguments.  This is
1983      indicated by the last argument being 'void_type_node' if there
1984      are no variable arguments.  If there are variable arguments, then
1985      we won't pass anything in registers in 32-bit mode.  */
1987   if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
1989       for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1990	    param != 0; param = next_param)
1992	   next_param = TREE_CHAIN (param);
1993	   if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2004		 cum->maybe_vaarg = true;
2008   if ((!fntype && !libname)
2009       || (fntype && !TYPE_ARG_TYPES (fntype)))
2010     cum->maybe_vaarg = 1;
2012   if (TARGET_DEBUG_ARG)
2013     fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2018 /* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
2019    of this code is to classify each 8bytes of incoming argument by the register
2020    class and assign registers accordingly.  */
2022 /* Return the union class of CLASS1 and CLASS2.
2023    See the x86-64 PS ABI for details.  */
2025 static enum x86_64_reg_class
2026 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2028   /* Rule #1: If both classes are equal, this is the resulting class.  */
2029   if (class1 == class2)
2032   /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2034   if (class1 == X86_64_NO_CLASS)
2036   if (class2 == X86_64_NO_CLASS)
2039   /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
2040   if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2041     return X86_64_MEMORY_CLASS;
2043   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
2044   if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2045       || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2046     return X86_64_INTEGERSI_CLASS;
2047   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2048       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2049     return X86_64_INTEGER_CLASS;
2051   /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used.  */
2052   if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2053       || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2054     return X86_64_MEMORY_CLASS;
2056   /* Rule #6: Otherwise class SSE is used.  */
2057   return X86_64_SSE_CLASS;
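/* Worked example (illustrative, not from the original source): merging the
   per-8-byte classes of `struct { int i; float f; }' hits Rule #4, so
   INTEGERSI merged with SSESF yields INTEGERSI and the whole 8-byte chunk
   travels in a general-purpose register.  Merging X87 with anything (Rule #5)
   forces MEMORY, which is why a long double member pushes the containing
   struct onto the stack.  */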
2060 /* Classify the argument of type TYPE and mode MODE.
2061    CLASSES will be filled by the register class used to pass each word
2062    of the operand.  The number of words is returned.  In case the parameter
2063    should be passed in memory, 0 is returned. As a special case for zero
2064    sized containers, classes[0] will be NO_CLASS and 1 is returned.
2066    BIT_OFFSET is used internally for handling records and specifies the
2067    offset in bits modulo 256 to avoid overflow cases.
2069    See the x86-64 PS ABI for details.
2073 classify_argument (enum machine_mode mode, tree type,
2074		    enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2076   HOST_WIDE_INT bytes =
2077     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2078   int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2080   /* Variable sized entities are always passed/returned in memory.  */
2084   if (mode != VOIDmode
2085       && targetm.calls.must_pass_in_stack (mode, type))
2088   if (type && AGGREGATE_TYPE_P (type))
2092       enum x86_64_reg_class subclasses[MAX_CLASSES];
2094       /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
2098       for (i = 0; i < words; i++)
2099	 classes[i] = X86_64_NO_CLASS;
2101       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
2102	  signal the memory class, so handle it as a special case.  */
2105	   classes[0] = X86_64_NO_CLASS;
2110 if (TREE_CODE (type
) == RECORD_TYPE
)
2112 /* For classes first merge in the field of the subclasses. */
2113 if (TYPE_BINFO (type
))
2115 tree binfo
, base_binfo
;
2118 for (binfo
= TYPE_BINFO (type
), i
= 0;
2119 BINFO_BASE_ITERATE (binfo
, i
, base_binfo
); i
++)
2122 int offset
= tree_low_cst (BINFO_OFFSET (base_binfo
), 0) * 8;
2123 tree type
= BINFO_TYPE (base_binfo
);
2125 num
= classify_argument (TYPE_MODE (type
),
2127 (offset
+ bit_offset
) % 256);
2130 for (i
= 0; i
< num
; i
++)
2132 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
2134 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2138 /* And now merge the fields of structure. */
2139 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2141 if (TREE_CODE (field
) == FIELD_DECL
)
2145 /* Bitfields are always classified as integer. Handle them
2146 early, since later code would consider them to be
2147 misaligned integers. */
2148 if (DECL_BIT_FIELD (field
))
2150 for (i
= int_bit_position (field
) / 8 / 8;
2151 i
< (int_bit_position (field
)
2152 + tree_low_cst (DECL_SIZE (field
), 0)
2155 merge_classes (X86_64_INTEGER_CLASS
,
2160 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2161 TREE_TYPE (field
), subclasses
,
2162 (int_bit_position (field
)
2163 + bit_offset
) % 256);
2166 for (i
= 0; i
< num
; i
++)
2169 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
2171 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2177       /* Arrays are handled as small records.  */
2178       else if (TREE_CODE (type) == ARRAY_TYPE)
2181	   num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2182				    TREE_TYPE (type), subclasses, bit_offset);
2186	   /* The partial classes are now full classes.  */
2187	   if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2188	     subclasses[0] = X86_64_SSE_CLASS;
2189	   if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2190	     subclasses[0] = X86_64_INTEGER_CLASS;
2192	   for (i = 0; i < words; i++)
2193	     classes[i] = subclasses[i % num];
2195       /* Unions are similar to RECORD_TYPE but offset is always 0.  */
2196       else if (TREE_CODE (type) == UNION_TYPE
2197	       || TREE_CODE (type) == QUAL_UNION_TYPE)
2199	   /* For classes first merge in the field of the subclasses.  */
2200	   if (TYPE_BINFO (type))
2202	       tree binfo, base_binfo;
2205	       for (binfo = TYPE_BINFO (type), i = 0;
2206		    BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2209		   int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2210		   tree type = BINFO_TYPE (base_binfo);
2212		   num = classify_argument (TYPE_MODE (type),
2214					    (offset + (bit_offset % 64)) % 256);
2217		   for (i = 0; i < num; i++)
2219		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
2221			 merge_classes (subclasses[i], classes[i + pos]);
2225	   for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2227	       if (TREE_CODE (field) == FIELD_DECL)
2230		   num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2231					    TREE_TYPE (field), subclasses,
2235		   for (i = 0; i < num; i++)
2236		     classes[i] = merge_classes (subclasses[i], classes[i]);
2240       else if (TREE_CODE (type) == SET_TYPE)
2244	       classes[0] = X86_64_INTEGERSI_CLASS;
2247	   else if (bytes <= 8)
2249	       classes[0] = X86_64_INTEGER_CLASS;
2252	   else if (bytes <= 12)
2254	       classes[0] = X86_64_INTEGER_CLASS;
2255	       classes[1] = X86_64_INTEGERSI_CLASS;
2260	       classes[0] = X86_64_INTEGER_CLASS;
2261	       classes[1] = X86_64_INTEGER_CLASS;
2268   /* Final merger cleanup.  */
2269   for (i = 0; i < words; i++)
2271       /* If one class is MEMORY, everything should be passed in
2273       if (classes[i] == X86_64_MEMORY_CLASS)
2276       /* The X86_64_SSEUP_CLASS should be always preceded by
2277	  X86_64_SSE_CLASS.  */
2278       if (classes[i] == X86_64_SSEUP_CLASS
2279	   && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2280	 classes[i] = X86_64_SSE_CLASS;
2282       /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
2283       if (classes[i] == X86_64_X87UP_CLASS
2284	   && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2285	 classes[i] = X86_64_SSE_CLASS;
2290   /* Compute alignment needed.  We align all types to natural boundaries with
2291      exception of XFmode that is aligned to 64bits.  */
2292   if (mode != VOIDmode && mode != BLKmode)
2294       int mode_alignment = GET_MODE_BITSIZE (mode);
2297	 mode_alignment = 128;
2298       else if (mode == XCmode)
2299	 mode_alignment = 256;
2300       if (COMPLEX_MODE_P (mode))
2301	 mode_alignment /= 2;
2302       /* Misaligned fields are always returned in memory.  */
2303       if (bit_offset % mode_alignment)
2307   /* for V1xx modes, just use the base mode */
2308   if (VECTOR_MODE_P (mode)
2309       && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2310     mode = GET_MODE_INNER (mode);
2312   /* Classification of atomic types.  */
2322       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2323	 classes[0] = X86_64_INTEGERSI_CLASS;
2325	 classes[0] = X86_64_INTEGER_CLASS;
2329       classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2334       if (!(bit_offset % 64))
2335	 classes[0] = X86_64_SSESF_CLASS;
2337	 classes[0] = X86_64_SSE_CLASS;
2340       classes[0] = X86_64_SSEDF_CLASS;
2343       classes[0] = X86_64_X87_CLASS;
2344       classes[1] = X86_64_X87UP_CLASS;
2347       classes[0] = X86_64_SSE_CLASS;
2348       classes[1] = X86_64_SSEUP_CLASS;
2351       classes[0] = X86_64_SSE_CLASS;
2354       classes[0] = X86_64_SSEDF_CLASS;
2355       classes[1] = X86_64_SSEDF_CLASS;
2359       /* These modes are larger than 16 bytes.  */
2367       classes[0] = X86_64_SSE_CLASS;
2368       classes[1] = X86_64_SSEUP_CLASS;
2374       classes[0] = X86_64_SSE_CLASS;
2380       if (VECTOR_MODE_P (mode))
2384	   if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2386	       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2387		 classes[0] = X86_64_INTEGERSI_CLASS;
2389		 classes[0] = X86_64_INTEGER_CLASS;
2390	   classes[1] = X86_64_INTEGER_CLASS;
2391	   return 1 + (bytes > 8);
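/* Worked example (illustrative, not from the original source):
       struct s { long l; double d; };     16 bytes, two 8-byte words
   classifies as { INTEGER, SSE }, so `l' travels in the next free integer
   register and `d' in the next free SSE register.  A 20-byte struct exceeds
   the 16-byte aggregate limit above and classify_argument returns 0, i.e.
   the argument goes to memory.  */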
2398 /* Examine the argument and set the number of registers required in each
2399    class.  Return 0 iff the parameter should be passed in memory.  */
2401 examine_argument (enum machine_mode mode, tree type, int in_return,
2402		   int *int_nregs, int *sse_nregs)
2404   enum x86_64_reg_class class[MAX_CLASSES];
2405   int n = classify_argument (mode, type, class, 0);
2411   for (n--; n >= 0; n--)
2414       case X86_64_INTEGER_CLASS:
2415       case X86_64_INTEGERSI_CLASS:
2418       case X86_64_SSE_CLASS:
2419       case X86_64_SSESF_CLASS:
2420       case X86_64_SSEDF_CLASS:
2423       case X86_64_NO_CLASS:
2424       case X86_64_SSEUP_CLASS:
2426       case X86_64_X87_CLASS:
2427       case X86_64_X87UP_CLASS:
2431       case X86_64_MEMORY_CLASS:
2436 /* Construct container for the argument used by GCC interface.  See
2437    FUNCTION_ARG for the detailed description.  */
2439 construct_container (enum machine_mode mode, tree type, int in_return,
2440		      int nintregs, int nsseregs, const int *intreg,
2443   enum machine_mode tmpmode;
2445     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2446   enum x86_64_reg_class class[MAX_CLASSES];
2450   int needed_sseregs, needed_intregs;
2451   rtx exp[MAX_CLASSES];
2454   n = classify_argument (mode, type, class, 0);
2455   if (TARGET_DEBUG_ARG)
2458	 fprintf (stderr, "Memory class\n");
2461	   fprintf (stderr, "Classes:");
2462	   for (i = 0; i < n; i++)
2464	       fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2466	   fprintf (stderr, "\n");
2471   if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2473   if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2476   /* First construct simple cases.  Avoid SCmode, since we want to use
2477      single register to pass this type.  */
2478   if (n == 1 && mode != SCmode)
2481       case X86_64_INTEGER_CLASS:
2482       case X86_64_INTEGERSI_CLASS:
2483	 return gen_rtx_REG (mode, intreg[0]);
2484       case X86_64_SSE_CLASS:
2485       case X86_64_SSESF_CLASS:
2486       case X86_64_SSEDF_CLASS:
2487	 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2488       case X86_64_X87_CLASS:
2489	 return gen_rtx_REG (mode, FIRST_STACK_REG);
2490       case X86_64_NO_CLASS:
2491	 /* Zero sized array, struct or class.  */
2496   if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2498     return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2500       && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2501     return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2502   if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2503       && class[1] == X86_64_INTEGER_CLASS
2504       && (mode == CDImode || mode == TImode || mode == TFmode)
2505       && intreg[0] + 1 == intreg[1])
2506     return gen_rtx_REG (mode, intreg[0]);
2508       && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2509       && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2511     return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2513   /* Otherwise figure out the entries of the PARALLEL.  */
2514   for (i = 0; i < n; i++)
2518	 case X86_64_NO_CLASS:
2520	 case X86_64_INTEGER_CLASS:
2521	 case X86_64_INTEGERSI_CLASS:
2522	   /* Merge TImodes on aligned occasions here too.  */
2523	   if (i * 8 + 8 > bytes)
2524	     tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2525	   else if (class[i] == X86_64_INTEGERSI_CLASS)
2529	   /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
2530	   if (tmpmode == BLKmode)
2532	   exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2533					     gen_rtx_REG (tmpmode, *intreg),
2537	 case X86_64_SSESF_CLASS:
2538	   exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2539					     gen_rtx_REG (SFmode,
2540							  SSE_REGNO (sse_regno)),
2544	 case X86_64_SSEDF_CLASS:
2545	   exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2546					     gen_rtx_REG (DFmode,
2547							  SSE_REGNO (sse_regno)),
2551	 case X86_64_SSE_CLASS:
2552	   if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2556	   exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2557					     gen_rtx_REG (tmpmode,
2558							  SSE_REGNO (sse_regno)),
2560	   if (tmpmode == TImode)
2568   ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2569   for (i = 0; i < nexps; i++)
2570     XVECEXP (ret, 0, i) = exp[i];
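/* Illustrative example (not part of the original source): for the
   { INTEGER, SSE } classification shown earlier, construct_container builds
   a PARALLEL with two EXPR_LIST entries, e.g. an integer register at byte
   offset 0 and an SSE register at byte offset 8, which FUNCTION_ARG then
   hands back to the middle end.  */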
2574 /* Update the data in CUM to advance over an argument
2575    of mode MODE and data type TYPE.
2576    (TYPE is null for libcalls where that information may not be available.)  */
2579 function_arg_advance (CUMULATIVE_ARGS *cum,	/* current arg information */
2580		       enum machine_mode mode,	/* current arg mode */
2581		       tree type,	/* type of the argument or 0 if lib support */
2582		       int named)	/* whether or not the argument was named */
2585     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2586   int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2588   if (TARGET_DEBUG_ARG)
2590	      "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2591	      words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2594       int int_nregs, sse_nregs;
2595       if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2596	 cum->words += words;
2597       else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2599	   cum->nregs -= int_nregs;
2600	   cum->sse_nregs -= sse_nregs;
2601	   cum->regno += int_nregs;
2602	   cum->sse_regno += sse_nregs;
2605	 cum->words += words;
2609       if (TARGET_SSE && SSE_REG_MODE_P (mode)
2610	   && (!type || !AGGREGATE_TYPE_P (type)))
2612	   cum->sse_words += words;
2613	   cum->sse_nregs -= 1;
2614	   cum->sse_regno += 1;
2615	   if (cum->sse_nregs <= 0)
2621       else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2622		&& (!type || !AGGREGATE_TYPE_P (type)))
2624	   cum->mmx_words += words;
2625	   cum->mmx_nregs -= 1;
2626	   cum->mmx_regno += 1;
2627	   if (cum->mmx_nregs <= 0)
2635	   cum->words += words;
2636	   cum->nregs -= words;
2637	   cum->regno += words;
2639	   if (cum->nregs <= 0)
2649 /* Define where to put the arguments to a function.
2650    Value is zero to push the argument on the stack,
2651    or a hard register in which to store the argument.
2653    MODE is the argument's machine mode.
2654    TYPE is the data type of the argument (as a tree).
2655     This is null for libcalls where that information may
2657    CUM is a variable of type CUMULATIVE_ARGS which gives info about
2658     the preceding args and about the function being called.
2659    NAMED is nonzero if this argument is a named parameter
2660     (otherwise it is an extra parameter matching an ellipsis).  */
2663 function_arg (CUMULATIVE_ARGS *cum,	/* current arg information */
2664	       enum machine_mode mode,	/* current arg mode */
2665	       tree type,	/* type of the argument or 0 if lib support */
2666	       int named)	/* != 0 for normal args, == 0 for ... args */
2670     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2671   int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2672   static bool warnedsse, warnedmmx;
2674   /* To simplify the code below, represent vector types with a vector mode
2675      even if MMX/SSE are not active.  */
2677       && TREE_CODE (type) == VECTOR_TYPE
2678       && (bytes == 8 || bytes == 16)
2679       && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_INT
2680       && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_FLOAT)
2682       enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2683       enum machine_mode newmode
2684	 = TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
2685	   ? MIN_MODE_VECTOR_FLOAT : MIN_MODE_VECTOR_INT;
2687       /* Get the mode which has this inner mode and number of units.  */
2688       for (; newmode != VOIDmode; newmode = GET_MODE_WIDER_MODE (newmode))
2689	 if (GET_MODE_NUNITS (newmode) == TYPE_VECTOR_SUBPARTS (type)
2690	     && GET_MODE_INNER (newmode) == innermode)
2697       /* Handle a hidden AL argument containing number of registers for varargs
2698	  x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
2700       if (mode == VOIDmode)
2703	 return GEN_INT (cum->maybe_vaarg
2704			 ? (cum->sse_nregs < 0
2712       ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2713				  &x86_64_int_parameter_registers[cum->regno],
2718       /* For now, pass fp/complex values on the stack.  */
2730	 if (words <= cum->nregs)
2732	     int regno = cum->regno;
2734	     /* Fastcall allocates the first two DWORD (SImode) or
2735		smaller arguments to ECX and EDX.  */
2738		 if (mode == BLKmode || mode == DImode)
2741		 /* ECX not EAX is the first allocated register.  */
2745	     ret = gen_rtx_REG (mode, regno);
2755	 if (!type || !AGGREGATE_TYPE_P (type))
2757	     if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2760		 warning ("SSE vector argument without SSE enabled "
2764	     ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2771	 if (!type || !AGGREGATE_TYPE_P (type))
2773	     if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2776		 warning ("MMX vector argument without MMX enabled "
2780	     ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2785   if (TARGET_DEBUG_ARG)
2788	      "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2789	      words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2792	print_simple_rtl (stderr, ret);
2794	fprintf (stderr, ", stack");
2796       fprintf (stderr, " )\n");
2802 /* A C expression that indicates when an argument must be passed by
2803    reference.  If nonzero for an argument, a copy of that argument is
2804    made in memory and a pointer to the argument is passed instead of
2805    the argument itself.  The pointer is passed in whatever way is
2806    appropriate for passing a pointer to that type.  */
2809 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2810			 enum machine_mode mode ATTRIBUTE_UNUSED,
2811			 tree type, bool named ATTRIBUTE_UNUSED)
2816   if (type && int_size_in_bytes (type) == -1)
2818       if (TARGET_DEBUG_ARG)
2819	 fprintf (stderr, "function_arg_pass_by_reference\n");
2826 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2827    ABI.  Only called if TARGET_SSE.  */
2829 contains_128bit_aligned_vector_p (tree type)
2831   enum machine_mode mode = TYPE_MODE (type);
2832   if (SSE_REG_MODE_P (mode)
2833       && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2835   if (TYPE_ALIGN (type) < 128)
2838   if (AGGREGATE_TYPE_P (type))
2840       /* Walk the aggregates recursively.  */
2841       if (TREE_CODE (type) == RECORD_TYPE
2842	   || TREE_CODE (type) == UNION_TYPE
2843	   || TREE_CODE (type) == QUAL_UNION_TYPE)
2847	   if (TYPE_BINFO (type))
2849	       tree binfo, base_binfo;
2852	       for (binfo = TYPE_BINFO (type), i = 0;
2853		    BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2854		 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2857	   /* And now merge the fields of structure.  */
2858	   for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2860	       if (TREE_CODE (field) == FIELD_DECL
2861		   && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2865       /* Just for use if some languages pass arrays by value.  */
2866       else if (TREE_CODE (type) == ARRAY_TYPE)
2868	   if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2877 /* Gives the alignment boundary, in bits, of an argument with the
2878    specified mode and type.  */
2881 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2885     align = TYPE_ALIGN (type);
2887     align = GET_MODE_ALIGNMENT (mode);
2888   if (align < PARM_BOUNDARY)
2889     align = PARM_BOUNDARY;
2892       /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
2893	  make an exception for SSE modes since these require 128bit
2896	  The handling here differs from field_alignment.  ICC aligns MMX
2897	  arguments to 4 byte boundaries, while structure fields are aligned
2898	  to 8 byte boundaries.  */
2900	 align = PARM_BOUNDARY;
2903	   if (!SSE_REG_MODE_P (mode))
2904	     align = PARM_BOUNDARY;
2908	   if (!contains_128bit_aligned_vector_p (type))
2909	     align = PARM_BOUNDARY;
2917 /* Return true if N is a possible register number of function value.  */
2919 ix86_function_value_regno_p (int regno)
2923       return ((regno) == 0
2924	       || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2925	       || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2927   return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2928	   || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2929	   || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2932 /* Define how to find the value returned by a function.
2933    VALTYPE is the data type of the value (as a tree).
2934    If the precise function being called is known, FUNC is its FUNCTION_DECL;
2935    otherwise, FUNC is 0.  */
2937 ix86_function_value (tree valtype)
2941       rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2942				      REGPARM_MAX, SSE_REGPARM_MAX,
2943				      x86_64_int_return_registers, 0);
2944       /* For zero sized structures, construct_container returns NULL, but we
2945	  need to keep the rest of the compiler happy by returning a meaningful
2946	  value.  */
2947	 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2951     return gen_rtx_REG (TYPE_MODE (valtype),
2952			 ix86_value_regno (TYPE_MODE (valtype)));
2955 /* Return false iff type is returned in memory.  */
2957 ix86_return_in_memory (tree type)
2959   int needed_intregs, needed_sseregs, size;
2960   enum machine_mode mode = TYPE_MODE (type);
2963     return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2965   if (mode == BLKmode)
2968   size = int_size_in_bytes (type);
2970   if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2973   if (VECTOR_MODE_P (mode) || mode == TImode)
2975       /* User-created vectors small enough to fit in EAX.  */
2979       /* MMX/3dNow values are returned on the stack, since we've
2980	  got to EMMS/FEMMS before returning.  */
2984       /* SSE values are returned in XMM0, except when it doesn't exist.  */
2986	 return (TARGET_SSE ? 0 : 1);
2997 /* When returning SSE vector types, we have a choice of either
2998      (1) being abi incompatible with a -march switch, or
2999      (2) generating an error.
3000    Given no good solution, I think the safest thing is one warning.
3001    The user won't be able to use -Werror, but....
3003    Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3004    called in response to actually generating a caller or callee that
3005    uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
3006    via aggregate_value_p for general type probing from tree-ssa.  */
3009 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3013   if (!TARGET_SSE && type && !warned)
3015       /* Look at the return type of the function, not the function type.  */
3016       enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3019	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3022	   warning ("SSE vector return without SSE enabled changes the ABI");
3029 /* Define how to find the value returned by a library function
3030    assuming the value has mode MODE.  */
3032 ix86_libcall_value (enum machine_mode mode)
3043	   return gen_rtx_REG (mode, FIRST_SSE_REG);
3045	   return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3050	   return gen_rtx_REG (mode, 0);
3054     return gen_rtx_REG (mode, ix86_value_regno (mode));
3057 /* Given a mode, return the register to use for a return value.  */
3060 ix86_value_regno (enum machine_mode mode)
3062   /* Floating point return values in %st(0).  */
3063   if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3064     return FIRST_FLOAT_REG;
3065   /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
3066      we prevent this case when sse is not available.  */
3067   if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3068     return FIRST_SSE_REG;
3069   /* Everything else in %eax.  */
3073 /* Create the va_list data type.  */
3076 ix86_build_builtin_va_list (void)
3078   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3080   /* For i386 we use plain pointer to argument area.  */
3082     return build_pointer_type (char_type_node);
3084   record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3085   type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3087   f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3088		       unsigned_type_node);
3089   f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3090		       unsigned_type_node);
3091   f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3093   f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3096   DECL_FIELD_CONTEXT (f_gpr) = record;
3097   DECL_FIELD_CONTEXT (f_fpr) = record;
3098   DECL_FIELD_CONTEXT (f_ovf) = record;
3099   DECL_FIELD_CONTEXT (f_sav) = record;
3101   TREE_CHAIN (record) = type_decl;
3102   TYPE_NAME (record) = type_decl;
3103   TYPE_FIELDS (record) = f_gpr;
3104   TREE_CHAIN (f_gpr) = f_fpr;
3105   TREE_CHAIN (f_fpr) = f_ovf;
3106   TREE_CHAIN (f_ovf) = f_sav;
3108   layout_type (record);
3110   /* The correct type is an array type of one element.  */
3111   return build_array_type (record, build_index_type (size_zero_node));
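/* Illustrative equivalent of the record built above, as the x86-64 ABI
   spells it (not part of the original source):

       typedef struct __va_list_tag {
	 unsigned int gp_offset;     offset into reg_save_area for integer regs
	 unsigned int fp_offset;     offset into reg_save_area for SSE regs
	 void *overflow_arg_area;    arguments that spilled to the stack
	 void *reg_save_area;        block saved by the prologue
       } va_list[1];
*/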
3114 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
3117 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3118			      tree type, int *pretend_size ATTRIBUTE_UNUSED,
3121   CUMULATIVE_ARGS next_cum;
3122   rtx save_area = NULL_RTX, mem;
3135   /* Indicate to allocate space on the stack for varargs save area.  */
3136   ix86_save_varrargs_registers = 1;
3138   cfun->stack_alignment_needed = 128;
3140   fntype = TREE_TYPE (current_function_decl);
3141   stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3142	       && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3143		   != void_type_node));
3145   /* For varargs, we do not want to skip the dummy va_dcl argument.
3146      For stdargs, we do want to skip the last named argument.  */
3149     function_arg_advance (&next_cum, mode, type, 1);
3152     save_area = frame_pointer_rtx;
3154   set = get_varargs_alias_set ();
3156   for (i = next_cum.regno; i < ix86_regparm; i++)
3158       mem = gen_rtx_MEM (Pmode,
3159			  plus_constant (save_area, i * UNITS_PER_WORD));
3160       set_mem_alias_set (mem, set);
3161       emit_move_insn (mem, gen_rtx_REG (Pmode,
3162					 x86_64_int_parameter_registers[i]));
3165   if (next_cum.sse_nregs)
3167       /* Now emit code to save SSE registers.  The AX parameter contains number
3168	  of SSE parameter registers used to call this function.  We use
3169	  sse_prologue_save insn template that produces computed jump across
3170	  SSE saves.  We need some preparation work to get this working.  */
3172       label = gen_label_rtx ();
3173       label_ref = gen_rtx_LABEL_REF (Pmode, label);
3175       /* Compute address to jump to :
3176	  label - 5*eax + nnamed_sse_arguments*5  */
3177       tmp_reg = gen_reg_rtx (Pmode);
3178       nsse_reg = gen_reg_rtx (Pmode);
3179       emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3180       emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3181			       gen_rtx_MULT (Pmode, nsse_reg,
3183       if (next_cum.sse_regno)
3186	    gen_rtx_CONST (DImode,
3187			   gen_rtx_PLUS (DImode,
3189					 GEN_INT (next_cum.sse_regno * 4))));
3191	 emit_move_insn (nsse_reg, label_ref);
3192       emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3194       /* Compute address of memory block we save into.  We always use pointer
3195	  pointing 127 bytes after first byte to store - this is needed to keep
3196	  instruction size limited by 4 bytes.  */
3197       tmp_reg = gen_reg_rtx (Pmode);
3198       emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3199			       plus_constant (save_area,
3200					      8 * REGPARM_MAX + 127)));
3201       mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3202       set_mem_alias_set (mem, set);
3203       set_mem_align (mem, BITS_PER_WORD);
3205       /* And finally do the dirty job!  */
3206       emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3207					 GEN_INT (next_cum.sse_regno), label));
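/* Illustrative layout of the register save area set up above (not part of
   the original source): the six integer argument registers occupy bytes
   0..47 of reg_save_area, and the SSE registers follow in 16-byte slots
   starting at byte 8 * REGPARM_MAX; gp_offset and fp_offset in the va_list
   record index into this block.  */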
3212 /* Implement va_start.  */
3215 ix86_va_start (tree valist, rtx nextarg)
3217   HOST_WIDE_INT words, n_gpr, n_fpr;
3218   tree f_gpr, f_fpr, f_ovf, f_sav;
3219   tree gpr, fpr, ovf, sav, t;
3221   /* Only 64bit target needs something special.  */
3224       std_expand_builtin_va_start (valist, nextarg);
3228   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3229   f_fpr = TREE_CHAIN (f_gpr);
3230   f_ovf = TREE_CHAIN (f_fpr);
3231   f_sav = TREE_CHAIN (f_ovf);
3233   valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3234   gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3235   fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3236   ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3237   sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3239   /* Count number of gp and fp argument registers used.  */
3240   words = current_function_args_info.words;
3241   n_gpr = current_function_args_info.regno;
3242   n_fpr = current_function_args_info.sse_regno;
3244   if (TARGET_DEBUG_ARG)
3245     fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3246	      (int) words, (int) n_gpr, (int) n_fpr);
3248   t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3249	     build_int_cst (NULL_TREE, n_gpr * 8));
3250   TREE_SIDE_EFFECTS (t) = 1;
3251   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3253   t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3254	     build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3255   TREE_SIDE_EFFECTS (t) = 1;
3256   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3258   /* Find the overflow area.  */
3259   t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3261     t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3262	       build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3263   t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3264   TREE_SIDE_EFFECTS (t) = 1;
3265   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3267   /* Find the register save area.
3268      The prologue of the function saves it right above the stack frame.  */
3269   t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3270   t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3271   TREE_SIDE_EFFECTS (t) = 1;
3272   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3275 /* Implement va_arg.  */
3278 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3280   static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3281   tree f_gpr, f_fpr, f_ovf, f_sav;
3282   tree gpr, fpr, ovf, sav, t;
3284   tree lab_false, lab_over = NULL_TREE;
3290   /* Only 64bit target needs something special.  */
3292     return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3294   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3295   f_fpr = TREE_CHAIN (f_gpr);
3296   f_ovf = TREE_CHAIN (f_fpr);
3297   f_sav = TREE_CHAIN (f_ovf);
3299   valist = build_va_arg_indirect_ref (valist);
3300   gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3301   fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3302   ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3303   sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3305   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3307     type = build_pointer_type (type);
3308   size = int_size_in_bytes (type);
3309   rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3311   container = construct_container (TYPE_MODE (type), type, 0,
3312				    REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3314    * Pull the value out of the saved registers ...
3317   addr = create_tmp_var (ptr_type_node, "addr");
3318   DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3322       int needed_intregs, needed_sseregs;
3324       tree int_addr, sse_addr;
3326       lab_false = create_artificial_label ();
3327       lab_over = create_artificial_label ();
3329       examine_argument (TYPE_MODE (type), type, 0,
3330			 &needed_intregs, &needed_sseregs);
3332       need_temp = (!REG_P (container)
3333		    && ((needed_intregs && TYPE_ALIGN (type) > 64)
3334			|| TYPE_ALIGN (type) > 128));
3336       /* In case we are passing structure, verify that it is consecutive block
3337	  on the register save area.  If not we need to do moves.  */
3338       if (!need_temp && !REG_P (container))
3340	   /* Verify that all registers are strictly consecutive  */
3341	   if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3345	       for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3347		   rtx slot = XVECEXP (container, 0, i);
3348		   if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3349		       || INTVAL (XEXP (slot, 1)) != i * 16)
3357	       for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3359		   rtx slot = XVECEXP (container, 0, i);
3360		   if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3361		       || INTVAL (XEXP (slot, 1)) != i * 8)
3373	   int_addr = create_tmp_var (ptr_type_node, "int_addr");
3374	   DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3375	   sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3376	   DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3378       /* First ensure that we fit completely in registers.  */
3381	   t = build_int_cst (TREE_TYPE (gpr),
3382			      (REGPARM_MAX - needed_intregs + 1) * 8);
3383	   t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3384	   t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3385	   t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3386	   gimplify_and_add (t, pre_p);
3390	   t = build_int_cst (TREE_TYPE (fpr),
3391			      (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3393	   t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3394	   t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3395	   t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3396	   gimplify_and_add (t, pre_p);
3399       /* Compute index to start of area used for integer regs.  */
3402	   /* int_addr = gpr + sav; */
3403	   t = build2 (PLUS_EXPR, ptr_type_node, sav, gpr);
3404	   t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3405	   gimplify_and_add (t, pre_p);
3409	   /* sse_addr = fpr + sav; */
3410	   t = build2 (PLUS_EXPR, ptr_type_node, sav, fpr);
3411	   t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3412	   gimplify_and_add (t, pre_p);
3417	   tree temp = create_tmp_var (type, "va_arg_tmp");
3420	   t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3421	   t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3422	   gimplify_and_add (t, pre_p);
3424	   for (i = 0; i < XVECLEN (container, 0); i++)
3426	       rtx slot = XVECEXP (container, 0, i);
3427	       rtx reg = XEXP (slot, 0);
3428	       enum machine_mode mode = GET_MODE (reg);
3429	       tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3430	       tree addr_type = build_pointer_type (piece_type);
3433	       tree dest_addr, dest;
3435	       if (SSE_REGNO_P (REGNO (reg)))
3437		   src_addr = sse_addr;
3438		   src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3442		   src_addr = int_addr;
3443		   src_offset = REGNO (reg) * 8;
3445	       src_addr = fold_convert (addr_type, src_addr);
3446	       src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3447					size_int (src_offset)));
3448	       src = build_va_arg_indirect_ref (src_addr);
3450	       dest_addr = fold_convert (addr_type, addr);
3451	       dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3452					 size_int (INTVAL (XEXP (slot, 1)))));
3453	       dest = build_va_arg_indirect_ref (dest_addr);
3455	       t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3456	       gimplify_and_add (t, pre_p);
3462	   t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3463		       build_int_cst (NULL_TREE, needed_intregs * 8));
3464	   t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3465	   gimplify_and_add (t, pre_p);
3469	   t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3470		       build_int_cst (NULL_TREE, needed_sseregs * 16));
3471	   t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3472	   gimplify_and_add (t, pre_p);
3475       t = build1 (GOTO_EXPR, void_type_node, lab_over);
3476       gimplify_and_add (t, pre_p);
3478       t = build1 (LABEL_EXPR, void_type_node, lab_false);
3479       append_to_statement_list (t, pre_p);
3482   /* ... otherwise out of the overflow area.  */
3484   /* Care for on-stack alignment if needed.  */
3485   if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3489       HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3490       t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3491		  build_int_cst (NULL_TREE, align - 1));
3492       t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3493		  build_int_cst (NULL_TREE, -align));
3495   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3497   t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3498   gimplify_and_add (t2, pre_p);
3500   t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3501	       build_int_cst (NULL_TREE, rsize * UNITS_PER_WORD));
3502   t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3503   gimplify_and_add (t, pre_p);
3507       t = build1 (LABEL_EXPR, void_type_node, lab_over);
3508       append_to_statement_list (t, pre_p);
3511   ptrtype = build_pointer_type (type);
3512   addr = fold_convert (ptrtype, addr);
3515     addr = build_va_arg_indirect_ref (addr);
3516   return build_va_arg_indirect_ref (addr);
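/* Illustrative summary (not part of the original source): for
       int v = va_arg (ap, int);
   the GIMPLE built above tests gp_offset against the limit computed from
   REGPARM_MAX; on the fast path it loads from reg_save_area + gp_offset and
   bumps gp_offset by 8, otherwise it reads from overflow_arg_area and
   advances that pointer by the argument's rounded size.  */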
3519 /* Return nonzero if OPNUM's MEM should be matched
3520    in movabs* patterns.  */
3523 ix86_check_movabs (rtx insn, int opnum)
3527   set = PATTERN (insn);
3528   if (GET_CODE (set) == PARALLEL)
3529     set = XVECEXP (set, 0, 0);
3530   if (GET_CODE (set) != SET)
3532   mem = XEXP (set, opnum);
3533   while (GET_CODE (mem) == SUBREG)
3534     mem = SUBREG_REG (mem);
3535   if (GET_CODE (mem) != MEM)
3537   return (volatile_ok || !MEM_VOLATILE_P (mem));
3540 /* Initialize the table of extra 80387 mathematical constants.  */
3543 init_ext_80387_constants (void)
3545   static const char * cst[5] =
3547     "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
3548     "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
3549     "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
3550     "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
3551     "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
3555   for (i = 0; i < 5; i++)
3557       real_from_string (&ext_80387_constants_table[i], cst[i]);
3558       /* Ensure each constant is rounded to XFmode precision.  */
3559       real_convert (&ext_80387_constants_table[i],
3560		     XFmode, &ext_80387_constants_table[i]);
3563   ext_80387_constants_init = 1;
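/* Illustrative mapping (not part of the original source): the table above
   lets standard_80387_constant_p recognize, e.g., the value of pi in XFmode,
   so that
       long double pi = 3.14159265358979323846L;
   can be materialized with the `fldpi' instruction instead of a constant-pool
   load; fldlg2, fldln2, fldl2e and fldl2t cover the logarithm constants the
   same way.  */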
3566 /* Return true if the constant is something that can be loaded with
3567    a special instruction.  */
3570 standard_80387_constant_p (rtx x)
3572   if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3575   if (x == CONST0_RTX (GET_MODE (x)))
3577   if (x == CONST1_RTX (GET_MODE (x)))
3580   /* For XFmode constants, try to find a special 80387 instruction when
3581      optimizing for size or on those CPUs that benefit from them.  */
3582   if (GET_MODE (x) == XFmode
3583       && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3588       if (! ext_80387_constants_init)
3589	 init_ext_80387_constants ();
3591       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3592       for (i = 0; i < 5; i++)
3593	 if (real_identical (&r, &ext_80387_constants_table[i]))
3600 /* Return the opcode of the special instruction to be used to load
3604 standard_80387_constant_opcode (rtx x)
3606   switch (standard_80387_constant_p (x))
3626 /* Return the CONST_DOUBLE representing the 80387 constant that is
3627    loaded by the specified special instruction.  The argument IDX
3628    matches the return value from standard_80387_constant_p.  */
3631 standard_80387_constant_rtx (int idx)
3635   if (! ext_80387_constants_init)
3636     init_ext_80387_constants ();
3652   return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3656 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3659 standard_sse_constant_p (rtx x)
3661   if (x == const0_rtx)
3663   return (x == CONST0_RTX (GET_MODE (x)));
3666 /* Returns 1 if OP contains a symbol reference */
3669 symbolic_reference_mentioned_p (rtx op)
3674   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3677   fmt = GET_RTX_FORMAT (GET_CODE (op));
3678   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3684	   for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3685	     if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3689       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3696 /* Return 1 if it is appropriate to emit `ret' instructions in the
3697    body of a function.  Do this only if the epilogue is simple, needing a
3698    couple of insns.  Prior to reloading, we can't tell how many registers
3699    must be saved, so return 0 then.  Return 0 if there is no frame
3700    marker to de-allocate.
3702    If NON_SAVING_SETJMP is defined and true, then it is not possible
3703    for the epilogue to be simple, so return 0.  This is a special case
3704    since NON_SAVING_SETJMP will not cause regs_ever_live to change
3705    until final, but jump_optimize may need to know sooner if a
3709 ix86_can_use_return_insn_p (void)
3711   struct ix86_frame frame;
3713 #ifdef NON_SAVING_SETJMP
3714   if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3718   if (! reload_completed || frame_pointer_needed)
3721   /* Don't allow more than 32 pop, since that's all we can do
3722      with one instruction.  */
3723   if (current_function_pops_args
3724       && current_function_args_size >= 32768)
3727   ix86_compute_frame_layout (&frame);
3728   return frame.to_allocate == 0 && frame.nregs == 0;
3731 /* Value should be nonzero if functions must have frame pointers.
3732    Zero means the frame pointer need not be set up (and parms may
3733    be accessed via the stack pointer) in functions that seem suitable.  */
3736 ix86_frame_pointer_required (void)
3738   /* If we accessed previous frames, then the generated code expects
3739      to be able to access the saved ebp value in our frame.  */
3740   if (cfun->machine->accesses_prev_frame)
3743   /* Several x86 os'es need a frame pointer for other reasons,
3744      usually pertaining to setjmp.  */
3745   if (SUBTARGET_FRAME_POINTER_REQUIRED)
3748   /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3749      the frame pointer by default.  Turn it back on now if we've not
3750      got a leaf function.  */
3751   if (TARGET_OMIT_LEAF_FRAME_POINTER
3752       && (!current_function_is_leaf))
3755   if (current_function_profile)
3761 /* Record that the current function accesses previous call frames.  */
3764 ix86_setup_frame_addresses (void)
3766   cfun->machine->accesses_prev_frame = 1;
3769 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3770 # define USE_HIDDEN_LINKONCE 1
3772 # define USE_HIDDEN_LINKONCE 0
3775 static int pic_labels_used;
3777 /* Fills in the label name that should be used for a pc thunk for
3778    the given register.  */
3781 get_pc_thunk_name (char name[32], unsigned int regno)
3783   if (USE_HIDDEN_LINKONCE)
3784     sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3786     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3790 /* This function generates code for -fpic that loads %ebx with
3791    the return address of the caller and then returns.  */
3794 ix86_file_end (void)
3799   for (regno = 0; regno < 8; ++regno)
3803       if (! ((pic_labels_used >> regno) & 1))
3806       get_pc_thunk_name (name, regno);
3808       if (USE_HIDDEN_LINKONCE)
3812	   decl = build_decl (FUNCTION_DECL, get_identifier (name),
3814	   TREE_PUBLIC (decl) = 1;
3815	   TREE_STATIC (decl) = 1;
3816	   DECL_ONE_ONLY (decl) = 1;
3818	   (*targetm.asm_out.unique_section) (decl, 0);
3819	   named_section (decl, NULL, 0);
3821	   (*targetm.asm_out.globalize_label) (asm_out_file, name);
3822	   fputs ("\t.hidden\t", asm_out_file);
3823	   assemble_name (asm_out_file, name);
3824	   fputc ('\n', asm_out_file);
3825	   ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
3830	   ASM_OUTPUT_LABEL (asm_out_file, name);
3833       xops[0] = gen_rtx_REG (SImode, regno);
3834       xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3835       output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3836       output_asm_insn ("ret", xops);
3839   if (NEED_INDICATE_EXEC_STACK)
3840     file_end_indicate_exec_stack ();
/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest)
{
  rtx xops[3];

  xops[0] = dest;
  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());

      if (!flag_pic)
        output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      else
        output_asm_insn ("call\t%a2", xops);

#if TARGET_MACHO
      /* Output the "canonical" label name ("Lxx$pb") here too.  This
         is what will be referred to by the Mach-O PIC subsystem.  */
      ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
#endif
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
                                         CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

      if (flag_pic)
        output_asm_insn ("pop{l}\t%0", xops);
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
    }

  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
  else if (!TARGET_MACHO)
    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);

  return "";
}
/* Generate a `push' pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  return gen_rtx_SET (VOIDmode,
                      gen_rtx_MEM (Pmode,
                                   gen_rtx_PRE_DEC (Pmode,
                                                    stack_pointer_rtx)),
                      arg);
}
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (current_function_is_leaf && !current_function_profile)
    {
      int i;
      for (i = 2; i >= 0; --i)
        if (!regs_ever_live[i])
          return i;
    }

  return INVALID_REGNUM;
}
/* Return 1 if we need to save REGNO.  */
static int
ix86_save_reg (unsigned int regno, int maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
          || current_function_profile
          || current_function_calls_eh_return
          || current_function_uses_const_pool))
    {
      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
        return 0;
      return 1;
    }

  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
        {
          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)
            break;
          if (test == regno)
            return 1;
        }
    }

  return (regs_ever_live[regno]
          && !call_used_regs[regno]
          && !fixed_regs[regno]
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}
/* Return the offset between two registers, one to be eliminated, and the
   other its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
           && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      if (to != STACK_POINTER_REGNUM)
        abort ();
      else if (from == ARG_POINTER_REGNUM)
        return frame.stack_pointer_offset;
      else if (from != FRAME_POINTER_REGNUM)
        abort ();
      else
        return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  HOST_WIDE_INT total_size;
  unsigned int stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned int preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  total_size = size;

  stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when the amount of
     registers didn't change, as reload does multiple calls to the function
     and does not expect the decision to change within a single iteration.  */
  if (!optimize_size
      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;

      cfun->machine->use_fast_prologue_epilogue_nregs = count;
      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

         Be careful about choosing what prologue to emit:  When the function
         takes many instructions to execute we may use the slow version, as
         well as when the function is known to be outside a hot spot (this is
         known with feedback only).  Weight the size of the function by the
         number of registers to save, as it is cheap to use one or two push
         instructions but very slow to use many of them.  */
      if (count)
        count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
          || (flag_branch_probabilities
              && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
        cfun->machine->use_fast_prologue_epilogue = false;
      else
        cfun->machine->use_fast_prologue_epilogue
          = !expensive_function_p (count);
    }
  if (TARGET_PROLOGUE_USING_MOVE
      && cfun->machine->use_fast_prologue_epilogue)
    frame->save_regs_using_mov = true;
  else
    frame->save_regs_using_mov = false;

  /* Skip return address and saved base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using these
     features that may break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
                     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last current_function_outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || current_function_calls_alloca))
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || current_function_calls_alloca)
    frame->padding2 = ((offset + preferred_alignment - 1)
                       & -preferred_alignment) - offset;
  else
    frame->padding2 = 0;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  if ((!frame->to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->save_regs_using_mov)
        frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
        frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;

  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
           frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
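/* Rough picture of the frame layout computed above, from high to low
   addresses (this sketch follows the code above, not any authoritative
   ABI document):

	return address
	saved %ebp			(if frame_pointer_needed)
	register save area		(nregs * UNITS_PER_WORD)
	va_arg register save area	(X86_64_VARARGS_SIZE, 64-bit varargs)
	padding1			(align locals to stack_alignment_needed)
   frame_pointer_offset ->
	local variables			(get_frame_size () bytes)
	outgoing argument area		(if ACCUMULATE_OUTGOING_ARGS)
	padding2			(align to preferred_alignment)
   stack_pointer_offset ->

   to_allocate covers everything below the register save area, minus any
   red zone usable by 64-bit leaf functions.  */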
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      {
        insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
        RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Emit code to save registers using MOV insns.  First register
   is stored at POINTER + OFFSET.  */
static void
ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
{
  unsigned int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
        insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
                                               Pmode, offset),
                               gen_rtx_REG (Pmode, regno));
        RTX_FRAME_RELATED_P (insn) = 1;
        offset += UNITS_PER_WORD;
      }
}
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if the %r11 register is live and cannot be freely used, and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
{
  rtx insn;

  if (! TARGET_64BIT)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
  else if (x86_64_immediate_operand (offset, DImode))
    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
  else
    {
      rtx r11;
      /* r11 is used by indirect sibcall return as well, set before the
         epilogue and used after the epilogue.  ATM indirect sibcall
         shouldn't be used together with huge frame sizes in one
         function because of the frame_size check in sibcall.c.  */
      if (style == 0)
        abort ();
      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
      if (style < 0)
        RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
                                                               offset));
    }
  if (style < 0)
    RTX_FRAME_RELATED_P (insn) = 1;
}
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;

  if (!frame.save_regs_using_mov)
    ix86_emit_save_regs ();
  else
    allocate += frame.nregs * UNITS_PER_WORD;

  /* When using red zone we may start register saving before allocating
     the stack frame, saving one cycle of the prologue.  */
  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
                                   : stack_pointer_rtx,
                                   -frame.nregs * UNITS_PER_WORD);

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (-allocate), -1);
  else
    {
      /* Only valid for Win32.  */
      rtx eax = gen_rtx_REG (SImode, 0);
      bool eax_live = ix86_eax_live_at_start_p ();

      if (eax_live)
        emit_insn (gen_push (eax));

      insn = emit_move_insn (eax, GEN_INT (allocate));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_insn (gen_allocate_stack_worker (eax));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (eax_live)
        {
          rtx t;
          if (frame_pointer_needed)
            t = plus_constant (hard_frame_pointer_rtx,
                               allocate - frame.to_allocate
                               - frame.nregs * UNITS_PER_WORD);
          else
            t = plus_constant (stack_pointer_rtx, allocate);
          emit_move_insn (eax, gen_rtx_MEM (SImode, t));
        }
    }

  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
                                       -frame.nregs * UNITS_PER_WORD);
    }

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
          || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
        REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
         deleting all references to the pic register after reload.
         Consider if cross-jumping unifies two sides of a branch
         controlled by a comparison vs the only read from a global.
         In which case, allow the set_got to be deleted, though we're
         too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}
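/* For illustration, a typical 32-bit prologue produced by the code above
   (frame pointer needed, registers saved with pushes) looks roughly like

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	pushl	%esi
	subl	$LOCALS, %esp

   while the "fast" variant allocates the frame first and then stores the
   registers with moves so they can issue in parallel.  */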
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
                                  int maybe_eh_return)
{
  int regno;
  rtx base_address = gen_rtx_MEM (Pmode, pointer);

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
        /* Ensure that adjust_address won't be forced to produce a pointer
           out of the range allowed by the x86-64 instruction set.  */
        if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
          {
            rtx r11;

            r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
            emit_move_insn (r11, GEN_INT (offset));
            emit_insn (gen_adddi3 (r11, r11, pointer));
            base_address = gen_rtx_MEM (Pmode, r11);
            offset = 0;
          }
        emit_move_insn (gen_rtx_REG (Pmode, regno),
                        adjust_address (base_address, Pmode, offset));
        offset += UNITS_PER_WORD;
      }
}
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register is less work
     than reloading sp and popping the register.

     The default code results in a stack adjustment using add/lea
     instructions, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well.  Especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
          && cfun->machine->use_fast_prologue_epilogue
          && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
          && cfun->machine->use_fast_prologue_epilogue
          && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
         locations.  If both are available, default to ebp, since offsets
         are known to be small.  The only exception is esp pointing directly
         to the end of the block of saved registers, where we may simplify
         the addressing mode.  */
      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
        ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
                                          frame.to_allocate, style == 2);
      else
        ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
                                          offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
        {
          rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

          if (frame_pointer_needed)
            {
              tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
              tmp = plus_constant (tmp, UNITS_PER_WORD);
              emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

              tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
              emit_move_insn (hard_frame_pointer_rtx, tmp);

              pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
                                         const0_rtx, style);
            }
          else
            {
              tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
              tmp = plus_constant (tmp, (frame.to_allocate
                                         + frame.nregs * UNITS_PER_WORD));
              emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
            }
        }
      else if (!frame_pointer_needed)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (frame.to_allocate
                                            + frame.nregs * UNITS_PER_WORD),
                                   style);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size
               || !cfun->machine->use_fast_prologue_epilogue)
        emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,
                                     const0_rtx, style);
          if (TARGET_64BIT)
            emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
          else
            emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
        }
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
         pop the registers.  */
      if (!sp_valid)
        {
          if (!frame_pointer_needed)
            abort ();
          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,
                                     GEN_INT (offset), style);
        }
      else if (frame.to_allocate)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (frame.to_allocate), style);

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
        if (ix86_save_reg (regno, false))
          {
            if (TARGET_64BIT)
              emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
            else
              emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
          }

      if (frame_pointer_needed)
        {
          /* Leave results in shorter dependency chains on CPUs that are
             able to grok it fast.  */
          if (TARGET_USE_LEAVE)
            emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
          else if (TARGET_64BIT)
            emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
          else
            emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
        }
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
         return address, do an explicit add, and jump indirectly to the
         caller.  */
      if (current_function_pops_args >= 65536)
        {
          rtx ecx = gen_rtx_REG (SImode, 2);

          /* There is no "pascal" calling convention in 64bit ABI.  */
          if (TARGET_64BIT)
            abort ();

          emit_insn (gen_popsi1 (ecx));
          emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
          emit_jump_insn (gen_return_indirect_internal (ecx));
        }
      else
        emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
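/* For illustration, the corresponding epilogues look roughly like

	leave				(or: movl %ebp, %esp; popl %ebp)
	ret

   when a frame pointer is used, and

	addl	$LOCALS, %esp
	popl	%esi
	popl	%ebx
	ret	$N			(ret $N only if the callee pops args)

   otherwise.  */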
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing the length of a lea
   instruction.  */

static int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
        {
          if (n >= 4)
            return 0;
          addends[n++] = XEXP (op, 1);
          op = XEXP (op, 0);
        }
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
        return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
        {
          op = addends[i];
          switch (GET_CODE (op))
            {
            case MULT:
              if (index)
                return 0;
              index = XEXP (op, 0);
              scale_rtx = XEXP (op, 1);
              break;

            case UNSPEC:
              if (XINT (op, 1) == UNSPEC_TP
                  && TARGET_TLS_DIRECT_SEG_REFS
                  && seg == SEG_DEFAULT)
                seg = TARGET_64BIT ? SEG_FS : SEG_GS;
              else
                return 0;
              break;

            case REG:
            case SUBREG:
              if (!base)
                base = op;
              else if (!index)
                index = op;
              else
                return 0;
              break;

            case CONST:
            case CONST_INT:
            case SYMBOL_REF:
            case LABEL_REF:
              if (disp)
                return 0;
              disp = op;
              break;

            default:
              return 0;
            }
        }
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);           /* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
        return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
        return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;                        /* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
        return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx
          || index == frame_pointer_rtx
          || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_tune == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
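/* Example: the address (plus (mult (reg %eax) (const_int 4))
			      (plus (reg %ebx) (const_int 12)))
   decomposes into base = %ebx, index = %eax, scale = 4, disp = 12,
   i.e. the operand 12(%ebx,%eax,4).  */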
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x)
{
  struct ix86_address parts;
  int cost = 1;

  if (!ix86_decompose_address (x, &parts))
    abort ();

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;
  if (parts.seg != SEG_DEFAULT)
    cost--;

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
          && (!REG_P (parts.index)
              || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase the cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     the memory address, but I don't have an AMD-K6 machine handy to check
     this theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
          || (parts.disp && !parts.base && parts.index && parts.scale != 1)
          || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 1;

  return cost;
}
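/* Example of the K6 penalty above: (%eax,%ebx,2) -- base plus scaled
   index with no displacement -- falls into the penalized set, while
   spelling it 0(%eax,%ebx,2) (explicit zero displacement) would not,
   per the comment above.  */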
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
        return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
          && (GET_CODE (XEXP (term, 1)) == CONST_INT
              || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
        term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
          || XINT (term, 1) != UNSPEC_GOTPCREL)
        return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
          && GET_CODE (term) != LABEL_REF)
        return x;

      return term;
    }

  term = ix86_delegitimize_address (x);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   effect.  */

static bool
darwin_local_data_pic (rtx disp)
{
  if (GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
          || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
        if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
          {
            const char *sym_name = XSTR (XEXP (disp, 1), 0);
            if (! strcmp (sym_name, "<pic base>"))
              return true;
          }
    }

  return false;
}
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

bool
legitimate_constant_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
        {
          if (GET_CODE (XEXP (x, 1)) != CONST_INT)
            return false;
          x = XEXP (x, 0);
        }

      if (TARGET_MACHO && darwin_local_data_pic (x))
        return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
        switch (XINT (x, 1))
          {
          case UNSPEC_TPOFF:
            return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
          case UNSPEC_DTPOFF:
            return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
          default:
            return false;
          }

      /* We must have drilled down to a symbol.  */
      if (!symbolic_operand (x, Pmode))
        return false;
      break;

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (tls_symbolic_operand (x, Pmode))
        return false;
      break;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}

/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (rtx x)
{
  return !legitimate_constant_p (x);
}

/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
}
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
        switch (XINT (inner, 1))
          {
          case UNSPEC_TPOFF:
            return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
          default:
            return false;
          }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (rtx disp)
{
  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      /* TLS references should always be enclosed in UNSPEC.  */
      if (tls_symbolic_operand (disp, GET_MODE (disp)))
        return 0;
      if (GET_CODE (disp) == SYMBOL_REF
          && ix86_cmodel == CM_SMALL_PIC
          && SYMBOL_REF_LOCAL_P (disp))
        return 1;
      if (GET_CODE (disp) == LABEL_REF)
        return 1;
      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == PLUS)
        {
          rtx op0 = XEXP (XEXP (disp, 0), 0);
          rtx op1 = XEXP (XEXP (disp, 0), 1);

          /* TLS references should always be enclosed in UNSPEC.  */
          if (tls_symbolic_operand (op0, GET_MODE (op0)))
            return 0;
          if (((GET_CODE (op0) == SYMBOL_REF
                && ix86_cmodel == CM_SMALL_PIC
                && SYMBOL_REF_LOCAL_P (op0))
               || GET_CODE (op0) == LABEL_REF)
              && GET_CODE (op1) == CONST_INT
              && INTVAL (op1) < 16*1024*1024
              && INTVAL (op1) >= -16*1024*1024)
            return 1;
        }
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions here; they limited the
         allowed distance of GOT references.  We should not need these
         anyway.  */
      if (GET_CODE (disp) != UNSPEC
          || XINT (disp, 1) != UNSPEC_GOTPCREL)
        return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
          && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
        return 0;
      return 1;
    }

  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
        return 0;
      disp = XEXP (disp, 0);
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return 1;

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
          || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
        return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
      return 0;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_NTPOFF:
      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_DTPOFF:
      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
               "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
               GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
        {
          reason = "base is not a register";
          goto report_error;
        }

      if (GET_MODE (base) != Pmode)
        {
          reason = "base is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
          || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
        {
          reason = "base is not valid";
          goto report_error;
        }
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
        {
          reason = "index is not a register";
          goto report_error;
        }

      if (GET_MODE (index) != Pmode)
        {
          reason = "index is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
          || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
        {
          reason = "index is not valid";
          goto report_error;
        }
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
        {
          reason = "scale without index";
          goto report_error;
        }

      if (scale != 2 && scale != 4 && scale != 8)
        {
          reason = "scale is not a valid multiplier";
          goto report_error;
        }
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == UNSPEC)
        switch (XINT (XEXP (disp, 0), 1))
          {
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
          case UNSPEC_GOTPCREL:
            goto is_legitimate_pic;

          case UNSPEC_GOTTPOFF:
          case UNSPEC_GOTNTPOFF:
          case UNSPEC_INDNTPOFF:
            break;

          default:
            reason = "invalid address unspec";
            goto report_error;
          }

      else if (flag_pic && (SYMBOLIC_CONST (disp)
#if TARGET_MACHO
                            && !machopic_operand_p (disp)
#endif
                            ))
        {
        is_legitimate_pic:
          if (TARGET_64BIT && (index || base))
            {
              /* foo@dtpoff(%rX) is ok.  */
              if (GET_CODE (disp) != CONST
                  || GET_CODE (XEXP (disp, 0)) != PLUS
                  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
                  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
                  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
                      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
                {
                  reason = "non-constant pic memory reference";
                  goto report_error;
                }
            }
          else if (! legitimate_pic_address_disp_p (disp))
            {
              reason = "displacement is an invalid pic construct";
              goto report_error;
            }

          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is a good idea, unfortunately these constructs may
             be created by the "adds using lea" optimization for incorrect
             code.

             This code is nonsensical, but results in addressing the
             GOT table with pic_offset_table_rtx base.  We can't
             just refuse it easily, since it gets matched by the
             "addsi3" pattern, that later gets split to lea in the
             case the output register differs from the input.  While this
             can be handled by a separate addsi pattern for this case
             that never results in lea, this seems to be the easier and
             correct fix for the crash: disable this test.  */
        }
      else if (GET_CODE (disp) != LABEL_REF
               && GET_CODE (disp) != CONST_INT
               && (GET_CODE (disp) != CONST
                   || !legitimate_constant_p (disp))
               && (GET_CODE (disp) != SYMBOL_REF
                   || !legitimate_constant_p (disp)))
        {
          reason = "displacement is not constant";
          goto report_error;
        }
      else if (TARGET_64BIT
               && !x86_64_immediate_operand (disp, VOIDmode))
        {
          reason = "displacement is out of range";
          goto report_error;
        }
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
/* Return a unique alias set for the GOT.  */

static HOST_WIDE_INT
ix86_GOT_alias_set (void)
{
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (reg == 0)
    reg = gen_reg_rtx (Pmode);
  /* Use the generic Mach-O PIC machinery.  */
  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new = addr;
  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      if (reload_in_progress)
        regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
        {
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
          new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
        }
      else
        new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
        {
          emit_move_insn (reg, new);
          new = reg;
        }
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
        {
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
          new = gen_rtx_CONST (Pmode, new);
          new = gen_const_mem (Pmode, new);
          set_mem_alias_set (new, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          /* Use gen_movsi directly, otherwise the address is loaded
             into a register for CSE.  We don't want to CSE these addresses;
             instead we CSE addresses from the GOT table, so skip this.  */
          emit_insn (gen_movsi (reg, new));
          new = reg;
        }
      else
        {
          /* This symbol must be referenced via a load from the
             Global Offset Table (@GOT).  */

          if (reload_in_progress)
            regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
          new = gen_rtx_CONST (Pmode, new);
          new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
          new = gen_const_mem (Pmode, new);
          set_mem_alias_set (new, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          emit_move_insn (reg, new);
          new = reg;
        }
    }
  else
    {
      if (GET_CODE (addr) == CONST)
        {
          addr = XEXP (addr, 0);

          /* We must match stuff we generate before.  Assume the only
             unspecs that can get here are ours.  Not that we could do
             anything with them anyway....  */
          if (GET_CODE (addr) == UNSPEC
              || (GET_CODE (addr) == PLUS
                  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
            return orig;
          if (GET_CODE (addr) != PLUS)
            abort ();
        }
      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

          /* Check first to see if this is a constant offset from a @GOTOFF
             symbol reference.  */
          if (local_symbolic_operand (op0, Pmode)
              && GET_CODE (op1) == CONST_INT)
            {
              if (!TARGET_64BIT)
                {
                  if (reload_in_progress)
                    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
                  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
                                        UNSPEC_GOTOFF);
                  new = gen_rtx_PLUS (Pmode, new, op1);
                  new = gen_rtx_CONST (Pmode, new);
                  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

                  if (reg != 0)
                    {
                      emit_move_insn (reg, new);
                      new = reg;
                    }
                }
              else
                {
                  if (INTVAL (op1) < -16*1024*1024
                      || INTVAL (op1) >= 16*1024*1024)
                    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
                }
            }
          else
            {
              base = legitimize_pic_address (XEXP (addr, 0), reg);
              new  = legitimize_pic_address (XEXP (addr, 1),
                                             base == reg ? NULL_RTX : reg);

              if (GET_CODE (new) == CONST_INT)
                new = plus_constant (base, INTVAL (new));
              else
                {
                  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
                    {
                      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
                      new = XEXP (new, 1);
                    }
                  new = gen_rtx_PLUS (Pmode, base, new);
                }
            }
        }
    }
  return new;
}
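/* For illustration, the two cases handled above correspond to code like

	leal	foo@GOTOFF(%ebx), %eax		(local symbol, case 2)
	movl	foo@GOT(%ebx), %eax		(global symbol, case 1)

   where %ebx holds the PIC register, i.e. the GOT base.  */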
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (int to_reg)
{
  rtx tp, reg, insn;

  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  if (!to_reg)
    return tp;

  reg = gen_reg_rtx (Pmode);
  insn = gen_rtx_SET (VOIDmode, reg, tp);
  insn = emit_insn (insn);

  return reg;
}
/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
{
  rtx dest, base, off, pic;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
        {
          rtx rax = gen_rtx_REG (Pmode, 0), insns;

          start_sequence ();
          emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
          insns = get_insns ();
          end_sequence ();

          emit_libcall_block (insns, dest, rax, x);
        }
      else
        emit_insn (gen_tls_global_dynamic_32 (dest, x));
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
        {
          rtx rax = gen_rtx_REG (Pmode, 0), insns, note;

          start_sequence ();
          emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
          insns = get_insns ();
          end_sequence ();

          note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
          note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
          emit_libcall_block (insns, base, rax, note);
        }
      else
        emit_insn (gen_tls_local_dynamic_base_32 (base));

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      return gen_rtx_PLUS (Pmode, base, off);

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
        {
          pic = NULL;
          type = UNSPEC_GOTNTPOFF;
        }
      else if (flag_pic)
        {
          if (reload_in_progress)
            regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
          pic = pic_offset_table_rtx;
          type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
        }
      else if (!TARGET_GNU_TLS)
        {
          pic = gen_reg_rtx (Pmode);
          emit_insn (gen_set_got (pic));
          type = UNSPEC_GOTTPOFF;
        }
      else
        {
          pic = NULL;
          type = UNSPEC_INDNTPOFF;
        }

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
        off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_const_mem (Pmode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_GNU_TLS)
        {
          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          off = force_reg (Pmode, off);
          return gen_rtx_PLUS (Pmode, base, off);
        }
      else
        {
          base = get_thread_pointer (true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (gen_subsi3 (dest, base, off));
        }
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
                            (TARGET_64BIT || TARGET_GNU_TLS)
                            ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_GNU_TLS)
        {
          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          return gen_rtx_PLUS (Pmode, base, off);
        }
      else
        {
          base = get_thread_pointer (true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (gen_subsi3 (dest, base, off));
        }
      break;

    default:
      abort ();
    }

  return dest;
}
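/* For illustration (standard IA-32 ELF TLS sequences; the exact form
   depends on TARGET_GNU_TLS and flag_pic): the initial-exec model
   computes an address roughly as

	movl	%gs:0, %eax
	addl	foo@gotntpoff(%ebx), %eax

   and the local-exec model as

	movl	%gs:0, %eax
	addl	$foo@ntpoff, %eax

   The global/local-dynamic models instead go through ___tls_get_addr.  */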
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
               GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
                        GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
          && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
          && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
        {
          changed = 1;
          XEXP (x, 0) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
                                      GEN_INT (1 << log));
        }

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
          && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
          && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
        {
          changed = 1;
          XEXP (x, 1) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
                                      GEN_INT (1 << log));
        }

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          rtx tmp = XEXP (x, 0);
          XEXP (x, 0) = XEXP (x, 1);
          XEXP (x, 1) = tmp;
          changed = 1;
        }

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
         created by virtual register instantiation, register elimination, and
         similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
        {
          changed = 1;
          x = gen_rtx_PLUS (Pmode,
                            gen_rtx_PLUS (Pmode, XEXP (x, 0),
                                          XEXP (XEXP (x, 1), 0)),
                            XEXP (XEXP (x, 1), 1));
        }

      /* Canonicalize
         (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
               && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
               && CONSTANT_P (XEXP (x, 1)))
        {
          rtx constant;
          rtx other = NULL_RTX;

          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              constant = XEXP (x, 1);
              other = XEXP (XEXP (XEXP (x, 0), 1), 1);
            }
          else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
            {
              constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
              other = XEXP (x, 1);
            }
          else
            constant = 0;

          if (constant)
            {
              changed = 1;
              x = gen_rtx_PLUS (Pmode,
                                gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
                                              XEXP (XEXP (XEXP (x, 0), 1), 0)),
                                plus_constant (other, INTVAL (constant)));
            }
        }

      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
        {
          changed = 1;
          XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
        }

      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          changed = 1;
          XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
        }

      if (changed
          && GET_CODE (XEXP (x, 1)) == REG
          && GET_CODE (XEXP (x, 0)) == REG)
        return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
        {
          changed = 1;
          x = legitimize_pic_address (x, 0);
        }

      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      if (GET_CODE (XEXP (x, 0)) == REG)
        {
          rtx temp = gen_reg_rtx (Pmode);
          rtx val  = force_operand (XEXP (x, 1), temp);
          if (val != temp)
            emit_move_insn (temp, val);

          XEXP (x, 1) = temp;
          return x;
        }

      else if (GET_CODE (XEXP (x, 1)) == REG)
        {
          rtx temp = gen_reg_rtx (Pmode);
          rtx val  = force_operand (XEXP (x, 0), temp);
          if (val != temp)
            emit_move_insn (temp, val);

          XEXP (x, 0) = temp;
          return x;
        }
    }

  return x;
}
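/* Example of the canonicalization above: (plus (ashift (reg) (const_int 2))
   (reg)) becomes (plus (mult (reg) (const_int 4)) (reg)), which matches
   the lea addressing form (%base,%index,4).  */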
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case SYMBOL_REF:
      /* Mark the decl as referenced so that cgraph will output the
         function.  */
      if (SYMBOL_REF_DECL (x))
        mark_decl_referenced (SYMBOL_REF_DECL (x));

      assemble_name (file, XSTR (x, 0));
      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
        fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
         but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
        {
          /* We can use %d if the number is <32 bits and positive.  */
          if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
            fprintf (file, "0x%lx%08lx",
                     (unsigned long) CONST_DOUBLE_HIGH (x),
                     (unsigned long) CONST_DOUBLE_LOW (x));
          else
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
        }
      else
        /* We can't handle floating point constants;
           PRINT_OPERAND must handle them.  */
        output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
        {
          output_pic_addr_const (file, XEXP (x, 0), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 1), code);
        }
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          output_pic_addr_const (file, XEXP (x, 1), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 0), code);
        }
      else
        abort ();
      break;

    case MINUS:
      putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
        abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
        {
        case UNSPEC_GOT:
          fputs ("@GOT", file);
          break;
        case UNSPEC_GOTOFF:
          fputs ("@GOTOFF", file);
          break;
        case UNSPEC_GOTPCREL:
          fputs ("@GOTPCREL(%rip)", file);
          break;
        case UNSPEC_GOTTPOFF:
          /* FIXME: This might be @TPOFF in Sun ld too.  */
          fputs ("@GOTTPOFF", file);
          break;
        case UNSPEC_TPOFF:
          fputs ("@TPOFF", file);
          break;
        case UNSPEC_NTPOFF:
          if (TARGET_64BIT)
            fputs ("@TPOFF", file);
          else
            fputs ("@NTPOFF", file);
          break;
        case UNSPEC_DTPOFF:
          fputs ("@DTPOFF", file);
          break;
        case UNSPEC_GOTNTPOFF:
          if (TARGET_64BIT)
            fputs ("@GOTTPOFF(%rip)", file);
          else
            fputs ("@GOTNTPOFF", file);
          break;
        case UNSPEC_INDNTPOFF:
          fputs ("@INDNTPOFF", file);
          break;
        default:
          output_operand_lossage ("invalid UNSPEC as operand");
          break;
        }
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
   We need to handle our special PIC relocations.  */

void
i386_dwarf_output_addr_const (FILE *file, rtx x)
{
#ifdef ASM_QUAD
  fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
#else
  if (TARGET_64BIT)
    abort ();
  fprintf (file, "%s", ASM_LONG);
#endif
  if (flag_pic)
    output_pic_addr_const (file, x, '\0');
  else
    output_addr_const (file, x);
  fputc ('\n', file);
}

/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

void
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@DTPOFF", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      abort ();
    }
}
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.  */

static rtx
ix86_delegitimize_address (rtx orig_x)
{
  rtx x = orig_x, y;

  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST
          || GET_CODE (XEXP (x, 0)) != UNSPEC
          || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
          || GET_CODE (orig_x) != MEM)
        return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    y = NULL;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      y = XEXP (x, 0);
      if (GET_CODE (XEXP (y, 0)) == REG
          && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
        y = XEXP (y, 1);
      else if (GET_CODE (XEXP (y, 1)) == REG
               && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
        y = XEXP (y, 0);
      else
        return orig_x;
      if (GET_CODE (y) != REG
          && GET_CODE (y) != MULT
          && GET_CODE (y) != ASHIFT)
        return orig_x;
    }
  else
    return orig_x;

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
          || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    {
      if (y)
        return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
      return XVECEXP (x, 0, 0);
    }

  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
          || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
              && GET_CODE (orig_x) != MEM)))
    {
      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
      if (y)
        return gen_rtx_PLUS (Pmode, y, x);
      return x;
    }

  return orig_x;
}
static void
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
                    int fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != UNKNOWN || second_code != UNKNOWN)
        abort ();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case GT:
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
        abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
         Those same assemblers have the same but opposite lossage on cmov.  */
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
        suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
        suffix = "l";
      else
        abort ();
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
        suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
        suffix = "ge";
      else
        abort ();
      break;
    case GEU:
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
        abort ();
      suffix = "le";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'h', pretend the reg is the `high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.  */

void
print_reg (rtx x, int code, FILE *file)
{
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      switch (code)
        {
        case 0:
          error ("extended registers have no high halves");
          break;
        case 1:
          fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
          break;
        case 2:
          fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
          break;
        case 4:
          fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
          break;
        case 8:
          fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
          break;
        default:
          error ("unsupported operand size for extended register");
          break;
        }
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
        {
          fputs ("st(0)", file);
          break;
        }
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x))
        putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
        abort ();
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
        abort ();
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
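/* Example: for hard register 0 (the a register), code 'b' prints "al",
   'w' prints "ax", 'k' prints "eax" and 'q' prints "rax" (with a "%"
   prefix in AT&T syntax); an extended register such as r8 with code 'k'
   prints "r8d".  */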
/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

static const char *
get_some_local_dynamic_name (void)
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
        && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  abort ();
}

static int
get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && local_dynamic_symbolic_operand (x, Pmode))
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
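
   As a hedged illustration (the template below is a sketch, not quoted
   from the machine description): in an output template such as

       "add%z0\t{%2, %0|%0, %2}"

   the 'z' code makes print_operand emit the size suffix ('b', 'w', 'l'
   or 'q') that matches the mode of operands[0].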
print_operand (FILE *file, rtx x, int code)
6286 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6291 assemble_name (file
, get_some_local_dynamic_name ());
6295 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6297 else if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6299 /* Intel syntax. For absolute addresses, registers should not
6300 be surrounded by braces. */
6301 if (GET_CODE (x
) != REG
)
6304 PRINT_OPERAND (file
, x
, 0);
6312 PRINT_OPERAND (file
, x
, 0);
6317 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6322 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6327 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6332 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6337 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6342 if (ASSEMBLER_DIALECT
== ASM_ATT
)
      /* 387 opcodes don't get size suffixes if the operands are
	 registers.  */
6349 if (STACK_REG_P (x
))
6352 /* Likewise if using Intel opcodes. */
6353 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6356 /* This is the size of op from size of operand. */
6357 switch (GET_MODE_SIZE (GET_MODE (x
)))
6360 #ifdef HAVE_GAS_FILDS_FISTS
6366 if (GET_MODE (x
) == SFmode
)
6381 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
6383 #ifdef GAS_MNEMONICS
6409 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
6411 PRINT_OPERAND (file
, x
, 0);
      /* Little bit of braindamage here.  The SSE compare instructions
	 use completely different names for the comparisons than the
	 fp conditional moves.  */
6420 switch (GET_CODE (x
))
6435 fputs ("unord", file
);
6439 fputs ("neq", file
);
6443 fputs ("nlt", file
);
6447 fputs ("nle", file
);
6450 fputs ("ord", file
);
6458 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6459 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6461 switch (GET_MODE (x
))
6463 case HImode
: putc ('w', file
); break;
6465 case SFmode
: putc ('l', file
); break;
6467 case DFmode
: putc ('q', file
); break;
6475 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
6478 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6479 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6482 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
6485 /* Like above, but reverse condition */
6487 /* Check to see if argument to %c is really a constant
6488 and not a condition code which needs to be reversed. */
6489 if (!COMPARISON_P (x
))
6491 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6494 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
6497 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6498 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6501 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
6507 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
6510 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
6513 int pred_val
= INTVAL (XEXP (x
, 0));
6515 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
6516 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
6518 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
6519 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
	      /* Emit hints only in the case where the default branch
		 prediction heuristics would fail.  */
6523 if (taken
!= cputaken
)
6525 /* We use 3e (DS) prefix for taken branches and
6526 2e (CS) prefix for not taken branches. */
6528 fputs ("ds ; ", file
);
6530 fputs ("cs ; ", file
);
6537 output_operand_lossage ("invalid operand code `%c'", code
);
6541 if (GET_CODE (x
) == REG
)
6542 print_reg (x
, code
, file
);
6544 else if (GET_CODE (x
) == MEM
)
6546 /* No `byte ptr' prefix for call instructions. */
6547 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
6550 switch (GET_MODE_SIZE (GET_MODE (x
)))
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "XWORD"; break;
	    case 16: size = "XMMWORD"; break;
6562 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6565 else if (code
== 'w')
6567 else if (code
== 'k')
6571 fputs (" PTR ", file
);
6575 /* Avoid (%rip) for call operands. */
6576 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
6577 && GET_CODE (x
) != CONST_INT
)
6578 output_addr_const (file
, x
);
6579 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
6580 output_operand_lossage ("invalid constraints for operand");
6585 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
6590 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
6591 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
6593 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6595 fprintf (file
, "0x%08lx", l
);
6598 /* These float cases don't actually occur as immediate operands. */
6599 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
6603 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
6604 fprintf (file
, "%s", dstr
);
6607 else if (GET_CODE (x
) == CONST_DOUBLE
6608 && GET_MODE (x
) == XFmode
)
6612 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
6613 fprintf (file
, "%s", dstr
);
6620 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
6622 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6625 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
6626 || GET_CODE (x
) == LABEL_REF
)
6628 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6631 fputs ("OFFSET FLAT:", file
);
6634 if (GET_CODE (x
) == CONST_INT
)
6635 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6637 output_pic_addr_const (file
, x
, code
);
6639 output_addr_const (file
, x
);
6643 /* Print a memory operand whose address is ADDR. */
print_operand_address (FILE *file, rtx addr)
6648 struct ix86_address parts
;
6649 rtx base
, index
, disp
;
6652 if (! ix86_decompose_address (addr
, &parts
))
6656 index
= parts
.index
;
6658 scale
= parts
.scale
;
6666 if (USER_LABEL_PREFIX
[0] == 0)
6668 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
6674 if (!base
&& !index
)
6676 /* Displacement only requires special attention. */
6678 if (GET_CODE (disp
) == CONST_INT
)
6680 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
6682 if (USER_LABEL_PREFIX
[0] == 0)
6684 fputs ("ds:", file
);
6686 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
6689 output_pic_addr_const (file
, disp
, 0);
6691 output_addr_const (file
, disp
);
6693 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6695 && ((GET_CODE (disp
) == SYMBOL_REF
6696 && ! tls_symbolic_operand (disp
, GET_MODE (disp
)))
6697 || GET_CODE (disp
) == LABEL_REF
6698 || (GET_CODE (disp
) == CONST
6699 && GET_CODE (XEXP (disp
, 0)) == PLUS
6700 && (GET_CODE (XEXP (XEXP (disp
, 0), 0)) == SYMBOL_REF
6701 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) == LABEL_REF
)
6702 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)))
6703 fputs ("(%rip)", file
);
6707 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6712 output_pic_addr_const (file
, disp
, 0);
6713 else if (GET_CODE (disp
) == LABEL_REF
)
6714 output_asm_label (disp
);
6716 output_addr_const (file
, disp
);
6721 print_reg (base
, 0, file
);
6725 print_reg (index
, 0, file
);
6727 fprintf (file
, ",%d", scale
);
6733 rtx offset
= NULL_RTX
;
6737 /* Pull out the offset of a symbol; print any symbol itself. */
6738 if (GET_CODE (disp
) == CONST
6739 && GET_CODE (XEXP (disp
, 0)) == PLUS
6740 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
6742 offset
= XEXP (XEXP (disp
, 0), 1);
6743 disp
= gen_rtx_CONST (VOIDmode
,
6744 XEXP (XEXP (disp
, 0), 0));
6748 output_pic_addr_const (file
, disp
, 0);
6749 else if (GET_CODE (disp
) == LABEL_REF
)
6750 output_asm_label (disp
);
6751 else if (GET_CODE (disp
) == CONST_INT
)
6754 output_addr_const (file
, disp
);
6760 print_reg (base
, 0, file
);
6763 if (INTVAL (offset
) >= 0)
6765 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
6769 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
6776 print_reg (index
, 0, file
);
6778 fprintf (file
, "*%d", scale
);
output_addr_const_extra (FILE *file, rtx x)
6790 if (GET_CODE (x
) != UNSPEC
)
6793 op
= XVECEXP (x
, 0, 0);
6794 switch (XINT (x
, 1))
6796 case UNSPEC_GOTTPOFF
:
6797 output_addr_const (file
, op
);
6798 /* FIXME: This might be @TPOFF in Sun ld. */
6799 fputs ("@GOTTPOFF", file
);
6802 output_addr_const (file
, op
);
6803 fputs ("@TPOFF", file
);
6806 output_addr_const (file
, op
);
6808 fputs ("@TPOFF", file
);
6810 fputs ("@NTPOFF", file
);
6813 output_addr_const (file
, op
);
6814 fputs ("@DTPOFF", file
);
6816 case UNSPEC_GOTNTPOFF
:
6817 output_addr_const (file
, op
);
6819 fputs ("@GOTTPOFF(%rip)", file
);
6821 fputs ("@GOTNTPOFF", file
);
6823 case UNSPEC_INDNTPOFF
:
6824 output_addr_const (file
, op
);
6825 fputs ("@INDNTPOFF", file
);
/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */
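/* A minimal usage sketch (hypothetical caller, little-endian layout
   assumed):

       rtx lo[1], hi[1];
       split_di (&operands[1], 1, lo, hi);

   afterwards lo[0] refers to bits 0-31 and hi[0] to bits 32-63 of the
   original DImode operand, either as SImode subregs or, for memory
   operands, as MEMs adjusted to offsets 0 and 4.  */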
split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6846 rtx op
= operands
[num
];
      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle them.  */
6850 if (GET_CODE (op
) == MEM
)
6852 lo_half
[num
] = adjust_address (op
, SImode
, 0);
6853 hi_half
[num
] = adjust_address (op
, SImode
, 4);
6857 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
6858 GET_MODE (op
) == VOIDmode
6859 ? DImode
: GET_MODE (op
), 0);
6860 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
6861 GET_MODE (op
) == VOIDmode
6862 ? DImode
: GET_MODE (op
), 4);
/* Split one or more TImode RTL references into pairs of DImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */
split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6877 rtx op
= operands
[num
];
      /* simplify_subreg refuses to split volatile memory addresses, but we
	 still have to handle them.  */
6881 if (GET_CODE (op
) == MEM
)
6883 lo_half
[num
] = adjust_address (op
, DImode
, 0);
6884 hi_half
[num
] = adjust_address (op
, DImode
, 8);
6888 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
6889 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */
6902 #ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
6908 #define SYSV386_COMPAT 1
output_387_binary_op (rtx insn, rtx *operands)
6914 static char buf
[30];
6917 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]) | SSE_REG_P (operands
[2]);
6919 #ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents the input
     constraints, which helps in understanding the following code.  */
6922 if (STACK_REG_P (operands
[0])
6923 && ((REG_P (operands
[1])
6924 && REGNO (operands
[0]) == REGNO (operands
[1])
6925 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
6926 || (REG_P (operands
[2])
6927 && REGNO (operands
[0]) == REGNO (operands
[2])
6928 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
6929 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
6935 switch (GET_CODE (operands
[3]))
6938 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6939 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6947 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6948 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6956 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6957 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6965 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6966 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6980 if (GET_MODE (operands
[0]) == SFmode
)
6981 strcat (buf
, "ss\t{%2, %0|%0, %2}");
6983 strcat (buf
, "sd\t{%2, %0|%0, %2}");
6988 switch (GET_CODE (operands
[3]))
6992 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
6994 rtx temp
= operands
[2];
6995 operands
[2] = operands
[1];
6999 /* know operands[0] == operands[1]. */
7001 if (GET_CODE (operands
[2]) == MEM
)
7007 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7009 if (STACK_TOP_P (operands
[0]))
7010 /* How is it that we are storing to a dead operand[2]?
7011 Well, presumably operands[1] is dead too. We can't
7012 store the result to st(0) as st(0) gets popped on this
7013 instruction. Instead store to operands[2] (which I
7014 think has to be st(1)). st(1) will be popped later.
7015 gcc <= 2.8.1 didn't have this check and generated
7016 assembly code that the Unixware assembler rejected. */
7017 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7019 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7023 if (STACK_TOP_P (operands
[0]))
7024 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7026 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7031 if (GET_CODE (operands
[1]) == MEM
)
7037 if (GET_CODE (operands
[2]) == MEM
)
7043 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7046 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7047 derived assemblers, confusingly reverse the direction of
7048 the operation for fsub{r} and fdiv{r} when the
7049 destination register is not st(0). The Intel assembler
7050 doesn't have this brain damage. Read !SYSV386_COMPAT to
7051 figure out what the hardware really does. */
7052 if (STACK_TOP_P (operands
[0]))
7053 p
= "{p\t%0, %2|rp\t%2, %0}";
7055 p
= "{rp\t%2, %0|p\t%0, %2}";
7057 if (STACK_TOP_P (operands
[0]))
7058 /* As above for fmul/fadd, we can't store to st(0). */
7059 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7061 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7066 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
7069 if (STACK_TOP_P (operands
[0]))
7070 p
= "{rp\t%0, %1|p\t%1, %0}";
7072 p
= "{p\t%1, %0|rp\t%0, %1}";
7074 if (STACK_TOP_P (operands
[0]))
7075 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7077 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7082 if (STACK_TOP_P (operands
[0]))
7084 if (STACK_TOP_P (operands
[1]))
7085 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7087 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7090 else if (STACK_TOP_P (operands
[1]))
7093 p
= "{\t%1, %0|r\t%0, %1}";
7095 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7101 p
= "{r\t%2, %0|\t%0, %2}";
7103 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */
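/* Background for the constants used below (x87 architectural facts, not
   taken from this file): bits 10-11 of the 80387 control word select the
   rounding mode (00 = to nearest, 01 = down, 10 = up, 11 = toward zero),
   and bit 5 masks the precision exception; hence the 0x0400, 0x0800,
   0x0c00 and 0x0020 values or-ed in below, and the 0x4/0x8/0xc nibbles
   inserted into the high byte with movsi_insv_1.  */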
emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7123 rtx reg
= gen_reg_rtx (HImode
);
7125 emit_insn (gen_x86_fnstcw_1 (current_mode
));
7126 emit_move_insn (reg
, current_mode
);
7128 if (!TARGET_PARTIAL_REG_STALL
&& !optimize_size
7134 /* round down toward -oo */
7135 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
7139 /* round up toward +oo */
7140 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
7144 /* round toward zero (truncate) */
7145 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
7148 case I387_CW_MASK_PM
:
7149 /* mask precision exception for nearbyint() */
7150 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
7162 /* round down toward -oo */
7163 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
7164 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
7168 /* round up toward +oo */
7169 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
7170 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
7174 /* round toward zero (truncate) */
7175 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
7178 case I387_CW_MASK_PM
:
7179 /* mask precision exception for nearbyint() */
7180 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
7188 emit_move_insn (new_mode
, reg
);
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */
output_fix_trunc (rtx insn, rtx *operands)
7198 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7199 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
7201 /* Jump through a hoop or two for DImode, since the hardware has no
7202 non-popping instruction. We used to do this a different way, but
7203 that was somewhat fragile and broke with post-reload splitters. */
7204 if (dimode_p
&& !stack_top_dies
)
7205 output_asm_insn ("fld\t%y1", operands
);
7207 if (!STACK_TOP_P (operands
[1]))
7210 if (GET_CODE (operands
[0]) != MEM
)
7213 output_asm_insn ("fldcw\t%3", operands
);
7214 if (stack_top_dies
|| dimode_p
)
7215 output_asm_insn ("fistp%z0\t%0", operands
);
7217 output_asm_insn ("fist%z0\t%0", operands
);
7218 output_asm_insn ("fldcw\t%2", operands
);
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.  */
output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7231 rtx cmp_op0
, cmp_op1
;
7232 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]);
7236 cmp_op0
= operands
[1];
7237 cmp_op1
= operands
[2];
7241 cmp_op0
= operands
[0];
7242 cmp_op1
= operands
[1];
7247 if (GET_MODE (operands
[0]) == SFmode
)
7249 return "ucomiss\t{%1, %0|%0, %1}";
7251 return "comiss\t{%1, %0|%0, %1}";
7254 return "ucomisd\t{%1, %0|%0, %1}";
7256 return "comisd\t{%1, %0|%0, %1}";
7259 if (! STACK_TOP_P (cmp_op0
))
7262 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7264 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
7268 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
7269 return TARGET_USE_FFREEP
? "ffreep\t%y1" : "fstp\t%y1";
7272 return "ftst\n\tfnstsw\t%0";
7275 if (STACK_REG_P (cmp_op1
)
7277 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
7278 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
      /* If both the top of the 387 stack and the other operand are
	 stack registers that die, then this must be a `fcompp'
	 float compare.  */
7286 /* There is no double popping fcomi variant. Fortunately,
7287 eflags is immune from the fstp's cc clobbering. */
7289 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
7291 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
7292 return TARGET_USE_FFREEP
? "ffreep\t%y0" : "fstp\t%y0";
7299 return "fucompp\n\tfnstsw\t%0";
7301 return "fcompp\n\tfnstsw\t%0";
7314 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7316 static const char * const alt
[24] =
7328 "fcomi\t{%y1, %0|%0, %y1}",
7329 "fcomip\t{%y1, %0|%0, %y1}",
7330 "fucomi\t{%y1, %0|%0, %y1}",
7331 "fucomip\t{%y1, %0|%0, %y1}",
7338 "fcom%z2\t%y2\n\tfnstsw\t%0",
7339 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7340 "fucom%z2\t%y2\n\tfnstsw\t%0",
7341 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7343 "ficom%z2\t%y2\n\tfnstsw\t%0",
7344 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7352 mask
= eflags_p
<< 3;
7353 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
7354 mask
|= unordered_p
<< 1;
7355 mask
|= stack_top_dies
;
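  /* Worked example of the encoding above (values assumed for
     illustration): eflags_p == 1, an FP (non-integer) operand,
     unordered_p == 1 and a dying stack top give
     mask = (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11, i.e. the
     fcomi-family, unordered, popping variant (fucomip).  */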
7368 ix86_output_addr_vec_elt (FILE *file
, int value
)
7370 const char *directive
= ASM_LONG
;
7375 directive
= ASM_QUAD
;
7381 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
7385 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
7388 fprintf (file
, "%s%s%d-%s%d\n",
7389 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
7390 else if (HAVE_AS_GOTOFF_IN_DATA
)
7391 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
7393 else if (TARGET_MACHO
)
7395 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
7396 machopic_output_function_base_name (file
);
7397 fprintf(file
, "\n");
7401 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
7402 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
7405 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7409 ix86_expand_clear (rtx dest
)
7413 /* We play register width games, which are only valid after reload. */
7414 if (!reload_completed
)
7417 /* Avoid HImode and its attendant prefix byte. */
7418 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
7419 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
7421 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
7423 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7424 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
7426 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
7427 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
7433 /* X is an unchanging MEM. If it is a constant pool reference, return
7434 the constant pool rtx, else NULL. */
7437 maybe_get_pool_constant (rtx x
)
7439 x
= ix86_delegitimize_address (XEXP (x
, 0));
7441 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7442 return get_pool_constant (x
);
ix86_expand_move (enum machine_mode mode, rtx operands[])
7450 int strict
= (reload_in_progress
|| reload_completed
);
7452 enum tls_model model
;
7457 model
= GET_CODE (op1
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (op1
) : 0;
7460 op1
= legitimize_tls_address (op1
, model
, true);
7461 op1
= force_operand (op1
, op0
);
7466 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
7471 rtx temp
= ((reload_in_progress
7472 || ((op0
&& GET_CODE (op0
) == REG
)
7474 ? op0
: gen_reg_rtx (Pmode
));
7475 op1
= machopic_indirect_data_reference (op1
, temp
);
7476 op1
= machopic_legitimize_pic_address (op1
, mode
,
7477 temp
== op1
? 0 : temp
);
7479 else if (MACHOPIC_INDIRECT
)
7480 op1
= machopic_indirect_data_reference (op1
, 0);
7484 if (GET_CODE (op0
) == MEM
)
7485 op1
= force_reg (Pmode
, op1
);
7487 op1
= legitimize_address (op1
, op1
, Pmode
);
7488 #endif /* TARGET_MACHO */
7492 if (GET_CODE (op0
) == MEM
7493 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
7494 || !push_operand (op0
, mode
))
7495 && GET_CODE (op1
) == MEM
)
7496 op1
= force_reg (mode
, op1
);
7498 if (push_operand (op0
, mode
)
7499 && ! general_no_elim_operand (op1
, mode
))
7500 op1
= copy_to_mode_reg (mode
, op1
);
      /* Force large constants in 64bit compilation into a register
	 to get them CSEed.  */
7504 if (TARGET_64BIT
&& mode
== DImode
7505 && immediate_operand (op1
, mode
)
7506 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
7507 && !register_operand (op0
, mode
)
7508 && optimize
&& !reload_completed
&& !reload_in_progress
)
7509 op1
= copy_to_mode_reg (mode
, op1
);
7511 if (FLOAT_MODE_P (mode
))
7513 /* If we are loading a floating point constant to a register,
7514 force the value to memory now, since we'll get better code
7515 out the back end. */
7519 else if (GET_CODE (op1
) == CONST_DOUBLE
)
7521 op1
= validize_mem (force_const_mem (mode
, op1
));
7522 if (!register_operand (op0
, mode
))
7524 rtx temp
= gen_reg_rtx (mode
);
7525 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
7526 emit_move_insn (op0
, temp
);
7533 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register; once we have that information we may be able
     to handle some of them more efficiently.  */
7543 if ((reload_in_progress
| reload_completed
) == 0
7544 && register_operand (operands
[0], mode
)
7545 && CONSTANT_P (operands
[1]) && operands
[1] != CONST0_RTX (mode
))
7546 operands
[1] = validize_mem (force_const_mem (mode
, operands
[1]));
7548 /* Make operand1 a register if it isn't already. */
7550 && !register_operand (operands
[0], mode
)
7551 && !register_operand (operands
[1], mode
))
7553 rtx temp
= force_reg (GET_MODE (operands
[1]), operands
[1]);
7554 emit_move_insn (operands
[0], temp
);
7558 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]));
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */
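/* A hedged illustration of the constraint being enforced here (the C
   fragment is hypothetical): for "*p = *q + *r", general_operand would
   accept three memory references, but x86 arithmetic insns allow at most
   one memory operand, which must also match the destination; the code
   below therefore copies operands into registers until the insn has an
   acceptable shape.  */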
7566 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
7569 int matching_memory
;
7570 rtx src1
, src2
, dst
, op
, clob
;
7576 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7577 if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
7578 && (rtx_equal_p (dst
, src2
)
7579 || immediate_operand (src1
, mode
)))
7586 /* If the destination is memory, and we do not have matching source
7587 operands, do things in registers. */
7588 matching_memory
= 0;
7589 if (GET_CODE (dst
) == MEM
)
7591 if (rtx_equal_p (dst
, src1
))
7592 matching_memory
= 1;
7593 else if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
7594 && rtx_equal_p (dst
, src2
))
7595 matching_memory
= 2;
7597 dst
= gen_reg_rtx (mode
);
7600 /* Both source operands cannot be in memory. */
7601 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
7603 if (matching_memory
!= 2)
7604 src2
= force_reg (mode
, src2
);
7606 src1
= force_reg (mode
, src1
);
  /* If the operation is not commutative, source 1 cannot be a constant
     or non-matching memory.  */
7611 if ((CONSTANT_P (src1
)
7612 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
7613 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
7614 src1
= force_reg (mode
, src1
);
7616 /* If optimizing, copy to regs to improve CSE */
7617 if (optimize
&& ! no_new_pseudos
)
7619 if (GET_CODE (dst
) == MEM
)
7620 dst
= gen_reg_rtx (mode
);
7621 if (GET_CODE (src1
) == MEM
)
7622 src1
= force_reg (mode
, src1
);
7623 if (GET_CODE (src2
) == MEM
)
7624 src2
= force_reg (mode
, src2
);
7627 /* Emit the instruction. */
7629 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
7630 if (reload_in_progress
)
7632 /* Reload doesn't know about the flags register, and doesn't know that
7633 it doesn't want to clobber it. We can only do this with PLUS. */
7640 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
7641 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
7644 /* Fix up the destination if needed. */
7645 if (dst
!= operands
[0])
7646 emit_move_insn (operands
[0], dst
);
7649 /* Return TRUE or FALSE depending on whether the binary operator meets the
7650 appropriate constraints. */
7653 ix86_binary_operator_ok (enum rtx_code code
,
7654 enum machine_mode mode ATTRIBUTE_UNUSED
,
7657 /* Both source operands cannot be in memory. */
7658 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
  /* If the operation is not commutative, source 1 cannot be a constant.  */
7661 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
7663 /* If the destination is memory, we must have a matching source operand. */
7664 if (GET_CODE (operands
[0]) == MEM
7665 && ! (rtx_equal_p (operands
[0], operands
[1])
7666 || (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
7667 && rtx_equal_p (operands
[0], operands
[2]))))
  /* If the operation is not commutative and source 1 is memory, we must
     have a matching destination.  */
7671 if (GET_CODE (operands
[1]) == MEM
7672 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
7673 && ! rtx_equal_p (operands
[0], operands
[1]))
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */
7683 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
7686 int matching_memory
;
7687 rtx src
, dst
, op
, clob
;
7692 /* If the destination is memory, and we do not have matching source
7693 operands, do things in registers. */
7694 matching_memory
= 0;
7695 if (GET_CODE (dst
) == MEM
)
7697 if (rtx_equal_p (dst
, src
))
7698 matching_memory
= 1;
7700 dst
= gen_reg_rtx (mode
);
7703 /* When source operand is memory, destination must match. */
7704 if (!matching_memory
&& GET_CODE (src
) == MEM
)
7705 src
= force_reg (mode
, src
);
7707 /* If optimizing, copy to regs to improve CSE */
7708 if (optimize
&& ! no_new_pseudos
)
7710 if (GET_CODE (dst
) == MEM
)
7711 dst
= gen_reg_rtx (mode
);
7712 if (GET_CODE (src
) == MEM
)
7713 src
= force_reg (mode
, src
);
7716 /* Emit the instruction. */
7718 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
7719 if (reload_in_progress
|| code
== NOT
)
7721 /* Reload doesn't know about the flags register, and doesn't know that
7722 it doesn't want to clobber it. */
7729 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
7730 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
7733 /* Fix up the destination if needed. */
7734 if (dst
!= operands
[0])
7735 emit_move_insn (operands
[0], dst
);
7738 /* Return TRUE or FALSE depending on whether the unary operator meets the
7739 appropriate constraints. */
7742 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
7743 enum machine_mode mode ATTRIBUTE_UNUSED
,
7744 rtx operands
[2] ATTRIBUTE_UNUSED
)
7746 /* If one of operands is memory, source and destination must match. */
7747 if ((GET_CODE (operands
[0]) == MEM
7748 || GET_CODE (operands
[1]) == MEM
)
7749 && ! rtx_equal_p (operands
[0], operands
[1]))
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and whether the
   CC mode is at least as constrained as REQ_MODE.  */
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
7762 enum machine_mode set_mode
;
7764 set
= PATTERN (insn
);
7765 if (GET_CODE (set
) == PARALLEL
)
7766 set
= XVECEXP (set
, 0, 0);
7767 if (GET_CODE (set
) != SET
)
7769 if (GET_CODE (SET_SRC (set
)) != COMPARE
)
7772 set_mode
= GET_MODE (SET_DEST (set
));
7776 if (req_mode
!= CCNOmode
7777 && (req_mode
!= CCmode
7778 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
7782 if (req_mode
== CCGCmode
)
7786 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
7790 if (req_mode
== CCZmode
)
7800 return (GET_MODE (SET_SRC (set
)) == set_mode
);
7803 /* Generate insn patterns to do an integer compare of OPERANDS. */
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
7808 enum machine_mode cmpmode
;
7811 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
7812 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
7814 /* This is very simple, but making the interface the same as in the
7815 FP case makes the rest of the code easier. */
7816 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
7817 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
7819 /* Return the test that should be put into the flags user, i.e.
7820 the bcc, scc, or cmov instruction. */
7821 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
7824 /* Figure out whether to use ordered or unordered fp comparisons.
7825 Return the appropriate mode to use. */
7828 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
7835 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
7841 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
7842 return ix86_fp_compare_mode (code
);
7845 /* Only zero flag is needed. */
7847 case NE
: /* ZF!=0 */
7849 /* Codes needing carry flag. */
7850 case GEU
: /* CF=0 */
7851 case GTU
: /* CF=0 & ZF=0 */
7852 case LTU
: /* CF=1 */
7853 case LEU
: /* CF=1 | ZF=1 */
7855 /* Codes possibly doable only with sign flag when
7856 comparing against zero. */
7857 case GE
: /* SF=OF or SF=0 */
7858 case LT
: /* SF<>OF or SF=1 */
7859 if (op1
== const0_rtx
)
7862 /* For other cases Carry flag is not required. */
      /* Codes doable only with the sign flag when comparing
	 against zero, but we miss the jump instruction for it,
	 so we need to use relational tests against overflow,
	 which thus needs to be zero.  */
7868 case GT
: /* ZF=0 & SF=OF */
7869 case LE
: /* ZF=1 | SF<>OF */
7870 if (op1
== const0_rtx
)
      /* The strcmp pattern does (use flags), and combine may ask us
	 for a proper comparison.  */
7883 /* Return the fixed registers used for condition codes. */
7886 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return VOIDmode.  */
7897 static enum machine_mode
7898 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
7903 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
7906 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
7907 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
      /* These are only compatible with themselves, which we already
	 checked for.  */
7941 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7944 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
7946 enum rtx_code swapped_code
= swap_condition (code
);
7947 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
7948 || (ix86_fp_comparison_cost (swapped_code
)
7949 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
7952 /* Swap, force into registers, or otherwise massage the two operands
7953 to a fp comparison. The operands are updated in place; the new
7954 comparison code is returned. */
7956 static enum rtx_code
7957 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
7959 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
7960 rtx op0
= *pop0
, op1
= *pop1
;
7961 enum machine_mode op_mode
= GET_MODE (op0
);
7962 int is_sse
= SSE_REG_P (op0
) | SSE_REG_P (op1
);
7964 /* All of the unordered compare instructions only work on registers.
7965 The same is true of the fcomi compare instructions. The same is
7966 true of the XFmode compare instructions if not comparing with
7967 zero (ftst insn is used in this case). */
7970 && (fpcmp_mode
== CCFPUmode
7971 || (op_mode
== XFmode
7972 && ! (standard_80387_constant_p (op0
) == 1
7973 || standard_80387_constant_p (op1
) == 1))
7974 || ix86_use_fcomi_compare (code
)))
7976 op0
= force_reg (op_mode
, op0
);
7977 op1
= force_reg (op_mode
, op1
);
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */
7985 if (standard_80387_constant_p (op0
) == 0
7986 || (GET_CODE (op0
) == MEM
7987 && ! (standard_80387_constant_p (op1
) == 0
7988 || GET_CODE (op1
) == MEM
)))
7991 tmp
= op0
, op0
= op1
, op1
= tmp
;
7992 code
= swap_condition (code
);
7995 if (GET_CODE (op0
) != REG
)
7996 op0
= force_reg (op_mode
, op0
);
7998 if (CONSTANT_P (op1
))
8000 int tmp
= standard_80387_constant_p (op1
);
8002 op1
= validize_mem (force_const_mem (op_mode
, op1
));
8006 op1
= force_reg (op_mode
, op1
);
8009 op1
= force_reg (op_mode
, op1
);
8013 /* Try to rearrange the comparison to make it cheaper. */
8014 if (ix86_fp_comparison_cost (code
)
8015 > ix86_fp_comparison_cost (swap_condition (code
))
8016 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
8019 tmp
= op0
, op0
= op1
, op1
= tmp
;
8020 code
= swap_condition (code
);
8021 if (GET_CODE (op0
) != REG
)
8022 op0
= force_reg (op_mode
, op0
);
/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such
   code is available.  */
8035 ix86_fp_compare_code_to_integer (enum rtx_code code
)
/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for the branch that
   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
   is not required, its value is set to UNKNOWN.
   We never require more than two branches.  */
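/* A worked example of the splitting described above (assuming
   TARGET_IEEE_FP): for CODE == LT the result is
       *first_code  = LT          (branches wrongly when unordered)
       *bypass_code = UNORDERED   (branch around the LT test first)
       *second_code = UNKNOWN
   so two branches suffice, while for CODE == GT neither a bypass nor a
   second branch is needed.  */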
8071 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
8072 enum rtx_code
*first_code
,
8073 enum rtx_code
*second_code
)
8076 *bypass_code
= UNKNOWN
;
8077 *second_code
= UNKNOWN
;
8079 /* The fcomi comparison sets flags as follows:
8089 case GT
: /* GTU - CF=0 & ZF=0 */
8090 case GE
: /* GEU - CF=0 */
8091 case ORDERED
: /* PF=0 */
8092 case UNORDERED
: /* PF=1 */
8093 case UNEQ
: /* EQ - ZF=1 */
8094 case UNLT
: /* LTU - CF=1 */
8095 case UNLE
: /* LEU - CF=1 | ZF=1 */
8096 case LTGT
: /* EQ - ZF=0 */
8098 case LT
: /* LTU - CF=1 - fails on unordered */
8100 *bypass_code
= UNORDERED
;
8102 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
8104 *bypass_code
= UNORDERED
;
8106 case EQ
: /* EQ - ZF=1 - fails on unordered */
8108 *bypass_code
= UNORDERED
;
8110 case NE
: /* NE - ZF=0 - fails on unordered */
8112 *second_code
= UNORDERED
;
8114 case UNGE
: /* GEU - CF=0 - fails on unordered */
8116 *second_code
= UNORDERED
;
8118 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
8120 *second_code
= UNORDERED
;
8125 if (!TARGET_IEEE_FP
)
8127 *second_code
= UNKNOWN
;
8128 *bypass_code
= UNKNOWN
;
/* Return the cost of a comparison done using fcom + arithmetic operations
   on AX.  All following functions use the number of instructions as a
   cost metric.  In the future this should be tweaked to compute bytes for
   optimize_size and take into account the performance of various
   instructions on various CPUs.  */
8137 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
8139 if (!TARGET_IEEE_FP
)
8141 /* The cost of code output by ix86_expand_fp_compare. */
8169 /* Return cost of comparison done using fcomi operation.
8170 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8172 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
8174 enum rtx_code bypass_code
, first_code
, second_code
;
8175 /* Return arbitrarily high cost when instruction is not supported - this
8176 prevents gcc from using it. */
8179 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8180 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
8183 /* Return cost of comparison done using sahf operation.
8184 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8186 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
8188 enum rtx_code bypass_code
, first_code
, second_code
;
  /* Return arbitrarily high cost when the instruction is not preferred -
     this prevents gcc from using it.  */
8191 if (!TARGET_USE_SAHF
&& !optimize_size
)
8193 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8194 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
8197 /* Compute cost of the comparison done using any method.
8198 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8200 ix86_fp_comparison_cost (enum rtx_code code
)
8202 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
8205 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
8206 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
8208 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
8209 if (min
> sahf_cost
)
8211 if (min
> fcomi_cost
)
8216 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8219 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
8220 rtx
*second_test
, rtx
*bypass_test
)
8222 enum machine_mode fpcmp_mode
, intcmp_mode
;
8224 int cost
= ix86_fp_comparison_cost (code
);
8225 enum rtx_code bypass_code
, first_code
, second_code
;
8227 fpcmp_mode
= ix86_fp_compare_mode (code
);
8228 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
8231 *second_test
= NULL_RTX
;
8233 *bypass_test
= NULL_RTX
;
8235 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8237 /* Do fcomi/sahf based test when profitable. */
8238 if ((bypass_code
== UNKNOWN
|| bypass_test
)
8239 && (second_code
== UNKNOWN
|| second_test
)
8240 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
8244 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8245 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
8251 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8252 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8254 scratch
= gen_reg_rtx (HImode
);
8255 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8256 emit_insn (gen_x86_sahf_1 (scratch
));
8259 /* The FP codes work out to act like unsigned. */
8260 intcmp_mode
= fpcmp_mode
;
8262 if (bypass_code
!= UNKNOWN
)
8263 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
8264 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8266 if (second_code
!= UNKNOWN
)
8267 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
8268 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8273 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8274 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8275 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8277 scratch
= gen_reg_rtx (HImode
);
8278 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8280 /* In the unordered case, we have to check C2 for NaN's, which
8281 doesn't happen to work out to anything nice combination-wise.
8282 So do some bit twiddling on the value we've got in AH to come
8283 up with an appropriate set of condition codes. */
8285 intcmp_mode
= CCNOmode
;
8290 if (code
== GT
|| !TARGET_IEEE_FP
)
8292 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8297 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8298 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8299 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
8300 intcmp_mode
= CCmode
;
8306 if (code
== LT
&& TARGET_IEEE_FP
)
8308 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8309 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
8310 intcmp_mode
= CCmode
;
8315 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
8321 if (code
== GE
|| !TARGET_IEEE_FP
)
8323 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
8328 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8329 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8336 if (code
== LE
&& TARGET_IEEE_FP
)
8338 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8339 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8340 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8341 intcmp_mode
= CCmode
;
8346 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8352 if (code
== EQ
&& TARGET_IEEE_FP
)
8354 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8355 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8356 intcmp_mode
= CCmode
;
8361 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8368 if (code
== NE
&& TARGET_IEEE_FP
)
8370 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8371 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8377 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8383 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8387 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8396 /* Return the test that should be put into the flags user, i.e.
8397 the bcc, scc, or cmov instruction. */
8398 return gen_rtx_fmt_ee (code
, VOIDmode
,
8399 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8404 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
8407 op0
= ix86_compare_op0
;
8408 op1
= ix86_compare_op1
;
8411 *second_test
= NULL_RTX
;
8413 *bypass_test
= NULL_RTX
;
8415 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8416 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
8417 second_test
, bypass_test
);
8419 ret
= ix86_expand_int_compare (code
, op0
, op1
);
/* Return true if the CODE will result in a nontrivial jump sequence.  */
8426 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
8428 enum rtx_code bypass_code
, first_code
, second_code
;
8431 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8432 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
8436 ix86_expand_branch (enum rtx_code code
, rtx label
)
8440 switch (GET_MODE (ix86_compare_op0
))
8446 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
8447 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8448 gen_rtx_LABEL_REF (VOIDmode
, label
),
8450 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
8459 enum rtx_code bypass_code
, first_code
, second_code
;
8461 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
8464 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
	/* Check whether we will use the natural sequence with one jump.  If
	   so, we can expand the jump early.  Otherwise delay expansion by
	   creating a compound insn so as not to confuse the optimizers.  */
8469 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
8472 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
8473 gen_rtx_LABEL_REF (VOIDmode
, label
),
8478 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
8479 ix86_compare_op0
, ix86_compare_op1
);
8480 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8481 gen_rtx_LABEL_REF (VOIDmode
, label
),
8483 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
8485 use_fcomi
= ix86_use_fcomi_compare (code
);
8486 vec
= rtvec_alloc (3 + !use_fcomi
);
8487 RTVEC_ELT (vec
, 0) = tmp
;
8489 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
8491 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
8494 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
8496 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
8504 /* Expand DImode branch into multiple compare+branch. */
8506 rtx lo
[2], hi
[2], label2
;
8507 enum rtx_code code1
, code2
, code3
;
8509 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
8511 tmp
= ix86_compare_op0
;
8512 ix86_compare_op0
= ix86_compare_op1
;
8513 ix86_compare_op1
= tmp
;
8514 code
= swap_condition (code
);
8516 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
8517 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
8519 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8520 avoid two branches. This costs one extra insn, so disable when
8521 optimizing for size. */
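	  /* Illustrative sketch of the transformation described above
	     (register allocation is hypothetical):
		 xorl  hi1, hi0		; hi0 ^= hi1
		 xorl  lo1, lo0		; lo0 ^= lo1
		 orl   hi0, lo0		; zero iff both halves were equal
		 jz/jnz label
	     i.e. a single conditional branch on the IOR of the two XORs.  */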
8523 if ((code
== EQ
|| code
== NE
)
8525 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
8530 if (hi
[1] != const0_rtx
)
8531 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
8532 NULL_RTX
, 0, OPTAB_WIDEN
);
8535 if (lo
[1] != const0_rtx
)
8536 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
8537 NULL_RTX
, 0, OPTAB_WIDEN
);
8539 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
8540 NULL_RTX
, 0, OPTAB_WIDEN
);
8542 ix86_compare_op0
= tmp
;
8543 ix86_compare_op1
= const0_rtx
;
8544 ix86_expand_branch (code
, label
);
8548 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8549 op1 is a constant and the low word is zero, then we can just
8550 examine the high word. */
8552 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
8555 case LT
: case LTU
: case GE
: case GEU
:
8556 ix86_compare_op0
= hi
[0];
8557 ix86_compare_op1
= hi
[1];
8558 ix86_expand_branch (code
, label
);
8564 /* Otherwise, we need two or three jumps. */
8566 label2
= gen_label_rtx ();
8569 code2
= swap_condition (code
);
8570 code3
= unsigned_condition (code
);
8574 case LT
: case GT
: case LTU
: case GTU
:
8577 case LE
: code1
= LT
; code2
= GT
; break;
8578 case GE
: code1
= GT
; code2
= LT
; break;
8579 case LEU
: code1
= LTU
; code2
= GTU
; break;
8580 case GEU
: code1
= GTU
; code2
= LTU
; break;
8582 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
8583 case NE
: code2
= UNKNOWN
; break;
8591 * if (hi(a) < hi(b)) goto true;
8592 * if (hi(a) > hi(b)) goto false;
8593 * if (lo(a) < lo(b)) goto true;
8597 ix86_compare_op0
= hi
[0];
8598 ix86_compare_op1
= hi
[1];
8600 if (code1
!= UNKNOWN
)
8601 ix86_expand_branch (code1
, label
);
8602 if (code2
!= UNKNOWN
)
8603 ix86_expand_branch (code2
, label2
);
8605 ix86_compare_op0
= lo
[0];
8606 ix86_compare_op1
= lo
[1];
8607 ix86_expand_branch (code3
, label
);
8609 if (code2
!= UNKNOWN
)
8610 emit_label (label2
);
8619 /* Split branch based on floating point condition. */
8621 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
8622 rtx target1
, rtx target2
, rtx tmp
)
8625 rtx label
= NULL_RTX
;
8627 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
8630 if (target2
!= pc_rtx
)
8633 code
= reverse_condition_maybe_unordered (code
);
8638 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
8639 tmp
, &second
, &bypass
);
8641 if (split_branch_probability
>= 0)
      /* Distribute the probabilities across the jumps.
	 Assume the BYPASS and SECOND tests to always be
	 tests for UNORDERED.  */
8646 probability
= split_branch_probability
;
      /* A value of 1 is low enough that there is no need for the
	 probability to be updated.  Later we may run some experiments and
	 see if unordered values are more frequent in practice.  */
8652 bypass_probability
= 1;
8654 second_probability
= 1;
8656 if (bypass
!= NULL_RTX
)
8658 label
= gen_label_rtx ();
8659 i
= emit_jump_insn (gen_rtx_SET
8661 gen_rtx_IF_THEN_ELSE (VOIDmode
,
8663 gen_rtx_LABEL_REF (VOIDmode
,
8666 if (bypass_probability
>= 0)
8668 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8669 GEN_INT (bypass_probability
),
8672 i
= emit_jump_insn (gen_rtx_SET
8674 gen_rtx_IF_THEN_ELSE (VOIDmode
,
8675 condition
, target1
, target2
)));
8676 if (probability
>= 0)
8678 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8679 GEN_INT (probability
),
8681 if (second
!= NULL_RTX
)
8683 i
= emit_jump_insn (gen_rtx_SET
8685 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
8687 if (second_probability
>= 0)
8689 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8690 GEN_INT (second_probability
),
8693 if (label
!= NULL_RTX
)
8698 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
8700 rtx ret
, tmp
, tmpreg
, equiv
;
8701 rtx second_test
, bypass_test
;
8703 if (GET_MODE (ix86_compare_op0
) == DImode
8705 return 0; /* FAIL */
8707 if (GET_MODE (dest
) != QImode
)
8710 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8711 PUT_MODE (ret
, QImode
);
8716 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
8717 if (bypass_test
|| second_test
)
8719 rtx test
= second_test
;
8721 rtx tmp2
= gen_reg_rtx (QImode
);
8728 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
8730 PUT_MODE (test
, QImode
);
8731 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
8734 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
8736 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
8739 /* Attach a REG_EQUAL note describing the comparison result. */
8740 equiv
= simplify_gen_relational (code
, QImode
,
8741 GET_MODE (ix86_compare_op0
),
8742 ix86_compare_op0
, ix86_compare_op1
);
8743 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
8745 return 1; /* DONE */
/* Expand a comparison setting or clearing the carry flag.  Return true
   when successful and set pop for the operation.  */
8751 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
8753 enum machine_mode mode
=
8754 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
  /* Do not handle DImode compares that go through the special path.  Also
     we can't deal with FP compares yet.  This is possible to add.  */
8758 if ((mode
== DImode
&& !TARGET_64BIT
))
8760 if (FLOAT_MODE_P (mode
))
8762 rtx second_test
= NULL
, bypass_test
= NULL
;
8763 rtx compare_op
, compare_seq
;
      /* Shortcut: the following common codes never translate
	 into carry flag compares.  */
8766 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
8767 || code
== ORDERED
|| code
== UNORDERED
)
      /* These comparisons require the zero flag; swap operands so they won't.  */
8771 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
8777 code
= swap_condition (code
);
      /* Try to expand the comparison and verify that we end up with a carry
	 flag based comparison.  This fails to be true only when we decide
	 to expand the comparison using arithmetic, which is not a common
	 scenario.  */
8784 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
8785 &second_test
, &bypass_test
);
8786 compare_seq
= get_insns ();
8789 if (second_test
|| bypass_test
)
8791 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
8792 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
8793 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
8795 code
= GET_CODE (compare_op
);
8796 if (code
!= LTU
&& code
!= GEU
)
8798 emit_insn (compare_seq
);
8802 if (!INTEGRAL_MODE_P (mode
))
8810 /* Convert a==0 into (unsigned)a<1. */
8813 if (op1
!= const0_rtx
)
8816 code
= (code
== EQ
? LTU
: GEU
);
8819 /* Convert a>b into b<a or a>=b-1. */
8822 if (GET_CODE (op1
) == CONST_INT
)
8824 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
	  /* Bail out on overflow.  We could still swap the operands, but
	     that would force loading of the constant into a register.  */
8827 if (op1
== const0_rtx
8828 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
8830 code
= (code
== GTU
? GEU
: LTU
);
8837 code
= (code
== GTU
? LTU
: GEU
);
8841 /* Convert a>=0 into (unsigned)a<0x80000000. */
8844 if (mode
== DImode
|| op1
!= const0_rtx
)
8846 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
8847 code
= (code
== LT
? GEU
: LTU
);
8851 if (mode
== DImode
|| op1
!= constm1_rtx
)
8853 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
8854 code
= (code
== LE
? GEU
: LTU
);
  /* Swapping operands may cause a constant to appear as the first operand.  */
8861 if (!nonimmediate_operand (op0
, VOIDmode
))
8865 op0
= force_reg (mode
, op0
);
8867 ix86_compare_op0
= op0
;
8868 ix86_compare_op1
= op1
;
8869 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
8870 if (GET_CODE (*pop
) != LTU
&& GET_CODE (*pop
) != GEU
)
8876 ix86_expand_int_movcc (rtx operands
[])
8878 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
8879 rtx compare_seq
, compare_op
;
8880 rtx second_test
, bypass_test
;
8881 enum machine_mode mode
= GET_MODE (operands
[0]);
8882 bool sign_bit_compare_p
= false;;
8885 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8886 compare_seq
= get_insns ();
8889 compare_code
= GET_CODE (compare_op
);
8891 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
8892 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
8893 sign_bit_compare_p
= true;
8895   /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8896      HImode insns, we'd be swallowed in word prefix ops.  */
8898   if ((mode != HImode || TARGET_FAST_PREFIX)
8899       && (mode != DImode || TARGET_64BIT)
8900       && GET_CODE (operands[2]) == CONST_INT
8901       && GET_CODE (operands[3]) == CONST_INT)
8903       rtx out = operands[0];
8904       HOST_WIDE_INT ct = INTVAL (operands[2]);
8905       HOST_WIDE_INT cf = INTVAL (operands[3]);
8909       /* Sign bit compares are better done using shifts than we do by using
             sbb.  */
8911       if (sign_bit_compare_p
8912           || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
8913                                              ix86_compare_op1, &compare_op))
8915       /* Detect overlap between destination and compare sources.  */
8918       if (!sign_bit_compare_p)
8922           compare_code = GET_CODE (compare_op);
8924           if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
8925               || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
8928             compare_code = ix86_fp_compare_code_to_integer (compare_code);
8931           /* To simplify the rest of the code, restrict to the GEU case.  */
8932           if (compare_code == LTU)
8934               HOST_WIDE_INT tmp = ct;
8937               compare_code = reverse_condition (compare_code);
8938               code = reverse_condition (code);
8943               PUT_CODE (compare_op,
8944                         reverse_condition_maybe_unordered
8945                         (GET_CODE (compare_op)));
8947               PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
8951           if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8952               || reg_overlap_mentioned_p (out, ix86_compare_op1))
8953             tmp = gen_reg_rtx (mode);
8956             emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
8958             emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
8962       if (code == GT || code == GE)
8963         code = reverse_condition (code);
8966           HOST_WIDE_INT tmp = ct;
8971           tmp = emit_store_flag (tmp, code, ix86_compare_op0,
8972                                  ix86_compare_op1, VOIDmode, 0, -1);
8985           tmp = expand_simple_binop (mode, PLUS,
8987                                      copy_rtx (tmp), 1, OPTAB_DIRECT);
8998           tmp = expand_simple_binop (mode, IOR,
9000                                      copy_rtx (tmp), 1, OPTAB_DIRECT);
9002       else if (diff == -1 && ct)
9012           tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9014           tmp = expand_simple_binop (mode, PLUS,
9015                                      copy_rtx (tmp), GEN_INT (cf),
9016                                      copy_rtx (tmp), 1, OPTAB_DIRECT);
9024        * andl cf - ct, dest
9034           tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9037           tmp = expand_simple_binop (mode, AND,
9039                                      gen_int_mode (cf - ct, mode),
9040                                      copy_rtx (tmp), 1, OPTAB_DIRECT);
9042           tmp = expand_simple_binop (mode, PLUS,
9043                                      copy_rtx (tmp), GEN_INT (ct),
9044                                      copy_rtx (tmp), 1, OPTAB_DIRECT);
9047       if (!rtx_equal_p (tmp, out))
9048         emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9050 return 1; /* DONE */
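/* Illustration (editorial sketch, not part of the original sources): the
   masked-select form built in this branch.  "mask" models the 0/-1 value
   that sbb leaves in the destination; the result is (cond ? ct : cf)
   computed without a branch.  */
#if 0
static unsigned int
cmov_via_sbb_mask (int cond, unsigned int ct, unsigned int cf)
{
  unsigned int mask = cond ? ~0u : 0u;   /* sbb reg,reg           */
  return ((cf - ct) & ~mask) + ct;       /* notl; andl cf-ct; addl ct */
}
#endif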
9056           tmp = ct, ct = cf, cf = tmp;
9058           if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9060               /* We may be reversing an unordered compare to a normal compare, which
9061                  is not valid in general (we may convert a non-trapping condition
9062                  into a trapping one), however on i386 we currently emit all
9063                  comparisons unordered.  */
9064               compare_code = reverse_condition_maybe_unordered (compare_code);
9065               code = reverse_condition_maybe_unordered (code);
9069               compare_code = reverse_condition (compare_code);
9070               code = reverse_condition (code);
9074       compare_code = UNKNOWN;
9075       if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9076           && GET_CODE (ix86_compare_op1) == CONST_INT)
9078           if (ix86_compare_op1 == const0_rtx
9079               && (code == LT || code == GE))
9080             compare_code = code;
9081           else if (ix86_compare_op1 == constm1_rtx)
9085           else if (code == GT)
9090       /* Optimize dest = (op0 < 0) ? -1 : cf.  */
9091       if (compare_code != UNKNOWN
9092           && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9093           && (cf == -1 || ct == -1))
9095           /* If the lea code below could be used, only optimize
9096              if it results in a 2 insn sequence.  */
9098           if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9099                  || diff == 3 || diff == 5 || diff == 9)
9100               || (compare_code == LT && ct == -1)
9101               || (compare_code == GE && cf == -1))
9104              * notl op1 (if necessary)
9112               code = reverse_condition (code);
9115           out = emit_store_flag (out, code, ix86_compare_op0,
9116                                  ix86_compare_op1, VOIDmode, 0, -1);
9118           out = expand_simple_binop (mode, IOR,
9120                                      out, 1, OPTAB_DIRECT);
9121           if (out != operands[0])
9122             emit_move_insn (operands[0], out);
9124           return 1; /* DONE */
9129       if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9130            || diff == 3 || diff == 5 || diff == 9)
9131           && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9133               || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9139            * lea cf(dest*(ct-cf)),dest
9143            * This also catches the degenerate setcc-only case.
9149           out = emit_store_flag (out, code, ix86_compare_op0,
9150                                  ix86_compare_op1, VOIDmode, 0, 1);
9153           /* On x86_64 the lea instruction operates on Pmode, so we need
9154              to get the arithmetic done in the proper mode to match.  */
9156             tmp = copy_rtx (out);
9160               out1 = copy_rtx (out);
9161               tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9165                 tmp = gen_rtx_PLUS (mode, tmp, out1);
9171             tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9174           if (!rtx_equal_p (tmp, out))
9177               out = force_operand (tmp, copy_rtx (out));
9179               emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9181           if (!rtx_equal_p (out, operands[0]))
9182             emit_move_insn (operands[0], copy_rtx (out));
9184           return 1; /* DONE */
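/* Illustration (editorial sketch, not part of the original sources): the
   setcc + lea form used above.  "cond" models the 0/1 setcc result; when
   ct - cf is one of 1,2,3,4,5,8,9 the multiply and add fold into a single
   lea with a displacement of cf.  */
#if 0
static int
cmov_via_lea (int cond, int ct, int cf)
{
  return cond * (ct - cf) + cf;   /* cond==1 -> ct, cond==0 -> cf */
}
#endif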
9188        * General case:                      Jumpful:
9189        *   xorl dest,dest                   cmpl op1, op2
9190        *   cmpl op1, op2                    movl ct, dest
9192        *   decl dest                        movl cf, dest
9193        *   andl (cf-ct),dest                1:
9198        * This is reasonably steep, but branch mispredict costs are
9199        * high on modern cpus, so consider failing only if optimizing
9        *   for size.
9203       if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9204           && BRANCH_COST >= 2)
9210       if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9211         /* We may be reversing an unordered compare to a normal compare,
9212            which is not valid in general (we may convert a non-trapping
9213            condition into a trapping one), however on i386 we currently
9214            emit all comparisons unordered.  */
9215         code = reverse_condition_maybe_unordered (code);
9218         code = reverse_condition (code);
9219       if (compare_code != UNKNOWN)
9220         compare_code = reverse_condition (compare_code);
9224       if (compare_code != UNKNOWN)
9226           /* notl op1 (if needed)
9231              For x < 0 (resp. x <= -1) there will be no notl,
9232              so if possible swap the constants to get rid of the
                 complement.
9234              True/false will be -1/0 while the code below (store flag
9235              followed by decrement) is 0/-1, so the constants need
9236              to be exchanged once more.  */
9238           if (compare_code == GE || !cf)
9240               code = reverse_condition (code);
= cf
;
9250 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9251 ix86_compare_op1
, VOIDmode
, 0, -1);
9255 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9256 ix86_compare_op1
, VOIDmode
, 0, 1);
9258 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
9259 copy_rtx (out
), 1, OPTAB_DIRECT
);
9262 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
9263 gen_int_mode (cf
- ct
, mode
),
9264 copy_rtx (out
), 1, OPTAB_DIRECT
);
9266 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
9267 copy_rtx (out
), 1, OPTAB_DIRECT
);
9268 if (!rtx_equal_p (out
, operands
[0]))
9269 emit_move_insn (operands
[0], copy_rtx (out
));
9271 return 1; /* DONE */
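/* Illustration (editorial sketch, not part of the original sources): the
   setcc/decrement/and sequence emitted just above.  A 0/1 flag minus one
   gives a 0/-1 mask, which then selects between ct and cf branchlessly.  */
#if 0
static unsigned int
cmov_setcc_dec_and (int cond, unsigned int ct, unsigned int cf)
{
  unsigned int mask = (unsigned int) (cond != 0) - 1u; /* setcc ; decl -> 0 or ~0 */
  return (mask & (cf - ct)) + ct;                      /* andl cf-ct ; addl ct    */
}
#endif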
9275   if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9277       /* Try a few things more with specific constants and a variable.  */
9280       rtx var, orig_out, out, tmp;
9282       if (BRANCH_COST <= 2)
9283         return 0; /* FAIL */
9285       /* If one of the two operands is an interesting constant, load a
9286          constant with the above and mask it in with a logical operation.  */
9288       if (GET_CODE (operands[2]) == CONST_INT)
9291           if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9292             operands[3] = constm1_rtx, op = and_optab;
9293           else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9294             operands[3] = const0_rtx, op = ior_optab;
9296             return 0; /* FAIL */
9298       else if (GET_CODE (operands[3]) == CONST_INT)
9301           if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9302             operands[2] = constm1_rtx, op = and_optab;
9303           else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
9304             operands[2] = const0_rtx, op = ior_optab;
9306             return 0; /* FAIL */
9309         return 0; /* FAIL */
9311       orig_out = operands[0];
9312       tmp = gen_reg_rtx (mode);
9315       /* Recurse to get the constant loaded.  */
9316       if (ix86_expand_int_movcc (operands) == 0)
9317         return 0; /* FAIL */
9319       /* Mask in the interesting variable.  */
9320       out = expand_binop (mode, op, var, tmp, orig_out, 0,
9322       if (!rtx_equal_p (out, orig_out))
9323         emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9325       return 1; /* DONE */
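/* Illustration (editorial sketch, not part of the original sources): when
   one arm of the conditional move is 0 or -1, the variable arm can be
   merged with AND or OR against a 0/-1 mask, which is what the recursion
   above produces.  */
#if 0
static unsigned int
sel_zero_or_var (int cond, unsigned int var)
{ return (cond ? 0u : ~0u) & var; }    /* cond ? 0 : var  via AND */
static unsigned int
sel_m1_or_var (int cond, unsigned int var)
{ return (cond ? ~0u : 0u) | var; }    /* cond ? -1 : var via OR  */
#endif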
9329    * For comparison with above,
9339   if (! nonimmediate_operand (operands[2], mode))
9340     operands[2] = force_reg (mode, operands[2]);
9341   if (! nonimmediate_operand (operands[3], mode))
9342     operands[3] = force_reg (mode, operands[3]);
9344   if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9346       rtx tmp = gen_reg_rtx (mode);
9347       emit_move_insn (tmp, operands[3]);
9350   if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9352       rtx tmp = gen_reg_rtx (mode);
9353       emit_move_insn (tmp, operands[2]);
9357   if (! register_operand (operands[2], VOIDmode)
9359           || ! register_operand (operands[3], VOIDmode)))
9360     operands[2] = force_reg (mode, operands[2]);
9363       && ! register_operand (operands[3], VOIDmode))
9364     operands[3] = force_reg (mode, operands[3]);
9366   emit_insn (compare_seq);
9367   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9368                           gen_rtx_IF_THEN_ELSE (mode,
9369                                                 compare_op, operands[2],
9372     emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9373                             gen_rtx_IF_THEN_ELSE (mode,
9375                                                   copy_rtx (operands[3]),
9376                                                   copy_rtx (operands[0]))));
9378     emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9379                             gen_rtx_IF_THEN_ELSE (mode,
9381                                                   copy_rtx (operands[2]),
9382                                                   copy_rtx (operands[0]))));
9384   return 1; /* DONE */
9388 ix86_expand_fp_movcc (rtx operands[])
9392   rtx compare_op, second_test, bypass_test;
9394   /* For SF/DFmode conditional moves based on comparisons
9395      in the same mode, we may want to use SSE min/max instructions.  */
9396   if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9397        || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9398       && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9399       /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
9401           || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9402       /* We may be called from the post-reload splitter.  */
9403       && (!REG_P (operands[0])
9404           || SSE_REG_P (operands[0])
9405           || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9407       rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9408       code = GET_CODE (operands[1]);
9410       /* See if we have a (cross) match between the comparison operands
9411          and the conditional move operands.  */
9412       if (rtx_equal_p (operands[2], op1))
9417           code = reverse_condition_maybe_unordered (code);
9419       if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9421           /* Check for min operation.  */
9422           if (code == LT || code == UNLE)
9430               operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9431               if (memory_operand (op0, VOIDmode))
9432                 op0 = force_reg (GET_MODE (operands[0]), op0);
9433               if (GET_MODE (operands[0]) == SFmode)
9434                 emit_insn (gen_minsf3 (operands[0], op0, op1));
9436                 emit_insn (gen_mindf3 (operands[0], op0, op1));
9439           /* Check for max operation.  */
9440           if (code == GT || code == UNGE)
9448               operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9449               if (memory_operand (op0, VOIDmode))
9450                 op0 = force_reg (GET_MODE (operands[0]), op0);
9451               if (GET_MODE (operands[0]) == SFmode)
9452                 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9454                 emit_insn (gen_maxdf3 (operands[0], op0, op1));
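/* Illustration (editorial sketch, not part of the original sources; NaN
   ordering subtleties ignored): the pattern matched above is a conditional
   move whose arms are exactly the comparison operands, i.e. a min or max,
   which SSE can compute directly.  */
#if 0
static double min_as_movcc (double a, double b) { return a < b ? a : b; }  /* minsd-style */
static double max_as_movcc (double a, double b) { return a > b ? a : b; }  /* maxsd-style */
#endif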
9458   /* Manage the condition to be sse_comparison_operator.  In case we are
9459      in non-ieee mode, try to canonicalize the destination operand
9460      to be first in the comparison - this helps reload to avoid extra
         moves.  */
9462   if (!sse_comparison_operator (operands[1], VOIDmode)
9463       || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9465       rtx tmp = ix86_compare_op0;
9466       ix86_compare_op0 = ix86_compare_op1;
9467       ix86_compare_op1 = tmp;
9468       operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9469                                     VOIDmode, ix86_compare_op0,
9472   /* Similarly try to manage the result to be the first operand of the
9473      conditional move.  We also don't support the NE comparison on SSE, so try to
9475   if ((rtx_equal_p (operands[0], operands[3])
9476        && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9477       || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9479       rtx tmp = operands[2];
9480       operands[2] = operands[3];
9482       operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9483                                     (GET_CODE (operands[1])),
9484                                     VOIDmode, ix86_compare_op0,
9487   if (GET_MODE (operands[0]) == SFmode)
9488     emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9489                                 operands[2], operands[3],
9490                                 ix86_compare_op0, ix86_compare_op1));
9492     emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9493                                 operands[2], operands[3],
9494                                 ix86_compare_op0, ix86_compare_op1));
9498   /* The floating point conditional move instructions don't directly
9499      support conditions resulting from a signed integer comparison.  */
9501   code = GET_CODE (operands[1]);
9502   compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9504   /* The floating point conditional move instructions don't directly
9505      support signed integer comparisons.  */
9507   if (!fcmov_comparison_operator (compare_op, VOIDmode))
9509       if (second_test != NULL || bypass_test != NULL)
9511       tmp = gen_reg_rtx (QImode);
9512       ix86_expand_setcc (code, tmp);
9514       ix86_compare_op0 = tmp;
9515       ix86_compare_op1 = const0_rtx;
9516       compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9518   if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9520       tmp = gen_reg_rtx (GET_MODE (operands[0]));
9521       emit_move_insn (tmp, operands[3]);
9524   if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9526       tmp = gen_reg_rtx (GET_MODE (operands[0]));
9527       emit_move_insn (tmp, operands[2]);
9531   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9532                           gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9537     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9538                             gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9543     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9544                             gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9552 /* Expand conditional increment or decrement using adc/sbb instructions.
9553    The default case using setcc followed by the conditional move can be
9554    done by generic code.  */
9556 ix86_expand_int_addcc (rtx operands[])
9558   enum rtx_code code = GET_CODE (operands[1]);
9560   rtx val = const0_rtx;
9562   enum machine_mode mode = GET_MODE (operands[0]);
9564   if (operands[3] != const1_rtx
9565       && operands[3] != constm1_rtx)
9567   if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9568                                        ix86_compare_op1, &compare_op))
9570   code = GET_CODE (compare_op);
9572   if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9573       || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9576       code = ix86_fp_compare_code_to_integer (code);
9583       PUT_CODE (compare_op,
9584                 reverse_condition_maybe_unordered
9585                 (GET_CODE (compare_op)));
9587       PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9589   PUT_MODE (compare_op, mode);
9591   /* Construct either adc or sbb insn.  */
9592   if ((code == LTU) == (operands[3] == constm1_rtx))
9594       switch (GET_MODE (operands[0]))
9597           emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
9600           emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
9603           emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
9606           emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9614       switch (GET_MODE (operands[0]))
9617           emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
9620           emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
9623           emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
9626           emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9632   return 1; /* DONE */
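/* Illustration (editorial sketch, not part of the original sources): the
   conditional increment above folds the carry of an unsigned compare
   straight into an add-with-carry, so "x += (a < b)" needs neither a
   branch nor a setcc.  */
#if 0
static unsigned int
cond_inc (unsigned int x, unsigned int a, unsigned int b)
{
  return x + (a < b ? 1u : 0u);   /* cmpl b,a ; adcl $0,x */
}
#endif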
9636 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
9637    works for floating point parameters and non-offsettable memories.
9638    For pushes, it returns just stack offsets; the values will be saved
9639    in the right order.  Maximally three parts are generated.  */
9642 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
9647   size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
9649   size = (GET_MODE_SIZE (mode) + 4) / 8;
9651   if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9653   if (size < 2 || size > 3)
9656   /* Optimize constant pool references to immediates.  This is used by fp
9657      moves that force all constants to memory to allow combining.  */
9658   if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
9660       rtx tmp = maybe_get_pool_constant (operand);
9665   if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9667       /* The only non-offsettable memories we handle are pushes.  */
9668       if (! push_operand (operand, VOIDmode))
9671       operand = copy_rtx (operand);
9672       PUT_MODE (operand, Pmode);
9673       parts[0] = parts[1] = parts[2] = operand;
9675   else if (!TARGET_64BIT)
9678         split_di (&operand, 1, &parts[0], &parts[1]);
9681       if (REG_P (operand))
9683           if (!reload_completed)
9685           parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9686           parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9688             parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9690       else if (offsettable_memref_p (operand))
9692           operand = adjust_address (operand, SImode, 0);
9694           parts[1] = adjust_address (operand, SImode, 4);
9696             parts[2] = adjust_address (operand, SImode, 8);
9698       else if (GET_CODE (operand) == CONST_DOUBLE)
9703           REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9707               REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9708               parts[2] = gen_int_mode (l[2], SImode);
9711               REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9716           parts[1] = gen_int_mode (l[1], SImode);
9717           parts[0] = gen_int_mode (l[0], SImode);
9726         split_ti (&operand, 1, &parts[0], &parts[1]);
9727       if (mode == XFmode || mode == TFmode)
9729           enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
9730           if (REG_P (operand))
9732               if (!reload_completed)
9734               parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9735               parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
9737           else if (offsettable_memref_p (operand))
9739               operand = adjust_address (operand, DImode, 0);
9741               parts[1] = adjust_address (operand, upper_mode, 8);
9743           else if (GET_CODE (operand) == CONST_DOUBLE)
9748               REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9749               real_to_target (l, &r, mode);
9750               /* Do not use shift by 32 to avoid warning on 32bit systems.  */
9751               if (HOST_BITS_PER_WIDE_INT >= 64)
9754                     ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9755                      + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9758                 parts[0] = immed_double_const (l[0], l[1], DImode);
9759               if (upper_mode == SImode)
9760                 parts[1] = gen_int_mode (l[2], SImode);
9761               else if (HOST_BITS_PER_WIDE_INT >= 64)
9764                     ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
9765                      + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
9768                 parts[1] = immed_double_const (l[2], l[3], DImode);
9778 /* Emit insns to perform a move or push of DI, DF, and XF values.
9779    Return false when normal moves are needed; true when all required
9780    insns have been emitted.  Operands 2-4 contain the input values
9781    in the correct order; operands 5-7 contain the output values.  */
9784 ix86_split_long_move (rtx operands[])
9790   enum machine_mode mode = GET_MODE (operands[0]);
9792   /* The DFmode expanders may ask us to move a double.
9793      For a 64-bit target this is a single move.  By hiding the fact
9794      here we simplify the i386.md splitters.  */
9795   if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9797       /* Optimize constant pool references to immediates.  This is used by
9798          fp moves that force all constants to memory to allow combining.  */
9800       if (GET_CODE (operands[1]) == MEM
9801           && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9802           && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9803         operands[1] = get_pool_constant (XEXP (operands[1], 0));
9804       if (push_operand (operands[0], VOIDmode))
9806           operands[0] = copy_rtx (operands[0]);
9807           PUT_MODE (operands[0], Pmode);
9810           operands[0] = gen_lowpart (DImode, operands[0]);
9811           operands[1] = gen_lowpart (DImode, operands[1]);
9812           emit_move_insn (operands[0], operands[1]);
9816   /* The only non-offsettable memory we handle is push.  */
9817   if (push_operand (operands[0], VOIDmode))
9819   else if (GET_CODE (operands[0]) == MEM
9820            && ! offsettable_memref_p (operands[0]))
9823   nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9824   ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9826   /* When emitting a push, watch out for source operands on the stack.  */
9827   if (push && GET_CODE (operands[1]) == MEM
9828       && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9831       part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9832                                    XEXP (part[1][2], 0));
9833       part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9834                                    XEXP (part[1][1], 0));
9837   /* We need to do the copy in the right order in case an address register
9838      of the source overlaps the destination.  */
9839   if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9841       if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9843       if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9846           && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9849       /* A collision in the middle part can be handled by reordering.  */
9850       if (collisions == 1 && nparts == 3
9851           && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9854           tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9855           tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9858       /* If there are more collisions, we can't handle it by reordering.
9859          Do an lea to the last part and use only one colliding move.  */
9860       else if (collisions > 1)
9866           base = part[0][nparts - 1];
9868           /* Handle the case when the last part isn't valid for lea.
9869              Happens in 64-bit mode storing the 12-byte XFmode.  */
9870           if (GET_MODE (base) != Pmode)
9871             base = gen_rtx_REG (Pmode, REGNO (base));
9873           emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
9874           part[1][0] = replace_equiv_address (part[1][0], base);
9875           part[1][1] = replace_equiv_address (part[1][1],
9876                                               plus_constant (base, UNITS_PER_WORD));
9878             part[1][2] = replace_equiv_address (part[1][2],
9879                                                 plus_constant (base, 8));
9889           if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
9890             emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
9891           emit_move_insn (part[0][2], part[1][2]);
9896       /* In 64-bit mode we don't have a 32-bit push available.  In case this
9897          is a register, it is OK - we will just use the larger counterpart.
9898          We also retype the memory - these come from an attempt to avoid a
9899          REX prefix on moving the second half of a TFmode value.  */
9900       if (GET_MODE (part[1][1]) == SImode)
9902           if (GET_CODE (part[1][1]) == MEM)
9903             part[1][1] = adjust_address (part[1][1], DImode, 0);
9904           else if (REG_P (part[1][1]))
9905             part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9908           if (GET_MODE (part[1][0]) == SImode)
9909             part[1][0] = part[1][1];
9912       emit_move_insn (part[0][1], part[1][1]);
9913       emit_move_insn (part[0][0], part[1][0]);
9917 /* Choose correct order to not overwrite the source before it is copied. */
9918 if ((REG_P (part
[0][0])
9919 && REG_P (part
[1][1])
9920 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
9922 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
9924 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
9928 operands
[2] = part
[0][2];
9929 operands
[3] = part
[0][1];
9930 operands
[4] = part
[0][0];
9931 operands
[5] = part
[1][2];
9932 operands
[6] = part
[1][1];
9933 operands
[7] = part
[1][0];
9937 operands
[2] = part
[0][1];
9938 operands
[3] = part
[0][0];
9939 operands
[5] = part
[1][1];
9940 operands
[6] = part
[1][0];
9947 operands
[2] = part
[0][0];
9948 operands
[3] = part
[0][1];
9949 operands
[4] = part
[0][2];
9950 operands
[5] = part
[1][0];
9951 operands
[6] = part
[1][1];
9952 operands
[7] = part
[1][2];
9956 operands
[2] = part
[0][0];
9957 operands
[3] = part
[0][1];
9958 operands
[5] = part
[1][0];
9959 operands
[6] = part
[1][1];
9963 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
9966 if (GET_CODE (operands
[5]) == CONST_INT
9967 && operands
[5] != const0_rtx
9968 && REG_P (operands
[2]))
9970 if (GET_CODE (operands
[6]) == CONST_INT
9971 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
9972 operands
[6] = operands
[2];
9975 && GET_CODE (operands
[7]) == CONST_INT
9976 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
9977 operands
[7] = operands
[2];
9981 && GET_CODE (operands
[6]) == CONST_INT
9982 && operands
[6] != const0_rtx
9983 && REG_P (operands
[3])
9984 && GET_CODE (operands
[7]) == CONST_INT
9985 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
9986 operands
[7] = operands
[3];
9989 emit_move_insn (operands
[2], operands
[5]);
9990 emit_move_insn (operands
[3], operands
[6]);
9992 emit_move_insn (operands
[4], operands
[7]);
9997 /* Helper function of ix86_split_ashldi used to generate an SImode
9998    left shift by a constant, either using a single shift or
9999    a sequence of add instructions.  */
10002 ix86_expand_ashlsi3_const (rtx operand, int count)
10005     emit_insn (gen_addsi3 (operand, operand, operand));
10006   else if (!optimize_size
10007            && count * ix86_cost->add <= ix86_cost->shift_const)
10010       for (i = 0; i < count; i++)
10011         emit_insn (gen_addsi3 (operand, operand, operand));
10014     emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
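/* Illustration (editorial sketch, not part of the original sources): a left
   shift by COUNT can be done as COUNT self-additions, each of which doubles
   the value; the code above prefers that when COUNT * cost(add) does not
   exceed the cost of a shift by a constant.  */
#if 0
static unsigned int
shl_by_adds (unsigned int x, int count)
{
  while (count-- > 0)
    x += x;            /* addl %eax,%eax  ==  shll $1,%eax */
  return x;
}
#endif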
10018 ix86_split_ashldi (rtx *operands, rtx scratch)
10020   rtx low[2], high[2];
10023   if (GET_CODE (operands[2]) == CONST_INT)
10025       split_di (operands, 2, low, high);
10026       count = INTVAL (operands[2]) & 63;
10030           emit_move_insn (high[0], low[1]);
10031           emit_move_insn (low[0], const0_rtx);
10034             ix86_expand_ashlsi3_const (high[0], count - 32);
10038           if (!rtx_equal_p (operands[0], operands[1]))
10039             emit_move_insn (operands[0], operands[1]);
10040           emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10041           ix86_expand_ashlsi3_const (low[0], count);
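/* Illustration (editorial sketch, not part of the original sources): the
   DImode left shift split into two 32-bit halves as done above; COUNT is
   assumed already masked to 0..63.  */
#if 0
static void
shl64_split (unsigned int *lo, unsigned int *hi, int count)
{
  if (count >= 32)
    {
      *hi = *lo << (count - 32);   /* movl lo,hi ; shll $(count-32),hi */
      *lo = 0;
    }
  else if (count > 0)
    {
      *hi = (*hi << count) | (*lo >> (32 - count));   /* shld */
      *lo <<= count;
    }
}
#endif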
10046       split_di (operands, 1, low, high);
10048       if (operands[1] == const1_rtx)
10050           /* Assuming we've chosen QImode-capable registers, 1LL << N
10051              can be done with two 32-bit shifts, no branches, no cmoves.  */
10052           if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
10054 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
10056 ix86_expand_clear (low
[0]);
10057 ix86_expand_clear (high
[0]);
10058 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (32)));
10060 d
= gen_lowpart (QImode
, low
[0]);
10061 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
10062 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
10063 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
10065 d
= gen_lowpart (QImode
, high
[0]);
10066 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
10067 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
10068 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
10071 /* Otherwise, we can get the same results by manually performing
10072 a bit extract operation on bit 5, and then performing the two
10073 shifts. The two methods of getting 0/1 into low/high are exactly
10074 the same size. Avoiding the shift in the bit extract case helps
10075 pentium4 a bit; no one else seems to care much either way. */
10080 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
10081 x
= gen_rtx_ZERO_EXTEND (SImode
, operands
[2]);
10083 x
= gen_lowpart (SImode
, operands
[2]);
10084 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
10086 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (5)));
10087 emit_insn (gen_andsi3 (high
[0], high
[0], GEN_INT (1)));
10088 emit_move_insn (low
[0], high
[0]);
10089 emit_insn (gen_xorsi3 (low
[0], low
[0], GEN_INT (1)));
10092 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
10093 emit_insn (gen_ashlsi3 (high
[0], high
[0], operands
[2]));
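/* Illustration (editorial sketch, not part of the original sources): the
   1LL << N expansion above puts the single 1 bit into the low or the high
   half according to bit 5 of the count, then lets the 32-bit shifts (which
   only use count & 31) finish the job.  */
#if 0
static void
one_shl64 (unsigned int *lo, unsigned int *hi, int count)
{
  *hi = (count & 32) ? 1u : 0u;
  *lo = (count & 32) ? 0u : 1u;
  *hi <<= (count & 31);
  *lo <<= (count & 31);
}
#endif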
10097 if (operands
[1] == constm1_rtx
)
10099 /* For -1LL << N, we can avoid the shld instruction, because we
10100 know that we're shifting 0...31 ones into a -1. */
10101 emit_move_insn (low
[0], constm1_rtx
);
10103 emit_move_insn (high
[0], low
[0]);
10105 emit_move_insn (high
[0], constm1_rtx
);
10109 if (!rtx_equal_p (operands
[0], operands
[1]))
10110 emit_move_insn (operands
[0], operands
[1]);
10112 split_di (operands
, 1, low
, high
);
10113 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
10116 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
10118 if (TARGET_CMOVE
&& scratch
)
10120 ix86_expand_clear (scratch
);
10121 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2], scratch
));
10124 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
10128 ix86_split_ashrdi (rtx
*operands
, rtx scratch
)
10130 rtx low
[2], high
[2];
10133 if (GET_CODE (operands
[2]) == CONST_INT
)
10135 split_di (operands
, 2, low
, high
);
10136 count
= INTVAL (operands
[2]) & 63;
10140 emit_move_insn (high
[0], high
[1]);
10141 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
10142 emit_move_insn (low
[0], high
[0]);
10145 else if (count
>= 32)
10147 emit_move_insn (low
[0], high
[1]);
10148 emit_move_insn (high
[0], low
[0]);
10149 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
10151 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
10155 if (!rtx_equal_p (operands
[0], operands
[1]))
10156 emit_move_insn (operands
[0], operands
[1]);
10157 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
10158 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
10163 if (!rtx_equal_p (operands
[0], operands
[1]))
10164 emit_move_insn (operands
[0], operands
[1]);
10166 split_di (operands
, 1, low
, high
);
10168 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
10169 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
10171 if (TARGET_CMOVE
&& scratch
)
10173 emit_move_insn (scratch
, high
[0]);
10174 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
10175 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
10179 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
10184 ix86_split_lshrdi (rtx
*operands
, rtx scratch
)
10186 rtx low
[2], high
[2];
10189 if (GET_CODE (operands
[2]) == CONST_INT
)
10191 split_di (operands
, 2, low
, high
);
10192 count
= INTVAL (operands
[2]) & 63;
10196 emit_move_insn (low
[0], high
[1]);
10197 ix86_expand_clear (high
[0]);
10200 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
10204 if (!rtx_equal_p (operands
[0], operands
[1]))
10205 emit_move_insn (operands
[0], operands
[1]);
10206 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
10207 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
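/* Illustration (editorial sketch, not part of the original sources): the
   DImode logical right shift handled above, split into 32-bit halves with
   the count already masked to 0..63.  */
#if 0
static void
shr64_split (unsigned int *lo, unsigned int *hi, int count)
{
  if (count >= 32)
    {
      *lo = *hi >> (count - 32);   /* movl hi,lo ; shrl $(count-32),lo */
      *hi = 0;
    }
  else if (count > 0)
    {
      *lo = (*lo >> count) | (*hi << (32 - count));   /* shrd */
      *hi >>= count;
    }
}
#endif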
10212 if (!rtx_equal_p (operands
[0], operands
[1]))
10213 emit_move_insn (operands
[0], operands
[1]);
10215 split_di (operands
, 1, low
, high
);
10217 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
10218 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
10220 /* Heh. By reversing the arguments, we can reuse this pattern. */
10221 if (TARGET_CMOVE
&& scratch
)
10223 ix86_expand_clear (scratch
);
10224 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
10228 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
10232 /* Helper function for the string operations below.  Test whether VARIABLE
10233    is aligned to VALUE bytes; if so, jump to the returned label.  */
10235 ix86_expand_aligntest (rtx variable, int value)
10237   rtx label = gen_label_rtx ();
10238   rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10239 if (GET_MODE (variable
) == DImode
)
10240 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
10242 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
10243 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
10248 /* Adjust COUNTER by the VALUE. */
10250 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
10252 if (GET_MODE (countreg
) == DImode
)
10253 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
10255 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
10258 /* Zero extend possibly SImode EXP to Pmode register. */
10260 ix86_zero_extend_to_Pmode (rtx exp
)
10263 if (GET_MODE (exp
) == VOIDmode
)
10264 return force_reg (Pmode
, exp
);
10265 if (GET_MODE (exp
) == Pmode
)
10266 return copy_to_mode_reg (Pmode
, exp
);
10267 r
= gen_reg_rtx (Pmode
);
10268 emit_insn (gen_zero_extendsidi2 (r
, exp
));
10272 /* Expand string move (memcpy) operation. Use i386 string operations when
10273 profitable. expand_clrmem contains similar code. */
10275 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
)
10277 rtx srcreg
, destreg
, countreg
, srcexp
, destexp
;
10278 enum machine_mode counter_mode
;
10279 HOST_WIDE_INT align
= 0;
10280 unsigned HOST_WIDE_INT count
= 0;
10282 if (GET_CODE (align_exp
) == CONST_INT
)
10283 align
= INTVAL (align_exp
);
10285 /* Can't use any of this if the user has appropriated esi or edi. */
10286 if (global_regs
[4] || global_regs
[5])
10289 /* This simple hack avoids all inlining code and simplifies code below. */
10290 if (!TARGET_ALIGN_STRINGOPS
)
10293 if (GET_CODE (count_exp
) == CONST_INT
)
10295 count
= INTVAL (count_exp
);
10296 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
10300 /* Figure out proper mode for counter. For 32bits it is always SImode,
10301 for 64bits use SImode when possible, otherwise DImode.
10302 Set count to number of bytes copied when known at compile time. */
10304 || GET_MODE (count_exp
) == SImode
10305 || x86_64_zext_immediate_operand (count_exp
, VOIDmode
))
10306 counter_mode
= SImode
;
10308 counter_mode
= DImode
;
10310 if (counter_mode
!= SImode
&& counter_mode
!= DImode
)
10313 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
10314 if (destreg
!= XEXP (dst
, 0))
10315 dst
= replace_equiv_address_nv (dst
, destreg
);
10316 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
10317 if (srcreg
!= XEXP (src
, 0))
10318 src
= replace_equiv_address_nv (src
, srcreg
);
10320 /* When optimizing for size emit simple rep ; movsb instruction for
10321 counts not divisible by 4. */
10323 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
10325 emit_insn (gen_cld ());
10326 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
10327 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
10328 srcexp
= gen_rtx_PLUS (Pmode
, srcreg
, countreg
);
10329 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
, countreg
,
10333 /* For constant aligned (or small unaligned) copies use rep movsl
10334 followed by code copying the rest. For PentiumPro ensure 8 byte
10335 alignment to allow rep movsl acceleration. */
10337 else if (count
!= 0
10339 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
10340 || optimize_size
|| count
< (unsigned int) 64))
10342 unsigned HOST_WIDE_INT offset
= 0;
10343 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
10344 rtx srcmem
, dstmem
;
10346 emit_insn (gen_cld ());
10347 if (count
& ~(size
- 1))
10349 countreg
= copy_to_mode_reg (counter_mode
,
10350 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
10351 & (TARGET_64BIT
? -1 : 0x3fffffff)));
10352 countreg
= ix86_zero_extend_to_Pmode (countreg
);
10354 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
10355 GEN_INT (size
== 4 ? 2 : 3));
10356 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
10357 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
10359 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
10360 countreg
, destexp
, srcexp
));
10361 offset
= count
& ~(size
- 1);
10363 if (size
== 8 && (count
& 0x04))
10365 srcmem
= adjust_automodify_address_nv (src
, SImode
, srcreg
,
10367 dstmem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
10369 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10374 srcmem
= adjust_automodify_address_nv (src
, HImode
, srcreg
,
10376 dstmem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
10378 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10383 srcmem
= adjust_automodify_address_nv (src
, QImode
, srcreg
,
10385 dstmem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
10387 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10390 /* The generic code based on the glibc implementation:
10391 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10392 allowing accelerated copying there)
10393 - copy the data using rep movsl
10394 - copy the rest. */
10399 rtx srcmem
, dstmem
;
10400 int desired_alignment
= (TARGET_PENTIUMPRO
10401 && (count
== 0 || count
>= (unsigned int) 260)
10402 ? 8 : UNITS_PER_WORD
);
10403 /* Get rid of MEM_OFFSETs, they won't be accurate. */
10404 dst
= change_address (dst
, BLKmode
, destreg
);
10405 src
= change_address (src
, BLKmode
, srcreg
);
10407       /* In case we don't know anything about the alignment, default to
10408          the library version, since it is usually equally fast and results in
             shorter code.
10411          Also emit a call when we know that the count is large and the call
10412          overhead will not be important.  */
10413 if (!TARGET_INLINE_ALL_STRINGOPS
10414 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
10417 if (TARGET_SINGLE_STRINGOP
)
10418 emit_insn (gen_cld ());
10420 countreg2
= gen_reg_rtx (Pmode
);
10421 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
10423 /* We don't use loops to align destination and to copy parts smaller
10424 than 4 bytes, because gcc is able to optimize such code better (in
10425 the case the destination or the count really is aligned, gcc is often
10426 able to predict the branches) and also it is friendlier to the
10427 hardware branch prediction.
10429 Using loops is beneficial for generic case, because we can
10430 handle small counts using the loops. Many CPUs (such as Athlon)
10431 have large REP prefix setup costs.
10433 This is quite costly. Maybe we can revisit this decision later or
10434 add some customizability to this code. */
10436 if (count
== 0 && align
< desired_alignment
)
10438 label
= gen_label_rtx ();
10439 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
10440 LEU
, 0, counter_mode
, 1, label
);
10444 rtx label
= ix86_expand_aligntest (destreg
, 1);
10445 srcmem
= change_address (src
, QImode
, srcreg
);
10446 dstmem
= change_address (dst
, QImode
, destreg
);
10447 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10448 ix86_adjust_counter (countreg
, 1);
10449 emit_label (label
);
10450 LABEL_NUSES (label
) = 1;
10454 rtx label
= ix86_expand_aligntest (destreg
, 2);
10455 srcmem
= change_address (src
, HImode
, srcreg
);
10456 dstmem
= change_address (dst
, HImode
, destreg
);
10457 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10458 ix86_adjust_counter (countreg
, 2);
10459 emit_label (label
);
10460 LABEL_NUSES (label
) = 1;
10462 if (align
<= 4 && desired_alignment
> 4)
10464 rtx label
= ix86_expand_aligntest (destreg
, 4);
10465 srcmem
= change_address (src
, SImode
, srcreg
);
10466 dstmem
= change_address (dst
, SImode
, destreg
);
10467 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10468 ix86_adjust_counter (countreg
, 4);
10469 emit_label (label
);
10470 LABEL_NUSES (label
) = 1;
10473 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
10475 emit_label (label
);
10476 LABEL_NUSES (label
) = 1;
10479 if (!TARGET_SINGLE_STRINGOP
)
10480 emit_insn (gen_cld ());
10483 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
10485 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
10489 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
10490 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
10492 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
10493 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
10494 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
10495 countreg2
, destexp
, srcexp
));
10499 emit_label (label
);
10500 LABEL_NUSES (label
) = 1;
10502 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
10504 srcmem
= change_address (src
, SImode
, srcreg
);
10505 dstmem
= change_address (dst
, SImode
, destreg
);
10506 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10508 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
10510 rtx label
= ix86_expand_aligntest (countreg
, 4);
10511 srcmem
= change_address (src
, SImode
, srcreg
);
10512 dstmem
= change_address (dst
, SImode
, destreg
);
10513 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10514 emit_label (label
);
10515 LABEL_NUSES (label
) = 1;
10517 if (align
> 2 && count
!= 0 && (count
& 2))
10519 srcmem
= change_address (src
, HImode
, srcreg
);
10520 dstmem
= change_address (dst
, HImode
, destreg
);
10521 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10523 if (align
<= 2 || count
== 0)
10525 rtx label
= ix86_expand_aligntest (countreg
, 2);
10526 srcmem
= change_address (src
, HImode
, srcreg
);
10527 dstmem
= change_address (dst
, HImode
, destreg
);
10528 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10529 emit_label (label
);
10530 LABEL_NUSES (label
) = 1;
10532 if (align
> 1 && count
!= 0 && (count
& 1))
10534 srcmem
= change_address (src
, QImode
, srcreg
);
10535 dstmem
= change_address (dst
, QImode
, destreg
);
10536 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10538 if (align
<= 1 || count
== 0)
10540 rtx label
= ix86_expand_aligntest (countreg
, 1);
10541 srcmem
= change_address (src
, QImode
, srcreg
);
10542 dstmem
= change_address (dst
, QImode
, destreg
);
10543 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10544 emit_label (label
);
10545 LABEL_NUSES (label
) = 1;
10552 /* Expand string clear operation (bzero). Use i386 string operations when
10553 profitable. expand_movmem contains similar code. */
10555 ix86_expand_clrmem (rtx dst
, rtx count_exp
, rtx align_exp
)
10557 rtx destreg
, zeroreg
, countreg
, destexp
;
10558 enum machine_mode counter_mode
;
10559 HOST_WIDE_INT align
= 0;
10560 unsigned HOST_WIDE_INT count
= 0;
10562 if (GET_CODE (align_exp
) == CONST_INT
)
10563 align
= INTVAL (align_exp
);
10565 /* Can't use any of this if the user has appropriated esi. */
10566 if (global_regs
[4])
10569 /* This simple hack avoids all inlining code and simplifies code below. */
10570 if (!TARGET_ALIGN_STRINGOPS
)
10573 if (GET_CODE (count_exp
) == CONST_INT
)
10575 count
= INTVAL (count_exp
);
10576 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
10579 /* Figure out proper mode for counter. For 32bits it is always SImode,
10580 for 64bits use SImode when possible, otherwise DImode.
10581 Set count to number of bytes copied when known at compile time. */
10583 || GET_MODE (count_exp
) == SImode
10584 || x86_64_zext_immediate_operand (count_exp
, VOIDmode
))
10585 counter_mode
= SImode
;
10587 counter_mode
= DImode
;
10589 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
10590 if (destreg
!= XEXP (dst
, 0))
10591 dst
= replace_equiv_address_nv (dst
, destreg
);
10594 /* When optimizing for size emit simple rep ; movsb instruction for
10595 counts not divisible by 4. The movl $N, %ecx; rep; stosb
10596 sequence is 7 bytes long, so if optimizing for size and count is
10597 small enough that some stosl, stosw and stosb instructions without
10598 rep are shorter, fall back into the next if. */
10600 if ((!optimize
|| optimize_size
)
10603 && (!optimize_size
|| (count
& 0x03) + (count
>> 2) > 7))))
10605 emit_insn (gen_cld ());
10607 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
10608 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
10609 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
10610 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
, destexp
));
10612 else if (count
!= 0
10614 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
10615 || optimize_size
|| count
< (unsigned int) 64))
10617 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
10618 unsigned HOST_WIDE_INT offset
= 0;
10620 emit_insn (gen_cld ());
10622 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
10623 if (count
& ~(size
- 1))
10625 unsigned HOST_WIDE_INT repcount
;
10626 unsigned int max_nonrep
;
10628 repcount
= count
>> (size
== 4 ? 2 : 3);
10630 repcount
&= 0x3fffffff;
10632           /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
10633              movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
10634              bytes.  In both cases the latter seems to be faster for small
                 values of N.  */
10636           max_nonrep = size == 4 ? 7 : 4;
10637 if (!optimize_size
)
10640 case PROCESSOR_PENTIUM4
:
10641 case PROCESSOR_NOCONA
:
10648 if (repcount
<= max_nonrep
)
10649 while (repcount
-- > 0)
10651 rtx mem
= adjust_automodify_address_nv (dst
,
10652 GET_MODE (zeroreg
),
10654 emit_insn (gen_strset (destreg
, mem
, zeroreg
));
10659 countreg
= copy_to_mode_reg (counter_mode
, GEN_INT (repcount
));
10660 countreg
= ix86_zero_extend_to_Pmode (countreg
);
10661 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
10662 GEN_INT (size
== 4 ? 2 : 3));
10663 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
10664 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
,
10666 offset
= count
& ~(size
- 1);
10669 if (size
== 8 && (count
& 0x04))
10671 rtx mem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
10673 emit_insn (gen_strset (destreg
, mem
,
10674 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10679 rtx mem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
10681 emit_insn (gen_strset (destreg
, mem
,
10682 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10687 rtx mem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
10689 emit_insn (gen_strset (destreg
, mem
,
10690 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10697 /* Compute desired alignment of the string operation. */
10698 int desired_alignment
= (TARGET_PENTIUMPRO
10699 && (count
== 0 || count
>= (unsigned int) 260)
10700 ? 8 : UNITS_PER_WORD
);
10702       /* In case we don't know anything about the alignment, default to
10703          the library version, since it is usually equally fast and results in
             shorter code.
10706          Also emit a call when we know that the count is large and the call
10707          overhead will not be important.  */
10708 if (!TARGET_INLINE_ALL_STRINGOPS
10709 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
10712 if (TARGET_SINGLE_STRINGOP
)
10713 emit_insn (gen_cld ());
10715 countreg2
= gen_reg_rtx (Pmode
);
10716 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
10717 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
10718 /* Get rid of MEM_OFFSET, it won't be accurate. */
10719 dst
= change_address (dst
, BLKmode
, destreg
);
10721 if (count
== 0 && align
< desired_alignment
)
10723 label
= gen_label_rtx ();
10724 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
10725 LEU
, 0, counter_mode
, 1, label
);
10729 rtx label
= ix86_expand_aligntest (destreg
, 1);
10730 emit_insn (gen_strset (destreg
, dst
,
10731 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10732 ix86_adjust_counter (countreg
, 1);
10733 emit_label (label
);
10734 LABEL_NUSES (label
) = 1;
10738 rtx label
= ix86_expand_aligntest (destreg
, 2);
10739 emit_insn (gen_strset (destreg
, dst
,
10740 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10741 ix86_adjust_counter (countreg
, 2);
10742 emit_label (label
);
10743 LABEL_NUSES (label
) = 1;
10745 if (align
<= 4 && desired_alignment
> 4)
10747 rtx label
= ix86_expand_aligntest (destreg
, 4);
10748 emit_insn (gen_strset (destreg
, dst
,
10750 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
10752 ix86_adjust_counter (countreg
, 4);
10753 emit_label (label
);
10754 LABEL_NUSES (label
) = 1;
10757 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
10759 emit_label (label
);
10760 LABEL_NUSES (label
) = 1;
10764 if (!TARGET_SINGLE_STRINGOP
)
10765 emit_insn (gen_cld ());
10768 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
10770 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
10774 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
10775 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
10777 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
10778 emit_insn (gen_rep_stos (destreg
, countreg2
, dst
, zeroreg
, destexp
));
10782 emit_label (label
);
10783 LABEL_NUSES (label
) = 1;
10786 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
10787 emit_insn (gen_strset (destreg
, dst
,
10788 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10789 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
10791 rtx label
= ix86_expand_aligntest (countreg
, 4);
10792 emit_insn (gen_strset (destreg
, dst
,
10793 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10794 emit_label (label
);
10795 LABEL_NUSES (label
) = 1;
10797 if (align
> 2 && count
!= 0 && (count
& 2))
10798 emit_insn (gen_strset (destreg
, dst
,
10799 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10800 if (align
<= 2 || count
== 0)
10802 rtx label
= ix86_expand_aligntest (countreg
, 2);
10803 emit_insn (gen_strset (destreg
, dst
,
10804 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10805 emit_label (label
);
10806 LABEL_NUSES (label
) = 1;
10808 if (align
> 1 && count
!= 0 && (count
& 1))
10809 emit_insn (gen_strset (destreg
, dst
,
10810 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10811 if (align
<= 1 || count
== 0)
10813 rtx label
= ix86_expand_aligntest (countreg
, 1);
10814 emit_insn (gen_strset (destreg
, dst
,
10815 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10816 emit_label (label
);
10817 LABEL_NUSES (label
) = 1;
10823 /* Expand strlen. */
10825 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
10827 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
10829   /* The generic case of the strlen expander is long.  Avoid its
10830      expansion unless TARGET_INLINE_ALL_STRINGOPS.  */
10832   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10833       && !TARGET_INLINE_ALL_STRINGOPS
10835       && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10838 addr
= force_reg (Pmode
, XEXP (src
, 0));
10839 scratch1
= gen_reg_rtx (Pmode
);
10841 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
10844       /* Well, it seems that some optimizers do not combine a call like
10845            foo (strlen (bar), strlen (bar));
10846          when the move and the subtraction are done here.  It does calculate
10847          the length just once when these instructions are done inside of
10848          output_strlen_unroll().  But I think since &bar[strlen(bar)] is
10849          often used and I use one fewer register for the lifetime of
10850          output_strlen_unroll() this is better.  */
10852 emit_move_insn (out
, addr
);
10854 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
10856 /* strlensi_unroll_1 returns the address of the zero at the end of
10857 the string, like memchr(), so compute the length by subtracting
10858 the start address. */
10860 emit_insn (gen_subdi3 (out
, out
, addr
));
10862 emit_insn (gen_subsi3 (out
, out
, addr
));
10867 scratch2
= gen_reg_rtx (Pmode
);
10868 scratch3
= gen_reg_rtx (Pmode
);
10869 scratch4
= force_reg (Pmode
, constm1_rtx
);
10871 emit_move_insn (scratch3
, addr
);
10872 eoschar
= force_reg (QImode
, eoschar
);
10874 emit_insn (gen_cld ());
10875 src
= replace_equiv_address_nv (src
, scratch3
);
10877 /* If .md starts supporting :P, this can be done in .md. */
10878 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
10879 scratch4
), UNSPEC_SCAS
);
10880 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
10883 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
10884 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
10888 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
10889 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
10895 /* Expand the appropriate insns for doing strlen if not just doing
10898    out = result, initialized with the start address
10899    align_rtx = alignment of the address.
10900    scratch = scratch register, initialized with the start address when
10901    not aligned, otherwise undefined
10903    This is just the body.  It needs the initializations mentioned above and
10904    some address computing at the end.  These things are done in i386.md.  */
10907 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
10911 rtx align_2_label
= NULL_RTX
;
10912 rtx align_3_label
= NULL_RTX
;
10913 rtx align_4_label
= gen_label_rtx ();
10914 rtx end_0_label
= gen_label_rtx ();
10916 rtx tmpreg
= gen_reg_rtx (SImode
);
10917 rtx scratch
= gen_reg_rtx (SImode
);
10921 if (GET_CODE (align_rtx
) == CONST_INT
)
10922 align
= INTVAL (align_rtx
);
10924 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10926 /* Is there a known alignment and is it less than 4? */
10929 rtx scratch1
= gen_reg_rtx (Pmode
);
10930 emit_move_insn (scratch1
, out
);
10931 /* Is there a known alignment and is it not 2? */
10934 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
10935 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
10937 /* Leave just the 3 lower bits. */
10938 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
10939 NULL_RTX
, 0, OPTAB_WIDEN
);
10941 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
10942 Pmode
, 1, align_4_label
);
10943 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
10944 Pmode
, 1, align_2_label
);
10945 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
10946 Pmode
, 1, align_3_label
);
10950 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10951 check if is aligned to 4 - byte. */
10953 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
10954 NULL_RTX
, 0, OPTAB_WIDEN
);
10956 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
10957 Pmode
, 1, align_4_label
);
10960 mem
= change_address (src
, QImode
, out
);
10962 /* Now compare the bytes. */
10964 /* Compare the first n unaligned byte on a byte per byte basis. */
10965 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
10966 QImode
, 1, end_0_label
);
10968 /* Increment the address. */
10970 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10972 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10974 /* Not needed with an alignment of 2 */
10977 emit_label (align_2_label
);
10979 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
10983 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10985 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10987 emit_label (align_3_label
);
10990 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
10994 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10996 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10999   /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
11000      align this loop.  It gives only huge programs, but does not help to
         speed up.  */
11002   emit_label (align_4_label);
11004 mem
= change_address (src
, SImode
, out
);
11005 emit_move_insn (scratch
, mem
);
11007 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
11009 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
11011   /* This formula yields a nonzero result iff one of the bytes is zero.
11012      This saves three branches inside the loop and many cycles.  */
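/* Illustration (editorial sketch, not part of the original sources): the
   zero-byte test computed below, written as plain host C on a 32-bit word.  */
#if 0
static int
has_zero_byte (unsigned int x)
{
  /* Nonzero iff some byte of x is zero.  */
  return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
}
#endif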
11014 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
11015 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
11016 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
11017 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
11018 gen_int_mode (0x80808080, SImode
)));
11019 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg, tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
                              gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
                              gen_rtx_IF_THEN_ELSE (Pmode, tmp,
                                                    reg2, out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const2_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
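  /* At this point OUT is either 3 or 4 bytes past the terminating zero,
     and bit 7 of TMPREG's low byte is set exactly when it is 4 bytes past
     (the zero was the earlier of the two remaining candidate bytes).
     Adding the byte to itself copies that bit into the carry flag, so the
     subtract-with-borrow above removes 3 or 4 from OUT without any
     conditional branch, leaving OUT pointing at the terminating zero.  */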
  emit_label (end_0_label);
}
void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
                  rtx callarg2 ATTRIBUTE_UNUSED,
                  rtx pop, int sibcall)
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  if (TARGET_64BIT && pop)
    abort ();

#if TARGET_MACHO
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }
#endif /* TARGET_MACHO */

  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
    {
      rtx addr;
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
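  /* R11 is used for the indirect sibcall above because, in the x86-64 ABI,
     it is neither callee-saved nor used to pass arguments, so it is still
     free to clobber after the epilogue has restored the caller's
     registers.  */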

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}

/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared (sizeof (struct machine_function));
  f->use_fast_prologue_epilogue_nregs = -1;

  return f;
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, int n)
{
  struct stack_local_entry *s;

  if (n < 0 || n >= MAX_386_STACK_LOCALS)
    abort ();

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->rtl;

  s = (struct stack_local_entry *)
    ggc_alloc (sizeof (struct stack_local_entry));
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return s->rtl;
}

/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
                                            (TARGET_GNU_TLS && !TARGET_64BIT)
                                            ? "___tls_get_addr"
                                            : "__tls_get_addr");
    }

  return ix86_tls_symbol;
}

/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
         the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
          || addr == arg_pointer_rtx
          || addr == frame_pointer_rtx
          || addr == hard_frame_pointer_rtx)
        len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
        {
          if (GET_CODE (disp) == CONST_INT
              && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
              && base)
            len = 1;
          else
            len = 4;
        }
      /* ebp always wants a displacement.  */
      else if (base == hard_frame_pointer_rtx)
        len = 1;

      /* An index requires the two-byte modrm form....  */
      if (index
          /* ...like esp, which always wants an index.  */
          || base == stack_pointer_rtx
          || base == arg_pointer_rtx
          || base == frame_pointer_rtx)
        len += 1;
    }

  return len;
}
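
/* For example, an operand like 4(%ebx,%esi,2) decomposes into a base, an
   index and an 8-bit displacement: the displacement contributes one byte
   and the index forces the SIB byte, so the function above returns 2 (the
   opcode and the modrm byte itself are counted elsewhere).  */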

/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that insn have 8bit immediate alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, int shortform)
{
  int len = 0;
  int i;

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
        if (len)
          abort ();
        if (shortform
            && GET_CODE (recog_data.operand[i]) == CONST_INT
            && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
          len = 1;
        else
          {
            switch (get_attr_mode (insn))
              {
              case MODE_QI:
                len += 1;
                break;
              case MODE_HI:
                len += 2;
                break;
              case MODE_SI:
                len += 4;
                break;
              /* Immediates for DImode instructions are encoded as 32bit sign extended values.  */
              case MODE_DI:
                len += 4;
                break;
              default:
                fatal_insn ("unknown insn mode", insn);
              }
          }
      }
  return len;
}

/* Compute default value for "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn);
      if (GET_CODE (set) == SET)
        ;
      else if (GET_CODE (set) == PARALLEL
               && GET_CODE (XVECEXP (set, 0, 0)) == SET)
        set = XVECEXP (set, 0, 0);
      else
        {
#ifdef ENABLE_CHECKING
          abort ();
#endif
          return 0;
        }

      return memory_address_length (SET_SRC (set));
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      return memory_address_length (XEXP (recog_data.operand[i], 0));

  return 0;
}

/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_NOCONA:
      return 3;

    default:
      return 1;
    }
}

/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

static int
ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return 0;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
           && XVECLEN (PATTERN (dep_insn), 0) == 2
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
    }
  else
    return 0;

  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
    return 0;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return 0;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return 0;

  return 1;
}

/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx addr;

  if (insn_type == TYPE_LEA
      && TARGET_PENTIUM)
    {
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
        ;
      else if (GET_CODE (addr) == PARALLEL
               && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
        addr = XVECEXP (addr, 0, 0);
      else
        abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
        if (GET_CODE (recog_data.operand[i]) == MEM)
          {
            addr = XEXP (recog_data.operand[i], 0);
            goto found;
          }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}

static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
        cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
        cost = 0;

      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
          && get_attr_memory (insn) == MEMORY_STORE
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
          && (set = single_set (dep_insn)) != NULL_RTX
          && (set2 = single_set (insn)) != NULL_RTX
          && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
          && GET_CODE (SET_DEST (set2)) == MEM)
        cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        {
          /* Claim moves to take one cycle, as core can issue one load
             at time and the next load can start cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 1)
            cost--;
        }
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);

      /* The esp dependency is resolved before the instruction is really
         finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        {
          /* Claim moves to take one cycle, as core can issue one load
             at time and the next load can start cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 2)
            cost -= 2;
          else
            cost = 1;
        }
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
      memory = get_attr_memory (insn);

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        {
          enum attr_unit unit = get_attr_unit (insn);
          int loadcost;

          /* Because of the difference between the length of integer and
             floating unit pipeline preparation stages, the memory operands
             for floating point are cheaper.

             ??? For Athlon it the difference is most probably 2.  */
          if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
            loadcost = 3;
          else
            loadcost = TARGET_ATHLON ? 2 : 0;

          if (cost >= loadcost)
            cost -= loadcost;
          else
            cost = 0;
        }

    default:
      break;
    }

  return cost;
}

/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  if (ix86_tune == PROCESSOR_PENTIUM)
    return 2;

  if (ix86_tune == PROCESSOR_PENTIUMPRO
      || ix86_tune == PROCESSOR_K6)
    return 1;

  else
    return 0;
}

/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
        return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
        return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
           && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
          || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }

  return align;
}

/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (tree type, int align)
{
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }
  return align;
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
                               plus_constant (tramp, 10),
                               NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
                      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
                      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
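      /* The ten bytes written above are
           b9 <cxt:4>    movl $CXT, %ecx
           e9 <disp:4>   jmp  FNADDR        (pc-relative)
         i.e. the static chain is passed in %ecx and the trampoline ends
         with a direct jump to the nested function.  */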
    }
  else
    {
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
         We may want to support movq for kernel mode, but kernel does not use
         trampolines at the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
        {
          fnaddr = copy_to_mode_reg (DImode, fnaddr);
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                          gen_int_mode (0xbb41, HImode));
          emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
                          gen_lowpart (SImode, fnaddr));
          offset += 6;
        }
      else
        {
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                          gen_int_mode (0xbb49, HImode));
          emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
                          fnaddr);
          offset += 10;
        }
      /* Load static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
                      cxt);
      offset += 10;
      /* Jump to the r11 */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
                      gen_int_mode (0xe3, QImode));
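      /* Byte for byte, the 64-bit trampoline built above is either
           41 bb <fnaddr:4>   movl   $FNADDR, %r11d   (zero-extending form)
         or
           49 bb <fnaddr:8>   movabs $FNADDR, %r11
         followed by
           49 ba <cxt:8>      movabs $CXT, %r10
           49 ff e3           jmp    *%r11
         so the static chain lands in %r10 and control transfers through
         %r11, the registers the 64-bit nested-function code expects.  */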
      offset += 3;
      if (offset > TRAMPOLINE_SIZE)
        abort ();
    }

#ifdef ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}

#define def_builtin(MASK, NAME, TYPE, CODE)				\
do {									\
  if ((MASK) & target_flags						\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
				 NULL, NULL_TREE);			\
} while (0)
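
/* def_builtin is invoked later in this file for every MMX/SSE builtin,
   along the lines of

     def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr",
                  void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);

   It registers the builtin with the language front end only when the
   corresponding ISA bits are enabled in target_flags (and, for MASK_64BIT
   builtins, only on 64-bit targets).  */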

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};
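
/* In the tables below, COMPARISON is the rtx code the insn pattern is
   expanded with, and a nonzero FLAG tells the expander to swap the two
   operands first -- that is how the cmpgt/cmpge builtins are implemented
   in terms of the LT/LE patterns.  */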

static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
11871 static const struct builtin_description bdesc_2arg
[] =
11874 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
11875 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
11876 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
11877 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
11878 { MASK_SSE
, CODE_FOR_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
11879 { MASK_SSE
, CODE_FOR_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
11880 { MASK_SSE
, CODE_FOR_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
11881 { MASK_SSE
, CODE_FOR_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
11883 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
11884 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
11885 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
11886 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, 1 },
11887 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, 1 },
11888 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
11889 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, EQ
, 0 },
11890 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, LT
, 0 },
11891 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, LE
, 0 },
11892 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, LT
, 1 },
11893 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, LE
, 1 },
11894 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, UNORDERED
, 0 },
11895 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
11896 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
11897 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
11898 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
11899 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, EQ
, 0 },
11900 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, LT
, 0 },
11901 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, LE
, 0 },
11902 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
11904 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
11905 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
11906 { MASK_SSE
, CODE_FOR_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
11907 { MASK_SSE
, CODE_FOR_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
11909 { MASK_SSE
, CODE_FOR_sse_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
11910 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
11911 { MASK_SSE
, CODE_FOR_sse_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
11912 { MASK_SSE
, CODE_FOR_sse_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
11914 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
11915 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
11916 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
11917 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
11918 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
11921 { MASK_MMX
, CODE_FOR_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
11922 { MASK_MMX
, CODE_FOR_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
11923 { MASK_MMX
, CODE_FOR_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
11924 { MASK_MMX
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
11925 { MASK_MMX
, CODE_FOR_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
11926 { MASK_MMX
, CODE_FOR_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
11927 { MASK_MMX
, CODE_FOR_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
11928 { MASK_MMX
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
11930 { MASK_MMX
, CODE_FOR_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
11931 { MASK_MMX
, CODE_FOR_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
11932 { MASK_MMX
, CODE_FOR_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
11933 { MASK_MMX
, CODE_FOR_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
11934 { MASK_MMX
, CODE_FOR_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
11935 { MASK_MMX
, CODE_FOR_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
11936 { MASK_MMX
, CODE_FOR_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
11937 { MASK_MMX
, CODE_FOR_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
11939 { MASK_MMX
, CODE_FOR_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
11940 { MASK_MMX
, CODE_FOR_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
11941 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
11943 { MASK_MMX
, CODE_FOR_mmx_anddi3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
11944 { MASK_MMX
, CODE_FOR_mmx_nanddi3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
11945 { MASK_MMX
, CODE_FOR_mmx_iordi3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
11946 { MASK_MMX
, CODE_FOR_mmx_xordi3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
11948 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
11949 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
11951 { MASK_MMX
, CODE_FOR_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
11952 { MASK_MMX
, CODE_FOR_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
11953 { MASK_MMX
, CODE_FOR_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
11954 { MASK_MMX
, CODE_FOR_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
11955 { MASK_MMX
, CODE_FOR_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
11956 { MASK_MMX
, CODE_FOR_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
11958 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
11959 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
11960 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
11961 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
11963 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
11964 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
11965 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
11966 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
11967 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
11968 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
11971 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
11972 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
11973 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
11975 { MASK_SSE
, CODE_FOR_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
11976 { MASK_SSE
, CODE_FOR_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
11977 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
11979 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
11980 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
11981 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
11982 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
11983 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
11984 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
11986 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
11987 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
11988 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
11989 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
11990 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
11991 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
11993 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
11994 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
11995 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
11996 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
11998 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
11999 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
12002 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
12003 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
12004 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
12005 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
12006 { MASK_SSE2
, CODE_FOR_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
12007 { MASK_SSE2
, CODE_FOR_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
12008 { MASK_SSE2
, CODE_FOR_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
12009 { MASK_SSE2
, CODE_FOR_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
12011 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
12012 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
12013 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
12014 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, 1 },
12015 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, 1 },
12016 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
12017 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, EQ
, 0 },
12018 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, LT
, 0 },
12019 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, LE
, 0 },
12020 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, LT
, 1 },
12021 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, LE
, 1 },
12022 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, UNORDERED
, 0 },
12023 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
12024 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
12025 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
12026 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
12027 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, EQ
, 0 },
12028 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, LT
, 0 },
12029 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, LE
, 0 },
12030 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, UNORDERED
, 0 },
12032 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
12033 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
12034 { MASK_SSE2
, CODE_FOR_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
12035 { MASK_SSE2
, CODE_FOR_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
12037 { MASK_SSE2
, CODE_FOR_sse2_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
12038 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
12039 { MASK_SSE2
, CODE_FOR_sse2_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
12040 { MASK_SSE2
, CODE_FOR_sse2_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
12042 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
12043 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
12044 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
12047 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
12048 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
12049 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
12050 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
12051 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
12052 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
12053 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
12054 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
12056 { MASK_MMX
, CODE_FOR_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
12057 { MASK_MMX
, CODE_FOR_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
12058 { MASK_MMX
, CODE_FOR_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
12059 { MASK_MMX
, CODE_FOR_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
12060 { MASK_MMX
, CODE_FOR_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
12061 { MASK_MMX
, CODE_FOR_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
12062 { MASK_MMX
, CODE_FOR_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
12063 { MASK_MMX
, CODE_FOR_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
12065 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
12066 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
12068 { MASK_SSE2
, CODE_FOR_sse2_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
12069 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
12070 { MASK_SSE2
, CODE_FOR_sse2_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
12071 { MASK_SSE2
, CODE_FOR_sse2_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
12073 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
12074 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
12076 { MASK_SSE2
, CODE_FOR_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
12077 { MASK_SSE2
, CODE_FOR_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
12078 { MASK_SSE2
, CODE_FOR_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
12079 { MASK_SSE2
, CODE_FOR_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
12080 { MASK_SSE2
, CODE_FOR_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
12081 { MASK_SSE2
, CODE_FOR_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
12083 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
12084 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
12085 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
12086 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
12088 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
12089 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
12090 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
12091 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
12092 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
12093 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
12094 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
12095 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
12097 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
12098 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
12099 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
12101 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
12102 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
12104 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
12105 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
12107 { MASK_SSE2
, CODE_FOR_ashlv8hi3_ti
, 0, IX86_BUILTIN_PSLLW128
, 0, 0 },
12108 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
12109 { MASK_SSE2
, CODE_FOR_ashlv4si3_ti
, 0, IX86_BUILTIN_PSLLD128
, 0, 0 },
12110 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
12111 { MASK_SSE2
, CODE_FOR_ashlv2di3_ti
, 0, IX86_BUILTIN_PSLLQ128
, 0, 0 },
12112 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
12114 { MASK_SSE2
, CODE_FOR_lshrv8hi3_ti
, 0, IX86_BUILTIN_PSRLW128
, 0, 0 },
12115 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
12116 { MASK_SSE2
, CODE_FOR_lshrv4si3_ti
, 0, IX86_BUILTIN_PSRLD128
, 0, 0 },
12117 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
12118 { MASK_SSE2
, CODE_FOR_lshrv2di3_ti
, 0, IX86_BUILTIN_PSRLQ128
, 0, 0 },
12119 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
12121 { MASK_SSE2
, CODE_FOR_ashrv8hi3_ti
, 0, IX86_BUILTIN_PSRAW128
, 0, 0 },
12122 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
12123 { MASK_SSE2
, CODE_FOR_ashrv4si3_ti
, 0, IX86_BUILTIN_PSRAD128
, 0, 0 },
12124 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
12126 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
12128 { MASK_SSE2
, CODE_FOR_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
12129 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
12130 { MASK_SSE2
, CODE_FOR_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
12131 { MASK_SSE2
, CODE_FOR_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
12134 { MASK_SSE3
, CODE_FOR_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
12135 { MASK_SSE3
, CODE_FOR_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
12136 { MASK_SSE3
, CODE_FOR_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
12137 { MASK_SSE3
, CODE_FOR_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
12138 { MASK_SSE3
, CODE_FOR_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
12139 { MASK_SSE3
, CODE_FOR_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 }
12142 static const struct builtin_description bdesc_1arg
[] =
12144 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
12145 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
12147 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
12148 { MASK_SSE
, CODE_FOR_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
12149 { MASK_SSE
, CODE_FOR_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
12151 { MASK_SSE
, CODE_FOR_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
12152 { MASK_SSE
, CODE_FOR_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
12153 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
12154 { MASK_SSE
, CODE_FOR_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
12155 { MASK_SSE
, CODE_FOR_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
12156 { MASK_SSE
| MASK_64BIT
, CODE_FOR_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
12158 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
12159 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
12160 { MASK_SSE2
, CODE_FOR_sse2_movq2dq
, 0, IX86_BUILTIN_MOVQ2DQ
, 0, 0 },
12161 { MASK_SSE2
, CODE_FOR_sse2_movdq2q
, 0, IX86_BUILTIN_MOVDQ2Q
, 0, 0 },
12163 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
12165 { MASK_SSE2
, CODE_FOR_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
12166 { MASK_SSE2
, CODE_FOR_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
12168 { MASK_SSE2
, CODE_FOR_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
12169 { MASK_SSE2
, CODE_FOR_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
12170 { MASK_SSE2
, CODE_FOR_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
12171 { MASK_SSE2
, CODE_FOR_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
12172 { MASK_SSE2
, CODE_FOR_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
12174 { MASK_SSE2
, CODE_FOR_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
12176 { MASK_SSE2
, CODE_FOR_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
12177 { MASK_SSE2
, CODE_FOR_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
12178 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
12179 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
12181 { MASK_SSE2
, CODE_FOR_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
12182 { MASK_SSE2
, CODE_FOR_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
12183 { MASK_SSE2
, CODE_FOR_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
12185 { MASK_SSE2
, CODE_FOR_sse2_movq
, 0, IX86_BUILTIN_MOVQ
, 0, 0 },
12188 { MASK_SSE3
, CODE_FOR_movshdup
, 0, IX86_BUILTIN_MOVSHDUP
, 0, 0 },
12189 { MASK_SSE3
, CODE_FOR_movsldup
, 0, IX86_BUILTIN_MOVSLDUP
, 0, 0 },
12190 { MASK_SSE3
, CODE_FOR_movddup
, 0, IX86_BUILTIN_MOVDDUP
, 0, 0 }
12194 ix86_init_builtins (void)
12197 ix86_init_mmx_sse_builtins ();
12200 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12201 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12204 ix86_init_mmx_sse_builtins (void)
12206 const struct builtin_description
* d
;
12209 tree V16QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V16QImode
);
12210 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
12211 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
12212 tree V2DI_type_node
= build_vector_type_for_mode (intDI_type_node
, V2DImode
);
12213 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
12214 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
12215 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
12216 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
12217 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
12218 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
12220 tree pchar_type_node
= build_pointer_type (char_type_node
);
12221 tree pcchar_type_node
= build_pointer_type (
12222 build_type_variant (char_type_node
, 1, 0));
12223 tree pfloat_type_node
= build_pointer_type (float_type_node
);
12224 tree pcfloat_type_node
= build_pointer_type (
12225 build_type_variant (float_type_node
, 1, 0));
12226 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
12227 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
12228 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
12231 tree int_ftype_v4sf_v4sf
12232 = build_function_type_list (integer_type_node
,
12233 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12234 tree v4si_ftype_v4sf_v4sf
12235 = build_function_type_list (V4SI_type_node
,
12236 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12237 /* MMX/SSE/integer conversions. */
12238 tree int_ftype_v4sf
12239 = build_function_type_list (integer_type_node
,
12240 V4SF_type_node
, NULL_TREE
);
12241 tree int64_ftype_v4sf
12242 = build_function_type_list (long_long_integer_type_node
,
12243 V4SF_type_node
, NULL_TREE
);
12244 tree int_ftype_v8qi
12245 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
12246 tree v4sf_ftype_v4sf_int
12247 = build_function_type_list (V4SF_type_node
,
12248 V4SF_type_node
, integer_type_node
, NULL_TREE
);
12249 tree v4sf_ftype_v4sf_int64
12250 = build_function_type_list (V4SF_type_node
,
12251 V4SF_type_node
, long_long_integer_type_node
,
12253 tree v4sf_ftype_v4sf_v2si
12254 = build_function_type_list (V4SF_type_node
,
12255 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
12256 tree int_ftype_v4hi_int
12257 = build_function_type_list (integer_type_node
,
12258 V4HI_type_node
, integer_type_node
, NULL_TREE
);
12259 tree v4hi_ftype_v4hi_int_int
12260 = build_function_type_list (V4HI_type_node
, V4HI_type_node
,
12261 integer_type_node
, integer_type_node
,
12263 /* Miscellaneous. */
12264 tree v8qi_ftype_v4hi_v4hi
12265 = build_function_type_list (V8QI_type_node
,
12266 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12267 tree v4hi_ftype_v2si_v2si
12268 = build_function_type_list (V4HI_type_node
,
12269 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12270 tree v4sf_ftype_v4sf_v4sf_int
12271 = build_function_type_list (V4SF_type_node
,
12272 V4SF_type_node
, V4SF_type_node
,
12273 integer_type_node
, NULL_TREE
);
12274 tree v2si_ftype_v4hi_v4hi
12275 = build_function_type_list (V2SI_type_node
,
12276 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12277 tree v4hi_ftype_v4hi_int
12278 = build_function_type_list (V4HI_type_node
,
12279 V4HI_type_node
, integer_type_node
, NULL_TREE
);
12280 tree v4hi_ftype_v4hi_di
12281 = build_function_type_list (V4HI_type_node
,
12282 V4HI_type_node
, long_long_unsigned_type_node
,
12284 tree v2si_ftype_v2si_di
12285 = build_function_type_list (V2SI_type_node
,
12286 V2SI_type_node
, long_long_unsigned_type_node
,
12288 tree void_ftype_void
12289 = build_function_type (void_type_node
, void_list_node
);
12290 tree void_ftype_unsigned
12291 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
12292 tree void_ftype_unsigned_unsigned
12293 = build_function_type_list (void_type_node
, unsigned_type_node
,
12294 unsigned_type_node
, NULL_TREE
);
12295 tree void_ftype_pcvoid_unsigned_unsigned
12296 = build_function_type_list (void_type_node
, const_ptr_type_node
,
12297 unsigned_type_node
, unsigned_type_node
,
12299 tree unsigned_ftype_void
12300 = build_function_type (unsigned_type_node
, void_list_node
);
12302 = build_function_type (long_long_unsigned_type_node
, void_list_node
);
12303 tree v4sf_ftype_void
12304 = build_function_type (V4SF_type_node
, void_list_node
);
12305 tree v2si_ftype_v4sf
12306 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
12307 /* Loads/stores. */
12308 tree void_ftype_v8qi_v8qi_pchar
12309 = build_function_type_list (void_type_node
,
12310 V8QI_type_node
, V8QI_type_node
,
12311 pchar_type_node
, NULL_TREE
);
12312 tree v4sf_ftype_pcfloat
12313 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
12314 /* @@@ the type is bogus */
12315 tree v4sf_ftype_v4sf_pv2si
12316 = build_function_type_list (V4SF_type_node
,
12317 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
12318 tree void_ftype_pv2si_v4sf
12319 = build_function_type_list (void_type_node
,
12320 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
12321 tree void_ftype_pfloat_v4sf
12322 = build_function_type_list (void_type_node
,
12323 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
12324 tree void_ftype_pdi_di
12325 = build_function_type_list (void_type_node
,
12326 pdi_type_node
, long_long_unsigned_type_node
,
12328 tree void_ftype_pv2di_v2di
12329 = build_function_type_list (void_type_node
,
12330 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
12331 /* Normal vector unops. */
12332 tree v4sf_ftype_v4sf
12333 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12335 /* Normal vector binops. */
12336 tree v4sf_ftype_v4sf_v4sf
12337 = build_function_type_list (V4SF_type_node
,
12338 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12339 tree v8qi_ftype_v8qi_v8qi
12340 = build_function_type_list (V8QI_type_node
,
12341 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pcint_type_node
    = build_pointer_type (build_type_variant (integer_type_node, 1, 0));
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node
    = build_pointer_type (build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree ti_ftype_void
    = build_function_type (intTI_type_node, void_list_node);
  tree v2di_ftype_void
    = build_function_type (V2DI_type_node, void_list_node);
  tree ti_ftype_ti_ti
    = build_function_type_list (intTI_type_node,
				intTI_type_node, intTI_type_node, NULL_TREE);
  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v2di_ftype_di
    = build_function_type_list (V2DI_type_node,
				long_long_unsigned_type_node, NULL_TREE);
  tree di_ftype_v2di
    = build_function_type_list (long_long_unsigned_type_node,
				V2DI_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
				V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node,
				integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_pv2si
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v2df
    = build_function_type_list (void_type_node,
				pv2si_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
				pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
				pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
				V16QI_type_node, V16QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_double
    = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
  tree v2df_ftype_double_double
    = build_function_type_list (V2DF_type_node,
				double_type_node, double_type_node, NULL_TREE);
  tree int_ftype_v8hi_int
    = build_function_type_list (integer_type_node,
				V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int_int
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v2di
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v2di
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree di_ftype_v2si_v2si
    = build_function_type_list (long_long_unsigned_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v2di_ftype_v4si_v4si
    = build_function_type_list (V2DI_type_node,
				V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
				pchar_type_node, V16QI_type_node, NULL_TREE);
  tree v4si_ftype_pcint
    = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
  tree void_ftype_pcint_v4si
    = build_function_type_list (void_type_node,
				pcint_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di
    = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
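
  /* Reading aid (comment only): the locals above follow the
     <result>_ftype_<arg>... naming convention, i.e. v4hi_ftype_v4hi_v4hi
     is "V4HI f (V4HI, V4HI)".  For illustration, a one-argument variant
     would be built along the lines of

       tree v4sf_ftype_v4sf
	 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);

     with the return type first, the argument types next, and NULL_TREE
     terminating the list.  */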
  tree float80_type;
  tree float128_type;

  /* The __float80 type.  */
  if (TYPE_MODE (long_double_type_node) == XFmode)
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
					       "__float80");
  else
    {
      /* The __float80 type.  */
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 80;
      layout_type (float80_type);
      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
    }

  float128_type = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type) = 128;
  layout_type (float128_type);
  (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si_v4si;
	  break;
	case V2DImode:
	  type = v2di_ftype_v2di_v2di;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df_v2df;
	  break;
	case TImode:
	  type = ti_ftype_ti_ti;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  abort ();
	}

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
	  || d->icode == CODE_FOR_maskncmpv4sf3
	  || d->icode == CODE_FOR_vmmaskcmpv4sf3
	  || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_maskcmpv2df3
	  || d->icode == CODE_FOR_maskncmpv2df3
	  || d->icode == CODE_FOR_vmmaskcmpv2df3
	  || d->icode == CODE_FOR_vmmaskncmpv2df3)
	type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }
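
  /* For illustration only (the table itself lives elsewhere in this file):
     an entry of bdesc_2arg is assumed to look roughly like

       { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps",
	 IX86_BUILTIN_ADDPS, 0, 0 }

     and the loop above would assign it the type v4sf_ftype_v4sf_v4sf,
     because operand 1 of addv4sf3 has mode V4SFmode.  */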
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);

  def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
  /* Prescott New Instructions.  */
  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
	       void_ftype_pcvoid_unsigned_unsigned,
	       IX86_BUILTIN_MONITOR);
  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
	       void_ftype_unsigned_unsigned,
	       IX86_BUILTIN_MWAIT);
  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
	       v4sf_ftype_v4sf,
	       IX86_BUILTIN_MOVSHDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
	       v4sf_ftype_v4sf,
	       IX86_BUILTIN_MOVSLDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
  def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
	       v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
	       v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
}
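
/* For context only: the builtins registered above are normally reached
   through the wrapper intrinsics in the xmmintrin.h/emmintrin.h headers
   rather than called directly.  A wrapper along these simplified, assumed
   lines

     static __inline __m128
     _mm_add_ps (__m128 __A, __m128 __B)
     {
       return (__m128) __builtin_ia32_addps ((__v4sf)__A, (__v4sf)__B);
     }

   eventually arrives in ix86_expand_builtin below with
   fcode == IX86_BUILTIN_ADDPS.  */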
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
			      : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
				: gen_rtx_SUBREG (V4SFmode, x, 0),
				CONST0_RTX (V4SFmode)));
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
      || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
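
/* For orientation only: most two-operand builtins registered from
   bdesc_2arg (e.g. __builtin_ia32_paddw) funnel through
   ix86_expand_binop_builtin above; only builtins with irregular operand
   shapes get a dedicated case in ix86_expand_builtin further down.  */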
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}
/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
			  rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
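
/* Note: op1 is deliberately seeded from op0 above.  The vm* scalar
   patterns behind sqrtss/rsqrtss/rcpss take two inputs -- one vector whose
   upper elements are carried through unchanged and one supplying the low
   element that is operated on -- and these builtins pass the same operand
   for both roles.  */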
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
			 rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
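
/* Background, kept brief: the SSE mask-compare patterns only cover the
   eq/lt/le style conditions directly, so descriptors for the reversed
   conditions are expected to request the operand swap handled above and
   re-express the compare in terms of an available one.  The result is a
   full-width element mask, which is why these builtins return vector
   (v4si/v2di) values rather than an int.  */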
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
		      rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
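
/* Background: unlike the mask compares above, the comi/ucomi builtins
   produce a scalar 0/1 answer.  The expansion materializes it by zeroing
   an SImode pseudo, setting its QImode low part from the flags comparison,
   and returning the containing SImode register.  */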
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_PEXTRW:
    case IX86_BUILTIN_PEXTRW128:
      icode = (fcode == IX86_BUILTIN_PEXTRW
	       ? CODE_FOR_mmx_pextrw
	       : CODE_FOR_sse2_pextrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  error ("selector must be an integer constant in the range 0..%i",
		 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
    case IX86_BUILTIN_PINSRW128:
      icode = (fcode == IX86_BUILTIN_PINSRW
	       ? CODE_FOR_mmx_pinsrw
	       : CODE_FOR_sse2_pinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  error ("selector must be an integer constant in the range 0..%i",
		 fcode == IX86_BUILTIN_PINSRW ? 15:255);
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
	       : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
		  : CODE_FOR_sse2_maskmovdqu));
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movsd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
    case IX86_BUILTIN_STOREHPD:
    case IX86_BUILTIN_STORELPD:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movsd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return const0_rtx;
    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
	       : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return 0;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    case IX86_BUILTIN_CLRTI:
      target = gen_reg_rtx (V2DImode);
      emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
      return target;
    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADAPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);

    case IX86_BUILTIN_STOREAPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_LOADSD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);

    case IX86_BUILTIN_STORESD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);

    case IX86_BUILTIN_SETPD1:
      target = assign_386_stack_local (DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, DFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
      emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
      return op0;

    case IX86_BUILTIN_SETPD:
      target = assign_386_stack_local (V2DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      emit_move_insn (adjust_address (target, DFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, DFmode, 8),
		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_movapd (op0, target));
      return op0;

    case IX86_BUILTIN_LOADRPD:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
					 gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
      return target;

    case IX86_BUILTIN_LOADPD1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
					 gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPD1:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STORERPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);

    case IX86_BUILTIN_CLRPD:
      target = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse_clrv2df (target));
      return target;

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
	op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQA:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_LOADD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);

    case IX86_BUILTIN_STOREDQA:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
    case IX86_BUILTIN_STORED:
      return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);

    case IX86_BUILTIN_MONITOR:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (gen_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_mwait (op0, op1));
      return 0;

    case IX86_BUILTIN_LOADDDUP:
      return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);

    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target, 1);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3
	    || d->icode == CODE_FOR_maskcmpv2df3
	    || d->icode == CODE_FOR_vmmaskcmpv2df3
	    || d->icode == CODE_FOR_maskncmpv2df3
	    || d->icode == CODE_FOR_vmmaskncmpv2df3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}
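
/* Reading aid: only builtins needing special operand handling get their own
   case in the switch above; everything else is resolved by scanning the
   bdesc_2arg, bdesc_1arg and bdesc_comi tables and dispatching to the
   generic ix86_expand_binop_builtin / ix86_expand_unop_builtin /
   ix86_expand_sse_comi helpers defined earlier.  */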
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (DImode,
						gen_rtx_PRE_DEC (DImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[1]));
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (GET_MODE (operand),
						gen_rtx_PRE_DEC (SImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
/* Free operand from the memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
	size = 2;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}
/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
  if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
    return NO_REGS;
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
	return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
	{
	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
	  if (MAYBE_SSE_CLASS_P (class))
	    return (reg_class_subset_p (class, GENERAL_REGS)
		    ? GENERAL_REGS : FLOAT_REGS);
	  else
	    return class;
	}
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
	return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */
int
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
                              enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
        abort ();
      else
        return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
          || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
               || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
              && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
                  || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
}
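/* Illustration of the check above: moving SFmode data between FLOAT_REGS
   and GENERAL_REGS always needs a memory temporary, because FLOAT_CLASS_P
   differs for the two classes.  An SImode move between SSE_REGS and
   GENERAL_REGS, by contrast, needs memory only when direct inter-unit
   moves are undesirable (!TARGET_INTER_UNIT_MOVES && !optimize_size).  */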
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */
int
ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
                         enum reg_class class2)
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
                   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
                   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from general_purpose_register we may emit multiple
         stores followed by single load causing memory size mismatch stall.
         Count this as arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
        cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
         have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
        cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
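/* Worked example (numbers hypothetical, read as if from a tuning table):
   if a DFmode copy between FLOAT_REGS and GENERAL_REGS needs secondary
   memory, its cost is roughly 1 + MAX (fp store costs) + MAX (integer
   load costs), plus the extra 20 when CLASS1 needs more hard registers
   than CLASS2.  A same-unit copy such as SSE_REGS to SSE_REGS simply
   costs ix86_cost->sse_move.  */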
/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
  if (MMX_REGNO_P (regno))
    return (TARGET_MMX
            ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
  /* We handle both integer and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care for QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */
int
ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
{
  int index;

  if (FLOAT_CLASS_P (class))
    {
      switch (mode)
        {
        case SFmode:
          index = 0;
          break;
        case DFmode:
          index = 1;
          break;
        case XFmode:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        case 16:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
        return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
                : ix86_cost->movzbl_load);
      else
        return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
                : ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
        mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
              * (((int) GET_MODE_SIZE (mode)
                  + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
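/* Worked example for the default case above: a DImode store from
   GENERAL_REGS on a 32-bit target (UNITS_PER_WORD == 4) costs
   int_store[2] * ((8 + 4 - 1) / 4), i.e. two word-sized moves.  */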
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  enum machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
        *total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
        *total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
               && (!TARGET_64BIT
                   || (GET_CODE (x) != LABEL_REF
                       && (GET_CODE (x) != SYMBOL_REF
                           || !SYMBOL_REF_LOCAL_P (x)))))
        *total = 1;
      else
        *total = 0;
      break;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
        *total = 0;
      else
        switch (standard_80387_constant_p (x))
          {
          case 1: /* 0.0 */
            *total = 1;
            break;
          default: /* Other constants */
            *total = 2;
            break;
          case 0:
          case -1:
            /* Start with (MEM (SYMBOL_REF)), since that's where
               it'll probably end up.  Add a penalty for size.  */
            *total = (COSTS_N_INSNS (1)
                      + (flag_pic != 0 && !TARGET_64BIT)
                      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
            break;
          }
      break;

    case ZERO_EXTEND:
      /* The zero extension is often completely free on x86_64, so make
         it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
          && GET_MODE (XEXP (x, 0)) == SImode)
        *total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
        *total = COSTS_N_INSNS (ix86_cost->add);
      else
        *total = COSTS_N_INSNS (ix86_cost->movzx);
      break;

    case SIGN_EXTEND:
      *total = COSTS_N_INSNS (ix86_cost->movsx);
      break;

    case ASHIFT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
          && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
        {
          HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          if (value == 1)
            {
              *total = COSTS_N_INSNS (ix86_cost->add);
              break;
            }
          if ((value == 2 || value == 3)
              && ix86_cost->lea <= ix86_cost->shift_const)
            {
              *total = COSTS_N_INSNS (ix86_cost->lea);
              break;
            }
        }
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              if (INTVAL (XEXP (x, 1)) > 32)
                *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
              else
                *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
            }
          else
            {
              if (GET_CODE (XEXP (x, 1)) == AND)
                *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
              else
                *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
            }
        }
      else
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            *total = COSTS_N_INSNS (ix86_cost->shift_const);
          else
            *total = COSTS_N_INSNS (ix86_cost->shift_var);
        }
      break;

    case MULT:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fmul);
          return false;
        }
      else
        {
          rtx op0 = XEXP (x, 0);
          rtx op1 = XEXP (x, 1);
          int nbits;
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
              for (nbits = 0; value != 0; value &= value - 1)
                nbits++;
            }
          else
            /* This is arbitrary.  */
            nbits = 7;

          /* Compute costs correctly for widening multiplication.  */
          if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
              && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
                 == GET_MODE_SIZE (mode))
            {
              int is_mulwiden = 0;
              enum machine_mode inner_mode = GET_MODE (op0);

              if (GET_CODE (op0) == GET_CODE (op1))
                is_mulwiden = 1, op1 = XEXP (op1, 0);
              else if (GET_CODE (op1) == CONST_INT)
                {
                  if (GET_CODE (op0) == SIGN_EXTEND)
                    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
                                  == INTVAL (op1);
                  else
                    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
                }

              if (is_mulwiden)
                op0 = XEXP (op0, 0), mode = GET_MODE (op0);
            }

          *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
                                  + nbits * ix86_cost->mult_bit)
                   + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);

          return true;
        }

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fdiv);
      else
        *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
      break;

    case PLUS:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fadd);
      else if (GET_MODE_CLASS (mode) == MODE_INT
               && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
        {
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
              && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
              && CONSTANT_P (XEXP (x, 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = COSTS_N_INSNS (ix86_cost->lea);
                  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
                  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
                                      outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = COSTS_N_INSNS (ix86_cost->lea);
                  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == PLUS)
            {
              *total = COSTS_N_INSNS (ix86_cost->lea);
              *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
              *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
              *total += rtx_cost (XEXP (x, 1), outer_code);
              return true;
            }
        }
      /* FALLTHRU */

    case MINUS:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fadd);
          return false;
        }
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
        {
          *total = (COSTS_N_INSNS (ix86_cost->add) * 2
                    + (rtx_cost (XEXP (x, 0), outer_code)
                       << (GET_MODE (XEXP (x, 0)) != DImode))
                    + (rtx_cost (XEXP (x, 1), outer_code)
                       << (GET_MODE (XEXP (x, 1)) != DImode)));
          return true;
        }
      /* FALLTHRU */

    case NEG:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fchs);
          return false;
        }
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
        *total = COSTS_N_INSNS (ix86_cost->add * 2);
      else
        *total = COSTS_N_INSNS (ix86_cost->add);
      break;

    case FLOAT_EXTEND:
      if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
        *total = 0;
      break;

    case ABS:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fabs);
      break;

    case SQRT:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fsqrt);
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
        *total = 0;
      break;

    default:
      return false;
    }
  return false;
}
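/* Worked example for the PLUS handling above (assuming the current
   tuning's lea cost): the address-like expression
   (plus (mult (reg) (const_int 4)) (reg)) is costed as a single lea,
   COSTS_N_INSNS (ix86_cost->lea) plus the costs of the two register
   operands, instead of as a shift followed by an add.  */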
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void
ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif

#ifdef TARGET_MACHO
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
       reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
       reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}
#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
#define TARGET_USE_MS_BITFIELD_LAYOUT 0
#endif

/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_struct_attribute (tree *node, tree name,
                              tree args ATTRIBUTE_UNUSED,
                              int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
        type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
                 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
            && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
           || ((is_attribute_p ("gcc_struct", name)
                && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning ("`%s' incompatible attribute ignored",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
static bool
ix86_ms_bitfield_layout_p (tree record_type)
{
  return (TARGET_USE_MS_BITFIELD_LAYOUT &&
          !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
    || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
}
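/* The attributes handled above come from source code such as

	struct __attribute__ ((ms_struct)) s1 { char c; long long x; };
	struct __attribute__ ((gcc_struct)) s2 { char c; long long x; };

   The first requests the MS struct/bitfield layout for s1; the second
   forces the native GCC layout even when -mms-bitfields (i.e.
   TARGET_USE_MS_BITFIELD_LAYOUT) is in effect.  */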
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_function_regparm (type, function) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
         arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
        if (TREE_VALUE (parm) == void_type_node)
          break;
      /* If not, the this parameter is in the first argument.  */
      if (parm)
        {
          int regno = 0;
          if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
            regno = 2;
          return gen_rtx_REG (SImode, regno);
        }
    }

  if (aggregate_value_p (TREE_TYPE (type), type))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));

  return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT vcall_offset, tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !(*targetm.binds_local_p) (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
                     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
                     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
        {
          if (!x86_64_general_operand (xops[0], DImode))
            {
              tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
              xops[1] = tmp;
              output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
              xops[0] = tmp;
              xops[1] = this;
            }
          output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
        }
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
        tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
        {
          int tmp_regno = 2 /* ECX */;
          if (lookup_attribute ("fastcall",
                                TYPE_ATTRIBUTES (TREE_TYPE (function))))
            tmp_regno = 0 /* EAX */;
          tmp = gen_rtx_REG (SImode, tmp_regno);
        }

      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
        output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
        {
          rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
          xops[0] = GEN_INT (vcall_offset);
          xops[1] = tmp2;
          output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
          xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
        }
      xops[1] = this_reg;
      if (TARGET_64BIT)
        output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  xops[0] = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
        {
          tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
          tmp = gen_rtx_CONST (Pmode, tmp);
          tmp = gen_rtx_MEM (QImode, tmp);
          xops[0] = tmp;
          output_asm_insn ("jmp\t%A0", xops);
        }
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
        if (TARGET_MACHO)
          {
            rtx sym_ref = XEXP (DECL_RTL (function), 0);
            tmp = (gen_rtx_SYMBOL_REF
                   (Pmode,
                    machopic_indirection_name (sym_ref, /*stub_p=*/true)));
            tmp = gen_rtx_MEM (QImode, tmp);
            xops[0] = tmp;
            output_asm_insn ("jmp\t%0", xops);
          }
        else
#endif /* TARGET_MACHO */
        {
          tmp = gen_rtx_REG (SImode, 2 /* ECX */);
          output_set_got (tmp);

          xops[1] = tmp;
          output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
          output_asm_insn ("jmp\t{*}%1", xops);
        }
    }
}
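/* For illustration (hypothetical regparm-0 ia32 case, not emitted verbatim
   anywhere): a thunk whose only job is to add DELTA to the incoming object
   pointer and tail-call the real method comes out roughly as

	addl	$delta, 4(%esp)
	jmp	target

   while the vcall_offset path additionally loads the adjustment from the
   vtable through the scratch register chosen above.  */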
static void
x86_file_start (void)
{
  default_file_start ();
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax\n", asm_out_file);
}
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
                    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    if (flag_pic)
      {
#ifndef NO_PROFILE_COUNTERS
        fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
        fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
      }
    else
      {
#ifndef NO_PROFILE_COUNTERS
        fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
        fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
      }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
               LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
               PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emitted and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (GET_CODE (insn) == JUMP_INSN
      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
          || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
  if (GET_CODE (insn) == CALL_INSN
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  if (get_attr_length (insn) <= 1)
    return 1;

  /* For normal instructions we may rely on the sizes of addresses
     and the presence of symbol to require 4 bytes of encoding.
     This is not the case for jumps where references are PC relative.  */
  if (GET_CODE (insn) != JUMP_INSN)
    {
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
        l = 4;
    }
  if (l)
    return 1 + l;
  else
    return 2;
}
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
   16 byte window.  */

static void
ix86_avoid_jump_misspredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
     */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {

      nbytes += min_insn_size (insn);
      if (dump_file)
        fprintf (dump_file, "Insn %i estimated to %i bytes\n",
                 INSN_UID (insn), min_insn_size (insn));
      if ((GET_CODE (insn) == JUMP_INSN
           && GET_CODE (PATTERN (insn)) != ADDR_VEC
           && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
          || GET_CODE (insn) == CALL_INSN)
        njumps++;
      else
        continue;

      while (njumps > 3)
        {
          start = NEXT_INSN (start);
          if ((GET_CODE (start) == JUMP_INSN
               && GET_CODE (PATTERN (start)) != ADDR_VEC
               && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
              || GET_CODE (start) == CALL_INSN)
            njumps--, isjump = 1;
          else
            isjump = 0;
          nbytes -= min_insn_size (start);
        }
      if (njumps < 0)
        abort ();
      if (dump_file)
        fprintf (dump_file, "Interval %i to %i has %i bytes\n",
                 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
        {
          int padsize = 15 - nbytes + min_insn_size (insn);

          if (dump_file)
            fprintf (dump_file, "Padding insn %i by %i bytes!\n",
                     INSN_UID (insn), padsize);
          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
        }
    }
}
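/* Worked example for the padding computed above (sizes hypothetical):
   if the fourth jump INSN closes an interval with NBYTES == 12 and
   min_insn_size (insn) == 2, then padsize = 15 - 12 + 2 = 5, and the
   align insn emitted before INSN keeps all four jumps from sharing one
   16-byte fetch window.  */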
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;

  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
          || !maybe_hot_bb_p (bb))
        continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
        if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
          break;
      if (prev && GET_CODE (prev) == CODE_LABEL)
        {
          edge e;
          for (e = bb->pred; e; e = e->pred_next)
            if (EDGE_FREQUENCY (e) && e->src->index >= 0
                && !(e->flags & EDGE_FALLTHRU))
              replace = true;
        }
      if (!replace)
        {
          prev = prev_active_insn (ret);
          if (prev
              && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
                  || GET_CODE (prev) == CALL_INSN))
            replace = true;
          /* Empty functions get branch mispredict even when the jump destination
             is not visible to us.  */
          if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
            replace = true;
        }
      if (replace)
        {
          emit_insn_before (gen_return_internal_long (), ret);
          delete_insn (ret);
        }
    }
}
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  if (TARGET_ATHLON_K8 && optimize && !optimize_size)
    ix86_pad_returns ();
  if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
    ix86_avoid_jump_misspredicts ();
}
/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
        && REGNO (recog_data.operand[i]) >= 4)
      return true;
  return false;
}

/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  if (inmode != SImode
      && inmode != DImode)
    abort ();

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (Pmode);
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
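/* Worked example of the negative path above (value chosen for
   illustration): for the unsigned 32-bit input 0xfffffffe the code
   computes i0 = (in >> 1) | (in & 1) = 0x7fffffff, converts that
   nonnegative value to FP and doubles it, giving 4294967294.0.  OR-ing
   the low bit back in acts as a sticky bit so odd inputs that do not
   fit the mantissa still round correctly.  */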
/* Initialize vector TARGET via VALS.  */
void
ix86_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
  int n_elts = (GET_MODE_SIZE (mode) / elt_size);
  int i;

  for (i = n_elts - 1; i >= 0; i--)
    if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
        && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
      break;

  /* Few special cases first...
     ... constants are best loaded from constant pool.  */
  if (i < 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* ... values where only first field is non-constant are best loaded
     from the pool and overwritten via move later.  */
  if (!i)
    {
      rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
                                    GET_MODE_INNER (mode), 0);

      op = force_reg (mode, op);
      XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      switch (GET_MODE (target))
        {
        case V2DFmode:
          emit_insn (gen_sse2_movsd (target, target, op));
          break;
        case V4SFmode:
          emit_insn (gen_sse_movss (target, target, op));
          break;
        default:
          break;
        }
      return;
    }

  /* And the busy sequence doing rotations.  */
  switch (GET_MODE (target))
    {
    case V2DFmode:
      {
        rtx vecop0 =
          simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
        rtx vecop1 =
          simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);

        vecop0 = force_reg (V2DFmode, vecop0);
        vecop1 = force_reg (V2DFmode, vecop1);
        emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
      }
      break;
    case V4SFmode:
      {
        rtx vecop0 =
          simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
        rtx vecop1 =
          simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
        rtx vecop2 =
          simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
        rtx vecop3 =
          simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
        rtx tmp1 = gen_reg_rtx (V4SFmode);
        rtx tmp2 = gen_reg_rtx (V4SFmode);

        vecop0 = force_reg (V4SFmode, vecop0);
        vecop1 = force_reg (V4SFmode, vecop1);
        vecop2 = force_reg (V4SFmode, vecop2);
        vecop3 = force_reg (V4SFmode, vecop3);
        emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
        emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
        emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
      }
      break;
    default:
      abort ();
    }
}
/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE
      && VALID_SSE_REG_MODE (mode))
    return true;

  else if (TARGET_MMX
           && VALID_MMX_REG_MODE (mode))
    return true;

  else if (TARGET_3DNOW
           && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;

  else
    return false;
}
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
                        clobbers);
  return clobbers;
}
15151 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
15153 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
15154 ? reverse_condition (code
)
15155 : reverse_condition_maybe_unordered (code
));
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[1])
      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
    {
      if (REGNO (operands[0]) == FIRST_STACK_REG
          && TARGET_USE_FFREEP)
        return "ffreep\t%y0";
      return "fstp\t%y0";
    }
  if (STACK_TOP_P (operands[0]))
    return "fld%z1\t%y1";
  return "fst\t%y0";
}
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
                               gen_rtx_LABEL_REF (VOIDmode, label),
                               pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
}
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
}
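/* Note on the constant used above: 0.29289321881... is 1 - sqrt(2)/2,
   the limit of the argument range for which fyl2xp1 is defined.  Inside
   that range op0 = ln(2) * log2 (1 + op1) is computed directly, which
   avoids the cancellation an explicit 1 + op1 would introduce for tiny
   op1; outside it the code falls back to fyl2x on 1 + op1.  Either way
   the y operand is ln(2) (fldln2), so the result is log1p (op1).  */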
#include "gt-i386.h"