1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
/* Provide a fallback value of -1 for CHECK_STACK_LIMIT when nothing
   included earlier has already defined it.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QImode, HImode, SImode and DImode select slots 0-3; every other mode
   falls through to slot 4, the last element of the five-element
   multiply and divide cost arrays.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
64 /* Processor costs (relative to an add) */
/* Cost table used when tuning for size.  The entries are positional
   initializers; the trailing comment on each one names the entry.
   NOTE(review): no closing brace is visible and the embedded numbering
   skips lines (75 to 78, 99 to 101); unlike the sibling tables there is
   no large-insn entry after movzx, so rows appear to be missing --
   confirm against struct processor_costs.  */
66 struct processor_costs size_cost
= { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
   in QImode, HImode and SImode.
   Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
   in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* presumably cost of storing fp registers (was labelled
   as loading integer registers, repeating an earlier entry)
   -- TODO confirm */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
   in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
   in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
   in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
   in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
109 /* Processor costs (relative to an add) */
/* Cost table with the 386 specific costs.  The entries are positional
   initializers; the trailing comment on each one names the entry.
   NOTE(review): no closing brace is visible and the embedded numbering
   skips lines (121 to 123, 144 to 146), so rows may be missing --
   confirm against struct processor_costs.  */
111 struct processor_costs i386_cost
= { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
   in QImode, HImode and SImode.
   Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
   in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* presumably cost of storing fp registers (was labelled
   as loading integer registers, repeating an earlier entry)
   -- TODO confirm */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
   in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
   in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
   in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
   in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
/* Cost table with the 486 specific costs.  The entries are positional
   initializers; the trailing comment on each one names the entry.
   NOTE(review): no closing brace is visible and the embedded numbering
   skips lines (165 to 167, 188 to 190), so rows may be missing --
   confirm against struct processor_costs.  */
155 struct processor_costs i486_cost
= { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
   in QImode, HImode and SImode.
   Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
   in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* presumably cost of storing fp registers (was labelled
   as loading integer registers, repeating an earlier entry)
   -- TODO confirm */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
   in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
   in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
   in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
   in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
/* Positional initializers for pentium_cost, the table whose address is
   the initial value of ix86_cost later in this file.  The trailing
   comment on each entry names it.
   NOTE(review): no closing brace is visible and the embedded numbering
   skips lines (209 to 211, 232 to 234), so rows may be missing --
   confirm against struct processor_costs.  */
199 struct processor_costs pentium_cost
= {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
   in QImode, HImode and SImode.
   Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
   in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* presumably cost of storing fp registers (was labelled
   as loading integer registers, repeating an earlier entry)
   -- TODO confirm */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
   in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
   in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
   in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
   in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
/* Positional initializers for pentiumpro_cost.  The trailing comment on
   each entry names it.
   NOTE(review): no closing brace is visible and the embedded numbering
   skips lines (253 to 255, 276 to 278), so rows may be missing --
   confirm against struct processor_costs.  */
243 struct processor_costs pentiumpro_cost
= {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
   in QImode, HImode and SImode.
   Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
   in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* presumably cost of storing fp registers (was labelled
   as loading integer registers, repeating an earlier entry)
   -- TODO confirm */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
   in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
   in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
   in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
   in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
/* Positional initializers for k6_cost.  The trailing comment on each
   entry names it.
   NOTE(review): no closing brace is visible and the embedded numbering
   skips lines (297 to 299, 320 to 322), so rows may be missing --
   confirm against struct processor_costs.  */
287 struct processor_costs k6_cost
= {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
   in QImode, HImode and SImode.
   Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
   in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* presumably cost of storing fp registers (was labelled
   as loading integer registers, repeating an earlier entry)
   -- TODO confirm */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
   in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
   in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
   in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
   in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
/* Positional initializers for athlon_cost.  The trailing comment on
   each entry names it.
   NOTE(review): no closing brace is visible and the embedded numbering
   skips lines (341 to 343, 364 to 366), so rows may be missing --
   confirm against struct processor_costs.  */
331 struct processor_costs athlon_cost
= {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
   in QImode, HImode and SImode.
   Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
   in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* presumably cost of storing fp registers (was labelled
   as loading integer registers, repeating an earlier entry)
   -- TODO confirm */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
   in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
   in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
   in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
   in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
/* Positional initializers for k8_cost.  The trailing comment on each
   entry names it.
   NOTE(review): no closing brace is visible and the embedded numbering
   skips lines (385 to 387, 408 to 410), so rows may be missing --
   confirm against struct processor_costs.  */
375 struct processor_costs k8_cost
= {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
   in QImode, HImode and SImode.
   Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
   in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* presumably cost of storing fp registers (was labelled
   as loading integer registers, repeating an earlier entry)
   -- TODO confirm */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
   in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
   in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
   in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
   in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
/* Positional initializers for pentium4_cost.  The trailing comment on
   each entry names it.
   NOTE(review): no closing brace is visible and the embedded numbering
   skips lines (429 to 431, 452 to 454), so rows may be missing --
   confirm against struct processor_costs.  */
419 struct processor_costs pentium4_cost
= {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
   in QImode, HImode and SImode.
   Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
   in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* presumably cost of storing fp registers (was labelled
   as loading integer registers, repeating an earlier entry)
   -- TODO confirm */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
   in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
   in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
   in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
   in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
/* Positional initializers for nocona_cost.  The trailing comment on
   each entry names it.
   NOTE(review): no closing brace is visible and the embedded numbering
   skips lines (473 to 475, 496 to 498), so rows may be missing --
   confirm against struct processor_costs.  */
463 struct processor_costs nocona_cost
= {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
   in QImode, HImode and SImode.
   Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
   in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* presumably cost of storing fp registers (was labelled
   as loading integer registers, repeating an earlier entry)
   -- TODO confirm */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
   in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
   in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
   in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
   in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
506 const struct processor_costs
*ix86_cost
= &pentium_cost
;
/* Processor feature/optimization bitmasks.  Each m_* macro is a single
   bit at the position given by the matching PROCESSOR_* value;
   m_ATHLON_K8 combines the K8 and Athlon bits.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
520 const int x86_use_leave
= m_386
| m_K6
| m_ATHLON_K8
;
521 const int x86_push_memory
= m_386
| m_K6
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
522 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
523 const int x86_movx
= m_ATHLON_K8
| m_PPRO
| m_PENT4
| m_NOCONA
/* m_386 | m_K6 */;
524 const int x86_double_with_add
= ~m_386
;
525 const int x86_use_bit_test
= m_386
;
526 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8
| m_K6
;
527 const int x86_cmove
= m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
528 const int x86_3dnow_a
= m_ATHLON_K8
;
529 const int x86_deep_branch
= m_PPRO
| m_K6
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
530 /* Branch hints were put in P4 based on simulation result. But
531 after P4 was made, no performance benefit was observed with
532 branch hints. It also increases the code size. As the result,
533 icc never generates branch hints. */
534 const int x86_branch_hints
= 0;
535 const int x86_use_sahf
= m_PPRO
| m_K6
| m_PENT4
| m_NOCONA
;
536 const int x86_partial_reg_stall
= m_PPRO
;
537 const int x86_use_loop
= m_K6
;
538 const int x86_use_fiop
= ~(m_PPRO
| m_ATHLON_K8
| m_PENT
);
539 const int x86_use_mov0
= m_K6
;
540 const int x86_use_cltd
= ~(m_PENT
| m_K6
);
541 const int x86_read_modify_write
= ~m_PENT
;
542 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
543 const int x86_split_long_moves
= m_PPRO
;
544 const int x86_promote_QImode
= m_K6
| m_PENT
| m_386
| m_486
| m_ATHLON_K8
;
545 const int x86_fast_prefix
= ~(m_PENT
| m_486
| m_386
);
546 const int x86_single_stringop
= m_386
| m_PENT4
| m_NOCONA
;
547 const int x86_qimode_math
= ~(0);
548 const int x86_promote_qi_regs
= 0;
549 const int x86_himode_math
= ~(m_PPRO
);
550 const int x86_promote_hi_regs
= m_PPRO
;
551 const int x86_sub_esp_4
= m_ATHLON_K8
| m_PPRO
| m_PENT4
| m_NOCONA
;
552 const int x86_sub_esp_8
= m_ATHLON_K8
| m_PPRO
| m_386
| m_486
| m_PENT4
| m_NOCONA
;
553 const int x86_add_esp_4
= m_ATHLON_K8
| m_K6
| m_PENT4
| m_NOCONA
;
554 const int x86_add_esp_8
= m_ATHLON_K8
| m_PPRO
| m_K6
| m_386
| m_486
| m_PENT4
| m_NOCONA
;
555 const int x86_integer_DFmode_moves
= ~(m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
);
556 const int x86_partial_reg_dependency
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
557 const int x86_memory_mismatch_stall
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
558 const int x86_accumulate_outgoing_args
= m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
;
559 const int x86_prologue_using_move
= m_ATHLON_K8
| m_PPRO
;
560 const int x86_epilogue_using_move
= m_ATHLON_K8
| m_PPRO
;
561 const int x86_decompose_lea
= m_PENT4
| m_NOCONA
;
562 const int x86_shift1
= ~m_486
;
563 const int x86_arch_always_fancy_math_387
= m_PENT
| m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
564 const int x86_sse_partial_reg_dependency
= m_PENT4
| m_NOCONA
| m_PPRO
;
565 /* Set for machines where the type and dependencies are resolved on SSE
566 register parts instead of whole registers, so we may maintain just
567 lower part of scalar values in proper format leaving the upper part
569 const int x86_sse_split_regs
= m_ATHLON_K8
;
570 const int x86_sse_typeless_stores
= m_ATHLON_K8
;
571 const int x86_sse_load0_by_pxor
= m_PPRO
| m_PENT4
| m_NOCONA
;
572 const int x86_use_ffreep
= m_ATHLON_K8
;
573 const int x86_rep_movl_optimal
= m_386
| m_PENT
| m_PPRO
| m_K6
;
574 const int x86_inter_unit_moves
= ~(m_ATHLON_K8
);
575 const int x86_ext_80387_constants
= m_K6
| m_ATHLON
| m_PENT4
| m_NOCONA
| m_PPRO
;
576 /* Some CPU cores are not able to predict more than 4 branch instructions in
577 the 16 byte window. */
578 const int x86_four_jump_limit
= m_PPRO
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
;
579 const int x86_schedule
= m_PPRO
| m_ATHLON_K8
| m_K6
| m_PENT
;
580 const int x86_use_bt
= m_ATHLON_K8
;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
587 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
588 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
589 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
590 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
592 /* Array of the smallest class containing reg number REGNO, indexed by
   REGNO. Used by REGNO_REG_CLASS in i386.h.
   NOTE(review): the initializer braces are not present here, and only 46
   entries are visible (four special registers, six per SSE/MMX row),
   while dbx_register_map in this file lists 53 (five special registers,
   eight per SSE/MMX row) -- rows appear to be missing; confirm against
   the register layout in i386.h before relying on this table.  */
595 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
/* first eight entries: one class per register, then two NON_Q_REGS */
598 AREG
, DREG
, CREG
, BREG
,
600 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
/* next eight entries: FP_TOP_REG, FP_SECOND_REG, then six FLOAT_REGS */
602 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
603 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
606 /* flags, fpsr, dirflag, frame */
607 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
608 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
610 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
/* eight NON_Q_REGS entries; presumably the extended integer registers,
   by analogy with the row order of dbx_register_map -- TODO confirm */
612 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
613 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
/* presumably the extended SSE registers -- TODO confirm */
614 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
618 /* The "default" register map used in 32bit mode. */
/* One entry per hard register (the array is sized FIRST_PSEUDO_REGISTER),
   grouped as the row comments say.  NOTE(review): -1 presumably means the
   register has no debugger number -- confirm against the users.  */
620 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
622 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
623 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
624 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
625 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
626 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
627 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
628 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* Register numbers of six integer registers, annotated RDI, RSI, RDX,
   RCX, R8, R9.  NOTE(review): presumably the order in which x86-64
   integer arguments are assigned -- confirm against the users.  */
631 static int const x86_64_int_parameter_registers
[6] =
633 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
634 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
/* Register numbers of four integer registers.  Register 1 is RDX (it is
   annotated that way in x86_64_int_parameter_registers too).
   NOTE(review): presumably the registers used for x86-64 integer return
   values -- confirm against the users.  */
637 static int const x86_64_int_return_registers
[4] =
639 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
642 /* The "default" register map used in 64bit mode. */
/* One entry per hard register (the array is sized FIRST_PSEUDO_REGISTER).
   Unlike the 32bit map, the extended integer and extended SSE rows carry
   real numbers (8-15 and 25-32) rather than -1.  */
643 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
645 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
646 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
647 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
648 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
649 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
650 8,9,10,11,12,13,14,15, /* extended integer registers */
651 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
654 /* Define the register numbers to be used in Dwarf debugging information.
655 The SVR4 reference port C compiler uses the following register numbers
656 in its Dwarf output code:
657 0 for %eax (gcc regno = 0)
658 1 for %ecx (gcc regno = 2)
659 2 for %edx (gcc regno = 1)
660 3 for %ebx (gcc regno = 3)
661 4 for %esp (gcc regno = 7)
662 5 for %ebp (gcc regno = 6)
663 6 for %esi (gcc regno = 4)
664 7 for %edi (gcc regno = 5)
665 The following three DWARF register numbers are never generated by
666 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
667 believes these numbers have these meanings.
668 8 for %eip (no gcc equivalent)
669 9 for %eflags (gcc regno = 17)
670 10 for %trapno (no gcc equivalent)
671 It is not at all clear how we should number the FP stack registers
672 for the x86 architecture. If the version of SDB on x86/svr4 were
673 a bit less brain dead with respect to floating-point then we would
674 have a precedent to follow with respect to DWARF register numbers
675 for x86 FP registers, but the SDB on x86/svr4 is so completely
676 broken with respect to FP registers that it is hardly worth thinking
677 of it as something to strive for compatibility with.
678 The version of x86/svr4 SDB I have at the moment does (partially)
679 seem to believe that DWARF register number 11 is associated with
680 the x86 register %st(0), but that's about all. Higher DWARF
681 register numbers don't seem to be associated with anything in
682 particular, and even for DWARF regno 11, SDB only seems to under-
683 stand that it should say that a variable lives in %st(0) (when
684 asked via an `=' command) if we said it was in DWARF regno 11,
685 but SDB still prints garbage when asked for the value of the
686 variable in question (via a `/' command).
687 (Also note that the labels SDB prints for various FP stack regs
688 when doing an `x' command are all wrong.)
689 Note that these problems generally don't affect the native SVR4
690 C compiler because it doesn't allow the use of -O with -g and
691 because when it is *not* optimizing, it allocates a memory
692 location for each floating-point variable, and the memory
693 location is what gets described in the DWARF AT_location
694 attribute for the variable in question.
695 Regardless of the severe mental illness of the x86/svr4 SDB, we
696 do something sensible here and we use the following DWARF
697 register numbers. Note that these are all stack-top-relative numbers.
699 11 for %st(0) (gcc regno = 8)
700 12 for %st(1) (gcc regno = 9)
701 13 for %st(2) (gcc regno = 10)
702 14 for %st(3) (gcc regno = 11)
703 15 for %st(4) (gcc regno = 12)
704 16 for %st(5) (gcc regno = 13)
705 17 for %st(6) (gcc regno = 14)
706 18 for %st(7) (gcc regno = 15) */
/* One entry per hard register (the array is sized FIRST_PSEUDO_REGISTER).
   The general and fp rows follow the numbering listed in the comment
   before this table (e.g. gcc regno 7, %esp, maps to 4; gcc regno 8,
   %st(0), maps to 11); the flags slot (gcc regno 17) maps to 9.  */
708 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
710 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
711 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
712 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
713 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
714 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
715 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
716 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
719 /* Test and compare insns in i386.md store the information needed to
720 generate branch and scc insns here. */
722 rtx ix86_compare_op0
= NULL_RTX
;
723 rtx ix86_compare_op1
= NULL_RTX
;
#define MAX_386_STACK_LOCALS 3

/* Size of the register save area: REGPARM_MAX words plus 16 for each of
   the SSE_REGPARM_MAX registers.  */
#define X86_64_VARARGS_SIZE \
  (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
729 /* Define the structure for the machine field in struct function. */
731 struct stack_local_entry
GTY(())
736 struct stack_local_entry
*next
;
739 /* Structure describing stack frame layout.
740 Stack grows downward:
746 saved frame pointer if frame_pointer_needed
747 <- HARD_FRAME_POINTER
753 > to_allocate <- FRAME_POINTER
765 int outgoing_arguments_size
;
768 HOST_WIDE_INT to_allocate
;
769 /* The offsets relative to ARG_POINTER. */
770 HOST_WIDE_INT frame_pointer_offset
;
771 HOST_WIDE_INT hard_frame_pointer_offset
;
772 HOST_WIDE_INT stack_pointer_offset
;
774 /* When save_regs_using_mov is set, emit prologue using
775 move instead of push instructions. */
776 bool save_regs_using_mov
;
779 /* Used to enable/disable debugging features. */
780 const char *ix86_debug_arg_string
, *ix86_debug_addr_string
;
781 /* Code model option as passed by user. */
782 const char *ix86_cmodel_string
;
784 enum cmodel ix86_cmodel
;
786 const char *ix86_asm_string
;
787 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
789 const char *ix86_tls_dialect_string
;
790 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
792 /* Which unit we are generating floating point math for. */
793 enum fpmath_unit ix86_fpmath
;
795 /* Which cpu are we scheduling for. */
796 enum processor_type ix86_tune
;
797 /* Which instruction set architecture to use. */
798 enum processor_type ix86_arch
;
800 /* Strings to hold which cpu and instruction set architecture to use. */
801 const char *ix86_tune_string
; /* for -mtune=<xxx> */
802 const char *ix86_arch_string
; /* for -march=<xxx> */
803 const char *ix86_fpmath_string
; /* for -mfpmath=<xxx> */
805 /* # of registers to use to pass arguments. */
806 const char *ix86_regparm_string
;
808 /* true if sse prefetch instruction is not NOOP. */
809 int x86_prefetch_sse
;
811 /* ix86_regparm_string as a number */
814 /* Alignment to use for loops and jumps: */
/* -malign-loops=: power of two alignment for loops (override_options
   turns the value i into 1 << i).  */
const char *ix86_align_loops_string;

/* -malign-jumps=: power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* -mpreferred-stack-boundary=: power of two alignment for the stack
   boundary, in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for the stack boundary, in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Branch cost (values 1-5: see jump.c) and the -mbranch-cost= string
   that can override it.  */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* -malign-functions=: power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL, and its length.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
839 static void output_pic_addr_const (FILE *, rtx
, int);
840 static void put_condition_code (enum rtx_code
, enum machine_mode
,
842 static const char *get_some_local_dynamic_name (void);
843 static int get_some_local_dynamic_name_1 (rtx
*, void *);
844 static rtx
ix86_expand_int_compare (enum rtx_code
, rtx
, rtx
);
845 static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code
, rtx
*,
847 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
848 static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode
,
850 static rtx
get_thread_pointer (int);
851 static rtx
legitimize_tls_address (rtx
, enum tls_model
, int);
852 static void get_pc_thunk_name (char [32], unsigned int);
853 static rtx
gen_push (rtx
);
854 static int ix86_flags_dependant (rtx
, rtx
, enum attr_type
);
855 static int ix86_agi_dependant (rtx
, rtx
, enum attr_type
);
856 static struct machine_function
* ix86_init_machine_status (void);
857 static int ix86_split_to_parts (rtx
, rtx
*, enum machine_mode
);
858 static int ix86_nsaved_regs (void);
859 static void ix86_emit_save_regs (void);
860 static void ix86_emit_save_regs_using_mov (rtx
, HOST_WIDE_INT
);
861 static void ix86_emit_restore_regs_using_mov (rtx
, HOST_WIDE_INT
, int);
862 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT
);
863 static HOST_WIDE_INT
ix86_GOT_alias_set (void);
864 static void ix86_adjust_counter (rtx
, HOST_WIDE_INT
);
865 static rtx
ix86_expand_aligntest (rtx
, int);
866 static void ix86_expand_strlensi_unroll_1 (rtx
, rtx
, rtx
);
867 static int ix86_issue_rate (void);
868 static int ix86_adjust_cost (rtx
, rtx
, rtx
, int);
869 static int ia32_multipass_dfa_lookahead (void);
870 static void ix86_init_mmx_sse_builtins (void);
871 static rtx
x86_this_parameter (tree
);
872 static void x86_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
873 HOST_WIDE_INT
, tree
);
874 static bool x86_can_output_mi_thunk (tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
875 static void x86_file_start (void);
876 static void ix86_reorg (void);
877 static bool ix86_expand_carry_flag_compare (enum rtx_code
, rtx
, rtx
, rtx
*);
878 static tree
ix86_build_builtin_va_list (void);
879 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
881 static tree
ix86_gimplify_va_arg (tree
, tree
, tree
*, tree
*);
882 static bool ix86_vector_mode_supported_p (enum machine_mode
);
884 static int ix86_address_cost (rtx
);
885 static bool ix86_cannot_force_const_mem (rtx
);
886 static rtx
ix86_delegitimize_address (rtx
);
888 struct builtin_description
;
889 static rtx
ix86_expand_sse_comi (const struct builtin_description
*,
891 static rtx
ix86_expand_sse_compare (const struct builtin_description
*,
893 static rtx
ix86_expand_unop1_builtin (enum insn_code
, tree
, rtx
);
894 static rtx
ix86_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
895 static rtx
ix86_expand_binop_builtin (enum insn_code
, tree
, rtx
);
896 static rtx
ix86_expand_store_builtin (enum insn_code
, tree
);
897 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
898 static rtx
ix86_expand_fp_compare (enum rtx_code
, rtx
, rtx
, rtx
, rtx
*, rtx
*);
899 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code
);
900 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code
);
901 static int ix86_fp_comparison_sahf_cost (enum rtx_code code
);
902 static int ix86_fp_comparison_cost (enum rtx_code code
);
903 static unsigned int ix86_select_alt_pic_regnum (void);
904 static int ix86_save_reg (unsigned int, int);
905 static void ix86_compute_frame_layout (struct ix86_frame
*);
906 static int ix86_comp_type_attributes (tree
, tree
);
907 static int ix86_function_regparm (tree
, tree
);
908 const struct attribute_spec ix86_attribute_table
[];
909 static bool ix86_function_ok_for_sibcall (tree
, tree
);
910 static tree
ix86_handle_cdecl_attribute (tree
*, tree
, tree
, int, bool *);
911 static tree
ix86_handle_regparm_attribute (tree
*, tree
, tree
, int, bool *);
912 static int ix86_value_regno (enum machine_mode
);
913 static bool contains_128bit_aligned_vector_p (tree
);
914 static rtx
ix86_struct_value_rtx (tree
, int);
915 static bool ix86_ms_bitfield_layout_p (tree
);
916 static tree
ix86_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
917 static int extended_reg_mentioned_1 (rtx
*, void *);
918 static bool ix86_rtx_costs (rtx
, int, int, int *);
919 static int min_insn_size (rtx
);
920 static tree
ix86_md_asm_clobbers (tree clobbers
);
921 static bool ix86_must_pass_in_stack (enum machine_mode mode
, tree type
);
922 static bool ix86_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
925 /* This function is only used on Solaris. */
926 static void i386_solaris_elf_named_section (const char *, unsigned int, tree
)
929 /* Register class used for passing given 64bit part of the argument.
930 These represent classes as documented by the PS ABI, with the exception
931 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
932 use SF or DFmode move instead of DImode to avoid reformatting penalties.
934 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
935 whenever possible (upper half does contain padding). */
937 enum x86_64_reg_class
940 X86_64_INTEGER_CLASS
,
941 X86_64_INTEGERSI_CLASS
,
948 X86_64_COMPLEX_X87_CLASS
,
951 static const char * const x86_64_reg_class_name
[] = {
952 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
953 "sseup", "x87", "x87up", "cplx87", "no"
956 #define MAX_CLASSES 4
958 /* Table of constants used by fldpi, fldln2, etc.... */
959 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
960 static bool ext_80387_constants_init
= 0;
961 static void init_ext_80387_constants (void);
963 /* Initialize the GCC target structure. */
964 #undef TARGET_ATTRIBUTE_TABLE
965 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
966 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
967 # undef TARGET_MERGE_DECL_ATTRIBUTES
968 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
971 #undef TARGET_COMP_TYPE_ATTRIBUTES
972 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
974 #undef TARGET_INIT_BUILTINS
975 #define TARGET_INIT_BUILTINS ix86_init_builtins
977 #undef TARGET_EXPAND_BUILTIN
978 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
980 #undef TARGET_ASM_FUNCTION_EPILOGUE
981 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
983 #undef TARGET_ASM_OPEN_PAREN
984 #define TARGET_ASM_OPEN_PAREN ""
985 #undef TARGET_ASM_CLOSE_PAREN
986 #define TARGET_ASM_CLOSE_PAREN ""
988 #undef TARGET_ASM_ALIGNED_HI_OP
989 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
990 #undef TARGET_ASM_ALIGNED_SI_OP
991 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
993 #undef TARGET_ASM_ALIGNED_DI_OP
994 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
997 #undef TARGET_ASM_UNALIGNED_HI_OP
998 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
999 #undef TARGET_ASM_UNALIGNED_SI_OP
1000 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1001 #undef TARGET_ASM_UNALIGNED_DI_OP
1002 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1004 #undef TARGET_SCHED_ADJUST_COST
1005 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1006 #undef TARGET_SCHED_ISSUE_RATE
1007 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1008 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1009 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1010 ia32_multipass_dfa_lookahead
1012 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1013 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1016 #undef TARGET_HAVE_TLS
1017 #define TARGET_HAVE_TLS true
1019 #undef TARGET_CANNOT_FORCE_CONST_MEM
1020 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1022 #undef TARGET_DELEGITIMIZE_ADDRESS
1023 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1025 #undef TARGET_MS_BITFIELD_LAYOUT_P
1026 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1028 #undef TARGET_ASM_OUTPUT_MI_THUNK
1029 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1030 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1031 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1033 #undef TARGET_ASM_FILE_START
1034 #define TARGET_ASM_FILE_START x86_file_start
1036 #undef TARGET_RTX_COSTS
1037 #define TARGET_RTX_COSTS ix86_rtx_costs
1038 #undef TARGET_ADDRESS_COST
1039 #define TARGET_ADDRESS_COST ix86_address_cost
1041 #undef TARGET_FIXED_CONDITION_CODE_REGS
1042 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1043 #undef TARGET_CC_MODES_COMPATIBLE
1044 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1046 #undef TARGET_MACHINE_DEPENDENT_REORG
1047 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1049 #undef TARGET_BUILD_BUILTIN_VA_LIST
1050 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1052 #undef TARGET_MD_ASM_CLOBBERS
1053 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1055 #undef TARGET_PROMOTE_PROTOTYPES
1056 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1057 #undef TARGET_STRUCT_VALUE_RTX
1058 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1059 #undef TARGET_SETUP_INCOMING_VARARGS
1060 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1061 #undef TARGET_MUST_PASS_IN_STACK
1062 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1063 #undef TARGET_PASS_BY_REFERENCE
1064 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1066 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1067 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1069 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1070 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1072 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1073 #undef TARGET_INSERT_ATTRIBUTES
1074 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1077 struct gcc_target targetm
= TARGET_INITIALIZER
;
1080 /* The svr4 ABI for the i386 says that records and unions are returned in memory. */
1082 #ifndef DEFAULT_PCC_STRUCT_RETURN
1083 #define DEFAULT_PCC_STRUCT_RETURN 1
1086 /* Sometimes certain combinations of command options do not make
1087 sense on a particular target machine. You can define a macro
1088 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1089 defined, is executed once just after all the command options have been parsed.
1092 Don't use this macro to turn on various extra optimizations for
1093 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1096 override_options (void)
1099 int ix86_tune_defaulted
= 0;
1101 /* Comes from final.c -- no real reason to change it. */
1102 #define MAX_CODE_ALIGN 16
1106 const struct processor_costs
*cost
; /* Processor costs */
1107 const int target_enable
; /* Target flags to enable. */
1108 const int target_disable
; /* Target flags to disable. */
1109 const int align_loop
; /* Default alignments. */
1110 const int align_loop_max_skip
;
1111 const int align_jump
;
1112 const int align_jump_max_skip
;
1113 const int align_func
;
1115 const processor_target_table
[PROCESSOR_max
] =
1117 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1118 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1119 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1120 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1121 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1122 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1123 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1124 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1125 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0}
1128 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1131 const char *const name
; /* processor name or nickname. */
1132 const enum processor_type processor
;
1133 const enum pta_flags
1139 PTA_PREFETCH_SSE
= 16,
1145 const processor_alias_table
[] =
1147 {"i386", PROCESSOR_I386
, 0},
1148 {"i486", PROCESSOR_I486
, 0},
1149 {"i586", PROCESSOR_PENTIUM
, 0},
1150 {"pentium", PROCESSOR_PENTIUM
, 0},
1151 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1152 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1153 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1154 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1155 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1156 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1157 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1158 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1159 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1160 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1161 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1162 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1163 | PTA_MMX
| PTA_PREFETCH_SSE
},
1164 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1165 | PTA_MMX
| PTA_PREFETCH_SSE
},
1166 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1167 | PTA_MMX
| PTA_PREFETCH_SSE
},
1168 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1169 | PTA_MMX
| PTA_PREFETCH_SSE
},
1170 {"k6", PROCESSOR_K6
, PTA_MMX
},
1171 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1172 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1173 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1175 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1176 | PTA_3DNOW
| PTA_3DNOW_A
},
1177 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1178 | PTA_3DNOW_A
| PTA_SSE
},
1179 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1180 | PTA_3DNOW_A
| PTA_SSE
},
1181 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1182 | PTA_3DNOW_A
| PTA_SSE
},
1183 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1184 | PTA_SSE
| PTA_SSE2
},
1185 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1186 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1187 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1188 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1189 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1190 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1191 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1192 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
},
1195 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1197 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1198 SUBTARGET_OVERRIDE_OPTIONS
;
1201 /* Set the default values for switches whose default depends on TARGET_64BIT
1202 in case they weren't overwritten by command line options. */
1205 if (flag_omit_frame_pointer
== 2)
1206 flag_omit_frame_pointer
= 1;
1207 if (flag_asynchronous_unwind_tables
== 2)
1208 flag_asynchronous_unwind_tables
= 1;
1209 if (flag_pcc_struct_return
== 2)
1210 flag_pcc_struct_return
= 0;
1214 if (flag_omit_frame_pointer
== 2)
1215 flag_omit_frame_pointer
= 0;
1216 if (flag_asynchronous_unwind_tables
== 2)
1217 flag_asynchronous_unwind_tables
= 0;
1218 if (flag_pcc_struct_return
== 2)
1219 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1222 if (!ix86_tune_string
&& ix86_arch_string
)
1223 ix86_tune_string
= ix86_arch_string
;
1224 if (!ix86_tune_string
)
1226 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1227 ix86_tune_defaulted
= 1;
1229 if (!ix86_arch_string
)
1230 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
1232 if (ix86_cmodel_string
!= 0)
1234 if (!strcmp (ix86_cmodel_string
, "small"))
1235 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1237 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
1238 else if (!strcmp (ix86_cmodel_string
, "32"))
1239 ix86_cmodel
= CM_32
;
1240 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1241 ix86_cmodel
= CM_KERNEL
;
1242 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
1243 ix86_cmodel
= CM_MEDIUM
;
1244 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
1245 ix86_cmodel
= CM_LARGE
;
1247 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1251 ix86_cmodel
= CM_32
;
1253 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1255 if (ix86_asm_string
!= 0)
1257 if (!strcmp (ix86_asm_string
, "intel"))
1258 ix86_asm_dialect
= ASM_INTEL
;
1259 else if (!strcmp (ix86_asm_string
, "att"))
1260 ix86_asm_dialect
= ASM_ATT
;
1262 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1264 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1265 error ("code model %qs not supported in the %s bit mode",
1266 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1267 if (ix86_cmodel
== CM_LARGE
)
1268 sorry ("code model %<large%> not supported yet");
1269 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1270 sorry ("%i-bit mode not compiled in",
1271 (target_flags
& MASK_64BIT
) ? 64 : 32);
1273 for (i
= 0; i
< pta_size
; i
++)
1274 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1276 ix86_arch
= processor_alias_table
[i
].processor
;
1277 /* Default cpu tuning to the architecture. */
1278 ix86_tune
= ix86_arch
;
1279 if (processor_alias_table
[i
].flags
& PTA_MMX
1280 && !(target_flags_explicit
& MASK_MMX
))
1281 target_flags
|= MASK_MMX
;
1282 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1283 && !(target_flags_explicit
& MASK_3DNOW
))
1284 target_flags
|= MASK_3DNOW
;
1285 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1286 && !(target_flags_explicit
& MASK_3DNOW_A
))
1287 target_flags
|= MASK_3DNOW_A
;
1288 if (processor_alias_table
[i
].flags
& PTA_SSE
1289 && !(target_flags_explicit
& MASK_SSE
))
1290 target_flags
|= MASK_SSE
;
1291 if (processor_alias_table
[i
].flags
& PTA_SSE2
1292 && !(target_flags_explicit
& MASK_SSE2
))
1293 target_flags
|= MASK_SSE2
;
1294 if (processor_alias_table
[i
].flags
& PTA_SSE3
1295 && !(target_flags_explicit
& MASK_SSE3
))
1296 target_flags
|= MASK_SSE3
;
1297 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1298 x86_prefetch_sse
= true;
1299 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1300 error ("CPU you selected does not support x86-64 "
1306 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1308 for (i
= 0; i
< pta_size
; i
++)
1309 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
1311 ix86_tune
= processor_alias_table
[i
].processor
;
1312 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1314 if (ix86_tune_defaulted
)
1316 ix86_tune_string
= "x86-64";
1317 for (i
= 0; i
< pta_size
; i
++)
1318 if (! strcmp (ix86_tune_string
,
1319 processor_alias_table
[i
].name
))
1321 ix86_tune
= processor_alias_table
[i
].processor
;
1324 error ("CPU you selected does not support x86-64 "
1327 /* Intel CPUs have always interpreted SSE prefetch instructions as
1328 NOPs; so, we can enable SSE prefetch instructions even when
1329 -mtune (rather than -march) points us to a processor that has them.
1330 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1331 higher processors. */
1332 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
1333 x86_prefetch_sse
= true;
1337 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1340 ix86_cost
= &size_cost
;
1342 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
1343 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
1344 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
1346 /* Arrange to set up i386_stack_locals for all functions. */
1347 init_machine_status
= ix86_init_machine_status
;
1349 /* Validate -mregparm= value. */
1350 if (ix86_regparm_string
)
1352 i
= atoi (ix86_regparm_string
);
1353 if (i
< 0 || i
> REGPARM_MAX
)
1354 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1360 ix86_regparm
= REGPARM_MAX
;
1362 /* If the user has provided any of the -malign-* options,
1363 warn and use that value only if -falign-* is not set.
1364 Remove this code in GCC 3.2 or later. */
1365 if (ix86_align_loops_string
)
1367 warning ("-malign-loops is obsolete, use -falign-loops");
1368 if (align_loops
== 0)
1370 i
= atoi (ix86_align_loops_string
);
1371 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1372 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1374 align_loops
= 1 << i
;
1378 if (ix86_align_jumps_string
)
1380 warning ("-malign-jumps is obsolete, use -falign-jumps");
1381 if (align_jumps
== 0)
1383 i
= atoi (ix86_align_jumps_string
);
1384 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1385 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1387 align_jumps
= 1 << i
;
1391 if (ix86_align_funcs_string
)
1393 warning ("-malign-functions is obsolete, use -falign-functions");
1394 if (align_functions
== 0)
1396 i
= atoi (ix86_align_funcs_string
);
1397 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1398 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1400 align_functions
= 1 << i
;
1404 /* Default align_* from the processor table. */
1405 if (align_loops
== 0)
1407 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
1408 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
1410 if (align_jumps
== 0)
1412 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
1413 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
1415 if (align_functions
== 0)
1417 align_functions
= processor_target_table
[ix86_tune
].align_func
;
1420 /* Validate -mpreferred-stack-boundary= value, or provide default.
1421 The default of 128 bits is for Pentium III's SSE __m128, but we
1422 don't want additional code to keep the stack aligned when
1423 optimizing for code size. */
1424 ix86_preferred_stack_boundary
= (optimize_size
1425 ? TARGET_64BIT
? 128 : 32
1427 if (ix86_preferred_stack_boundary_string
)
1429 i
= atoi (ix86_preferred_stack_boundary_string
);
1430 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
1431 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1432 TARGET_64BIT
? 4 : 2);
1434 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
1437 /* Validate -mbranch-cost= value, or provide default. */
1438 ix86_branch_cost
= processor_target_table
[ix86_tune
].cost
->branch_cost
;
1439 if (ix86_branch_cost_string
)
1441 i
= atoi (ix86_branch_cost_string
);
1443 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1445 ix86_branch_cost
= i
;
1448 if (ix86_tls_dialect_string
)
1450 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1451 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1452 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1453 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1455 error ("bad value (%s) for -mtls-dialect= switch",
1456 ix86_tls_dialect_string
);
1459 /* Keep nonleaf frame pointers. */
1460 if (flag_omit_frame_pointer
)
1461 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
1462 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1463 flag_omit_frame_pointer
= 1;
1465 /* If we're doing fast math, we don't care about comparison order
1466 wrt NaNs. This lets us use a shorter comparison sequence. */
1467 if (flag_unsafe_math_optimizations
)
1468 target_flags
&= ~MASK_IEEE_FP
;
1470 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1471 since the insns won't need emulation. */
1472 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1473 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1475 /* Likewise, if the target doesn't have a 387, or we've specified
1476 software floating point, don't use 387 inline intrinsics. */
1478 target_flags
|= MASK_NO_FANCY_MATH_387
;
1480 /* Turn on SSE2 builtins for -msse3. */
1482 target_flags
|= MASK_SSE2
;
1484 /* Turn on SSE builtins for -msse2. */
1486 target_flags
|= MASK_SSE
;
1488 /* Turn on MMX builtins for -msse. */
1491 target_flags
|= MASK_MMX
& ~target_flags_explicit
;
1492 x86_prefetch_sse
= true;
1495 /* Turn on MMX builtins for 3Dnow. */
1497 target_flags
|= MASK_MMX
;
1501 if (TARGET_ALIGN_DOUBLE
)
1502 error ("-malign-double makes no sense in the 64bit mode");
1504 error ("-mrtd calling convention not supported in the 64bit mode");
1506 /* Enable by default the SSE and MMX builtins. Do allow the user to
1507 explicitly disable any of these. In particular, disabling SSE and
1508 MMX for kernel code is extremely useful. */
1510 |= ((MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
)
1511 & ~target_flags_explicit
);
1514 ix86_fpmath
= FPMATH_SSE
;
1518 ix86_fpmath
= FPMATH_387
;
1519 /* i386 ABI does not specify red zone. It still makes sense to use it
1520 when the programmer takes care to keep the stack from being destroyed. */
1521 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
1522 target_flags
|= MASK_NO_RED_ZONE
;
1525 if (ix86_fpmath_string
!= 0)
1527 if (! strcmp (ix86_fpmath_string
, "387"))
1528 ix86_fpmath
= FPMATH_387
;
1529 else if (! strcmp (ix86_fpmath_string
, "sse"))
1533 warning ("SSE instruction set disabled, using 387 arithmetics");
1534 ix86_fpmath
= FPMATH_387
;
1537 ix86_fpmath
= FPMATH_SSE
;
1539 else if (! strcmp (ix86_fpmath_string
, "387,sse")
1540 || ! strcmp (ix86_fpmath_string
, "sse,387"))
1544 warning ("SSE instruction set disabled, using 387 arithmetics");
1545 ix86_fpmath
= FPMATH_387
;
1547 else if (!TARGET_80387
)
1549 warning ("387 instruction set disabled, using SSE arithmetics");
1550 ix86_fpmath
= FPMATH_SSE
;
1553 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
1556 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
1559 /* If fpmath doesn't include 387, disable use of x87 intrinsics. */
1560 if (! (ix86_fpmath
& FPMATH_387
))
1561 target_flags
|= MASK_NO_FANCY_MATH_387
;
1563 if ((x86_accumulate_outgoing_args
& TUNEMASK
)
1564 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
1566 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1568 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1571 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1572 p
= strchr (internal_label_prefix
, 'X');
1573 internal_label_prefix_len
= p
- internal_label_prefix
;
1577 /* When scheduling description is not available, disable scheduler pass
1578 so it won't slow down the compilation and make x87 code slower. */
1579 if (!TARGET_SCHEDULE
)
1580 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
1584 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
1586 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1587 make the problem with not enough registers even worse. */
1588 #ifdef INSN_SCHEDULING
1590 flag_schedule_insns
= 0;
1593 /* The default values of these switches depend on the TARGET_64BIT
1594 that is not known at this moment. Mark these values with 2 and
1595 let the user override these. In case there is no command line option
1596 specifying them, we will set the defaults in override_options. */
1598 flag_omit_frame_pointer
= 2;
1599 flag_pcc_struct_return
= 2;
1600 flag_asynchronous_unwind_tables
= 2;
1601 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
1602 SUBTARGET_OPTIMIZATION_OPTIONS
;
1606 /* Table of valid machine attributes. */
1607 const struct attribute_spec ix86_attribute_table
[] =
1609 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1610 /* Stdcall attribute says callee is responsible for popping arguments
1611 if they are not variable. */
1612 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1613 /* Fastcall attribute says callee is responsible for popping arguments
1614 if they are not variable. */
1615 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1616 /* Cdecl attribute says the callee is a normal C declaration */
1617 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1618 /* Regparm attribute specifies how many integer arguments are to be
1619 passed in registers. */
1620 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute
},
1621 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1622 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
1623 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
1624 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
1626 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
1627 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
1628 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1629 SUBTARGET_ATTRIBUTE_TABLE
,
1631 { NULL
, 0, 0, false, false, false, NULL
}
1634 /* Decide whether we can make a sibling call to a function. DECL is the
1635 declaration of the function being targeted by the call and EXP is the
1636 CALL_EXPR representing the call. */
1639 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
1641 /* If we are generating position-independent code, we cannot sibcall
1642 optimize any indirect call, or a direct call to a global function,
1643 as the PLT requires %ebx be live. */
1644 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| TREE_PUBLIC (decl
)))
1647 /* If we are returning floats on the 80387 register stack, we cannot
1648 make a sibcall from a function that doesn't return a float to a
1649 function that does or, conversely, from a function that does return
1650 a float to a function that doesn't; the necessary stack adjustment
1651 would not be executed. */
1652 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp
)))
1653 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)))))
1656 /* If this call is indirect, we'll need to be able to use a call-clobbered
1657 register for the address of the target function. Make sure that all
1658 such registers are not used for passing parameters. */
1659 if (!decl
&& !TARGET_64BIT
)
1663 /* We're looking at the CALL_EXPR, we need the type of the function. */
1664 type
= TREE_OPERAND (exp
, 0); /* pointer expression */
1665 type
= TREE_TYPE (type
); /* pointer type */
1666 type
= TREE_TYPE (type
); /* function type */
1668 if (ix86_function_regparm (type
, NULL
) >= 3)
1670 /* ??? Need to count the actual number of registers to be used,
1671 not the possible number of registers. Fix later. */
1676 /* Otherwise okay. That also includes certain types of indirect calls. */
1680 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1681 arguments as in struct attribute_spec.handler. */
1683 ix86_handle_cdecl_attribute (tree
*node
, tree name
,
1684 tree args ATTRIBUTE_UNUSED
,
1685 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
1687 if (TREE_CODE (*node
) != FUNCTION_TYPE
1688 && TREE_CODE (*node
) != METHOD_TYPE
1689 && TREE_CODE (*node
) != FIELD_DECL
1690 && TREE_CODE (*node
) != TYPE_DECL
)
1692 warning ("%qs attribute only applies to functions",
1693 IDENTIFIER_POINTER (name
));
1694 *no_add_attrs
= true;
1698 if (is_attribute_p ("fastcall", name
))
1700 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
1702 error ("fastcall and stdcall attributes are not compatible");
1704 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
1706 error ("fastcall and regparm attributes are not compatible");
1709 else if (is_attribute_p ("stdcall", name
))
1711 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
1713 error ("fastcall and stdcall attributes are not compatible");
1720 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name
));
1721 *no_add_attrs
= true;
1727 /* Handle a "regparm" attribute;
1728 arguments as in struct attribute_spec.handler. */
1730 ix86_handle_regparm_attribute (tree
*node
, tree name
, tree args
,
1731 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
1733 if (TREE_CODE (*node
) != FUNCTION_TYPE
1734 && TREE_CODE (*node
) != METHOD_TYPE
1735 && TREE_CODE (*node
) != FIELD_DECL
1736 && TREE_CODE (*node
) != TYPE_DECL
)
1738 warning ("%qs attribute only applies to functions",
1739 IDENTIFIER_POINTER (name
));
1740 *no_add_attrs
= true;
1746 cst
= TREE_VALUE (args
);
1747 if (TREE_CODE (cst
) != INTEGER_CST
)
1749 warning ("%qs attribute requires an integer constant argument",
1750 IDENTIFIER_POINTER (name
));
1751 *no_add_attrs
= true;
1753 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
1755 warning ("argument to %qs attribute larger than %d",
1756 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
1757 *no_add_attrs
= true;
1760 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
1762 error ("fastcall and regparm attributes are not compatible");
1769 /* Return 0 if the attributes for two types are incompatible, 1 if they
1770 are compatible, and 2 if they are nearly compatible (which causes a
1771 warning to be generated). */
1774 ix86_comp_type_attributes (tree type1
, tree type2
)
1776 /* Check for mismatch of non-default calling convention. */
1777 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
1779 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
1782 /* Check for mismatched fastcall types */
1783 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
1784 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
1787 /* Check for mismatched return types (cdecl vs stdcall). */
1788 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
1789 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
1791 if (ix86_function_regparm (type1
, NULL
)
1792 != ix86_function_regparm (type2
, NULL
))
1797 /* Return the regparm value for a function with the indicated TYPE and DECL.
1798 DECL may be NULL when calling function indirectly
1799 or considering a libcall. */
1802 ix86_function_regparm (tree type
, tree decl
)
1805 int regparm
= ix86_regparm
;
1806 bool user_convention
= false;
1810 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
1813 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1814 user_convention
= true;
1817 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
1820 user_convention
= true;
1823 /* Use register calling convention for local functions when possible. */
1824 if (!TARGET_64BIT
&& !user_convention
&& decl
1825 && flag_unit_at_a_time
&& !profile_flag
)
1827 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
1830 /* We can't use regparm(3) for nested functions as these use
1831 static chain pointer in third argument. */
1832 if (DECL_CONTEXT (decl
) && !DECL_NO_STATIC_CHAIN (decl
))
1842 /* Return true if EAX is live at the start of the function. Used by
1843 ix86_expand_prologue to determine if we need special help before
1844 calling allocate_stack_worker. */
1847 ix86_eax_live_at_start_p (void)
1849 /* Cheat. Don't bother working forward from ix86_function_regparm
1850 to the function type to whether an actual argument is located in
1851 eax. Instead just look at cfg info, which is still close enough
1852 to correct at this point. This gives false positives for broken
1853 functions that might use uninitialized data that happens to be
1854 allocated in eax, but who cares? */
1855 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->global_live_at_end
, 0);
1858 /* Value is the number of bytes of arguments automatically
1859 popped when returning from a subroutine call.
1860 FUNDECL is the declaration node of the function (as a tree),
1861 FUNTYPE is the data type of the function (as a tree),
1862 or for a library call it is an identifier node for the subroutine name.
1863 SIZE is the number of bytes of arguments passed on the stack.
1865 On the 80386, the RTD insn may be used to pop them if the number
1866 of args is fixed, but if the number is variable then the caller
1867 must pop them all. RTD can't be used for library calls now
1868 because the library is compiled with the Unix compiler.
1869 Use of RTD is a selectable option, since it is incompatible with
1870 standard Unix calling sequences. If the option is not selected,
1871 the caller must always pop the args.
1873 The attribute stdcall is equivalent to RTD on a per module basis. */
1876 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
1878 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
1880 /* Cdecl functions override -mrtd, and never pop the stack. */
1881 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
1883 /* Stdcall and fastcall functions will pop the stack if not
1885 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
1886 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
1890 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1891 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
1892 == void_type_node
)))
1896 /* Lose any fake structure return argument if it is passed on the stack. */
1897 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
1899 && !KEEP_AGGREGATE_RETURN_POINTER
)
1901 int nregs
= ix86_function_regparm (funtype
, fundecl
);
1904 return GET_MODE_SIZE (Pmode
);
1910 /* Argument support functions. */
1912 /* Return true when register may be used to pass function parameters. */
1914 ix86_function_arg_regno_p (int regno
)
1918 return (regno
< REGPARM_MAX
1919 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
1920 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
1922 /* RAX is used as hidden argument to va_arg functions. */
1925 for (i
= 0; i
< REGPARM_MAX
; i
++)
1926 if (regno
== x86_64_int_parameter_registers
[i
])
1931 /* Return true if we do not know how to pass TYPE solely in registers. */
1934 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
1936 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
1939 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1940 The layout_type routine is crafty and tries to trick us into passing
1941 currently unsupported vector types on the stack by using TImode. */
1942 return (!TARGET_64BIT
&& mode
== TImode
1943 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
1946 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1947 for a call to a function whose data type is FNTYPE.
1948 For a library call, FNTYPE is 0. */
1951 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
1952 tree fntype
, /* tree ptr for function decl */
1953 rtx libname
, /* SYMBOL_REF of library name or 0 */
1956 static CUMULATIVE_ARGS zero_cum
;
1957 tree param
, next_param
;
1959 if (TARGET_DEBUG_ARG
)
1961 fprintf (stderr
, "\ninit_cumulative_args (");
1963 fprintf (stderr
, "fntype code = %s, ret code = %s",
1964 tree_code_name
[(int) TREE_CODE (fntype
)],
1965 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
1967 fprintf (stderr
, "no fntype");
1970 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
1975 /* Set up the number of registers to use for passing arguments. */
1977 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
1979 cum
->nregs
= ix86_regparm
;
1981 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1983 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
1984 cum
->warn_sse
= true;
1985 cum
->warn_mmx
= true;
1986 cum
->maybe_vaarg
= false;
1988 /* Use ecx and edx registers if function has fastcall attribute */
1989 if (fntype
&& !TARGET_64BIT
)
1991 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
1998 /* Determine if this function has variable arguments. This is
1999 indicated by the last argument being 'void_type_node' if there
2000 are no variable arguments. If there are variable arguments, then
2001 we won't pass anything in registers in 32-bit mode. */
2003 if (cum
->nregs
|| cum
->mmx_nregs
|| cum
->sse_nregs
)
2005 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
2006 param
!= 0; param
= next_param
)
2008 next_param
= TREE_CHAIN (param
);
2009 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
2020 cum
->maybe_vaarg
= true;
2024 if ((!fntype
&& !libname
)
2025 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
2026 cum
->maybe_vaarg
= 1;
2028 if (TARGET_DEBUG_ARG
)
2029 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
2034 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2035 But in the case of vector types, it is some vector mode.
2037 When we have only some of our vector isa extensions enabled, then there
2038 are some modes for which vector_mode_supported_p is false. For these
2039 modes, the generic vector support in gcc will choose some non-vector mode
2040 in order to implement the type. By computing the natural mode, we'll
2041 select the proper ABI location for the operand and not depend on whatever
2042 the middle-end decides to do with these vector types. */
2044 static enum machine_mode
2045 type_natural_mode (tree type
)
2047 enum machine_mode mode
= TYPE_MODE (type
);
2049 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
2051 HOST_WIDE_INT size
= int_size_in_bytes (type
);
2052 if ((size
== 8 || size
== 16)
2053 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2054 && TYPE_VECTOR_SUBPARTS (type
) > 1)
2056 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
2058 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
2059 mode
= MIN_MODE_VECTOR_FLOAT
;
2061 mode
= MIN_MODE_VECTOR_INT
;
2063 /* Get the mode which has this inner mode and number of units. */
2064 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
2065 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
2066 && GET_MODE_INNER (mode
) == innermode
)
2076 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2077 this may not agree with the mode that the type system has chosen for the
2078 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2079 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2082 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
2087 if (orig_mode
!= BLKmode
)
2088 tmp
= gen_rtx_REG (orig_mode
, regno
);
2091 tmp
= gen_rtx_REG (mode
, regno
);
2092 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
2093 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
2099 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2100 of this code is to classify each 8bytes of incoming argument by the register
2101 class and assign registers accordingly. */
2103 /* Return the union class of CLASS1 and CLASS2.
2104 See the x86-64 PS ABI for details. */
2106 static enum x86_64_reg_class
2107 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
2109 /* Rule #1: If both classes are equal, this is the resulting class. */
2110 if (class1
== class2
)
2113 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2115 if (class1
== X86_64_NO_CLASS
)
2117 if (class2
== X86_64_NO_CLASS
)
2120 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2121 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
2122 return X86_64_MEMORY_CLASS
;
2124 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2125 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
2126 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
2127 return X86_64_INTEGERSI_CLASS
;
2128 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
2129 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
2130 return X86_64_INTEGER_CLASS
;
2132 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2134 if (class1
== X86_64_X87_CLASS
2135 || class1
== X86_64_X87UP_CLASS
2136 || class1
== X86_64_COMPLEX_X87_CLASS
2137 || class2
== X86_64_X87_CLASS
2138 || class2
== X86_64_X87UP_CLASS
2139 || class2
== X86_64_COMPLEX_X87_CLASS
)
2140 return X86_64_MEMORY_CLASS
;
2142 /* Rule #6: Otherwise class SSE is used. */
2143 return X86_64_SSE_CLASS
;
2146 /* Classify the argument of type TYPE and mode MODE.
2147 CLASSES will be filled by the register class used to pass each word
2148 of the operand. The number of words is returned. In case the parameter
2149 should be passed in memory, 0 is returned. As a special case for zero
2150 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2152 BIT_OFFSET is used internally for handling records and specifies the
2153 offset in bits modulo 256 to avoid overflow cases.
2155 See the x86-64 PS ABI for details.
2159 classify_argument (enum machine_mode mode
, tree type
,
2160 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
2162 HOST_WIDE_INT bytes
=
2163 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2164 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2166 /* Variable sized entities are always passed/returned in memory. */
2170 if (mode
!= VOIDmode
2171 && targetm
.calls
.must_pass_in_stack (mode
, type
))
2174 if (type
&& AGGREGATE_TYPE_P (type
))
2178 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
2180 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2184 for (i
= 0; i
< words
; i
++)
2185 classes
[i
] = X86_64_NO_CLASS
;
2187 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2188 signalize memory class, so handle it as special case. */
2191 classes
[0] = X86_64_NO_CLASS
;
2195 /* Classify each field of record and merge classes. */
2196 if (TREE_CODE (type
) == RECORD_TYPE
)
2198 /* For classes first merge in the field of the subclasses. */
2199 if (TYPE_BINFO (type
))
2201 tree binfo
, base_binfo
;
2204 for (binfo
= TYPE_BINFO (type
), basenum
= 0;
2205 BINFO_BASE_ITERATE (binfo
, basenum
, base_binfo
); basenum
++)
2208 int offset
= tree_low_cst (BINFO_OFFSET (base_binfo
), 0) * 8;
2209 tree type
= BINFO_TYPE (base_binfo
);
2211 num
= classify_argument (TYPE_MODE (type
),
2213 (offset
+ bit_offset
) % 256);
2216 for (i
= 0; i
< num
; i
++)
2218 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
2220 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2224 /* And now merge the fields of structure. */
2225 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2227 if (TREE_CODE (field
) == FIELD_DECL
)
2231 /* Bitfields are always classified as integer. Handle them
2232 early, since later code would consider them to be
2233 misaligned integers. */
2234 if (DECL_BIT_FIELD (field
))
2236 for (i
= int_bit_position (field
) / 8 / 8;
2237 i
< (int_bit_position (field
)
2238 + tree_low_cst (DECL_SIZE (field
), 0)
2241 merge_classes (X86_64_INTEGER_CLASS
,
2246 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2247 TREE_TYPE (field
), subclasses
,
2248 (int_bit_position (field
)
2249 + bit_offset
) % 256);
2252 for (i
= 0; i
< num
; i
++)
2255 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
2257 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2263 /* Arrays are handled as small records. */
2264 else if (TREE_CODE (type
) == ARRAY_TYPE
)
2267 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
2268 TREE_TYPE (type
), subclasses
, bit_offset
);
2272 /* The partial classes are now full classes. */
2273 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
2274 subclasses
[0] = X86_64_SSE_CLASS
;
2275 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
2276 subclasses
[0] = X86_64_INTEGER_CLASS
;
2278 for (i
= 0; i
< words
; i
++)
2279 classes
[i
] = subclasses
[i
% num
];
2281 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2282 else if (TREE_CODE (type
) == UNION_TYPE
2283 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
2285 /* For classes first merge in the field of the subclasses. */
2286 if (TYPE_BINFO (type
))
2288 tree binfo
, base_binfo
;
2291 for (binfo
= TYPE_BINFO (type
), basenum
= 0;
2292 BINFO_BASE_ITERATE (binfo
, basenum
, base_binfo
); basenum
++)
2295 int offset
= tree_low_cst (BINFO_OFFSET (base_binfo
), 0) * 8;
2296 tree type
= BINFO_TYPE (base_binfo
);
2298 num
= classify_argument (TYPE_MODE (type
),
2300 (offset
+ (bit_offset
% 64)) % 256);
2303 for (i
= 0; i
< num
; i
++)
2305 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
2307 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2311 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2313 if (TREE_CODE (field
) == FIELD_DECL
)
2316 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2317 TREE_TYPE (field
), subclasses
,
2321 for (i
= 0; i
< num
; i
++)
2322 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
2329 /* Final merger cleanup. */
2330 for (i
= 0; i
< words
; i
++)
2332 /* If one class is MEMORY, everything should be passed in
2334 if (classes
[i
] == X86_64_MEMORY_CLASS
)
2337 /* The X86_64_SSEUP_CLASS should be always preceded by
2338 X86_64_SSE_CLASS. */
2339 if (classes
[i
] == X86_64_SSEUP_CLASS
2340 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
2341 classes
[i
] = X86_64_SSE_CLASS
;
2343 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2344 if (classes
[i
] == X86_64_X87UP_CLASS
2345 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
2346 classes
[i
] = X86_64_SSE_CLASS
;
2351 /* Compute alignment needed. We align all types to natural boundaries with
2352 exception of XFmode that is aligned to 64bits. */
2353 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
2355 int mode_alignment
= GET_MODE_BITSIZE (mode
);
2358 mode_alignment
= 128;
2359 else if (mode
== XCmode
)
2360 mode_alignment
= 256;
2361 if (COMPLEX_MODE_P (mode
))
2362 mode_alignment
/= 2;
2363 /* Misaligned fields are always returned in memory. */
2364 if (bit_offset
% mode_alignment
)
2368 /* for V1xx modes, just use the base mode */
2369 if (VECTOR_MODE_P (mode
)
2370 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
2371 mode
= GET_MODE_INNER (mode
);
2373 /* Classification of atomic types. */
2383 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
2384 classes
[0] = X86_64_INTEGERSI_CLASS
;
2386 classes
[0] = X86_64_INTEGER_CLASS
;
2390 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2395 if (!(bit_offset
% 64))
2396 classes
[0] = X86_64_SSESF_CLASS
;
2398 classes
[0] = X86_64_SSE_CLASS
;
2401 classes
[0] = X86_64_SSEDF_CLASS
;
2404 classes
[0] = X86_64_X87_CLASS
;
2405 classes
[1] = X86_64_X87UP_CLASS
;
2408 classes
[0] = X86_64_SSE_CLASS
;
2409 classes
[1] = X86_64_SSEUP_CLASS
;
2412 classes
[0] = X86_64_SSE_CLASS
;
2415 classes
[0] = X86_64_SSEDF_CLASS
;
2416 classes
[1] = X86_64_SSEDF_CLASS
;
2419 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
2422 /* This mode is larger than 16 bytes. */
2430 classes
[0] = X86_64_SSE_CLASS
;
2431 classes
[1] = X86_64_SSEUP_CLASS
;
2437 classes
[0] = X86_64_SSE_CLASS
;
2443 if (VECTOR_MODE_P (mode
))
2447 if (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
)
2449 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
2450 classes
[0] = X86_64_INTEGERSI_CLASS
;
2452 classes
[0] = X86_64_INTEGER_CLASS
;
2453 classes
[1] = X86_64_INTEGER_CLASS
;
2454 return 1 + (bytes
> 8);
2461 /* Examine the argument and compute the number of registers required in each
2462 class. Return 0 iff the parameter should be passed in memory. */
2464 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
2465 int *int_nregs
, int *sse_nregs
)
2467 enum x86_64_reg_class
class[MAX_CLASSES
];
2468 int n
= classify_argument (mode
, type
, class, 0);
2474 for (n
--; n
>= 0; n
--)
2477 case X86_64_INTEGER_CLASS
:
2478 case X86_64_INTEGERSI_CLASS
:
2481 case X86_64_SSE_CLASS
:
2482 case X86_64_SSESF_CLASS
:
2483 case X86_64_SSEDF_CLASS
:
2486 case X86_64_NO_CLASS
:
2487 case X86_64_SSEUP_CLASS
:
2489 case X86_64_X87_CLASS
:
2490 case X86_64_X87UP_CLASS
:
2494 case X86_64_COMPLEX_X87_CLASS
:
2495 return in_return
? 2 : 0;
2496 case X86_64_MEMORY_CLASS
:
2502 /* Construct container for the argument used by GCC interface. See
2503 FUNCTION_ARG for the detailed description. */
2506 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
2507 tree type
, int in_return
, int nintregs
, int nsseregs
,
2508 const int *intreg
, int sse_regno
)
2510 enum machine_mode tmpmode
;
2512 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2513 enum x86_64_reg_class
class[MAX_CLASSES
];
2517 int needed_sseregs
, needed_intregs
;
2518 rtx exp
[MAX_CLASSES
];
2521 n
= classify_argument (mode
, type
, class, 0);
2522 if (TARGET_DEBUG_ARG
)
2525 fprintf (stderr
, "Memory class\n");
2528 fprintf (stderr
, "Classes:");
2529 for (i
= 0; i
< n
; i
++)
2531 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
2533 fprintf (stderr
, "\n");
2538 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
2541 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
2544 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2545 some less clueful developer tries to use floating-point anyway. */
2546 if (needed_sseregs
&& !TARGET_SSE
)
2548 static bool issued_error
;
2551 issued_error
= true;
2553 error ("SSE register return with SSE disabled");
2555 error ("SSE register argument with SSE disabled");
2560 /* First construct simple cases. Avoid SCmode, since we want to use
2561 single register to pass this type. */
2562 if (n
== 1 && mode
!= SCmode
)
2565 case X86_64_INTEGER_CLASS
:
2566 case X86_64_INTEGERSI_CLASS
:
2567 return gen_rtx_REG (mode
, intreg
[0]);
2568 case X86_64_SSE_CLASS
:
2569 case X86_64_SSESF_CLASS
:
2570 case X86_64_SSEDF_CLASS
:
2571 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
2572 case X86_64_X87_CLASS
:
2573 case X86_64_COMPLEX_X87_CLASS
:
2574 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
2575 case X86_64_NO_CLASS
:
2576 /* Zero sized array, struct or class. */
2581 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
2583 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2585 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
2586 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
2587 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
2588 && class[1] == X86_64_INTEGER_CLASS
2589 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
2590 && intreg
[0] + 1 == intreg
[1])
2591 return gen_rtx_REG (mode
, intreg
[0]);
2593 /* Otherwise figure out the entries of the PARALLEL. */
2594 for (i
= 0; i
< n
; i
++)
2598 case X86_64_NO_CLASS
:
2600 case X86_64_INTEGER_CLASS
:
2601 case X86_64_INTEGERSI_CLASS
:
2602 /* Merge TImodes on aligned occasions here too. */
2603 if (i
* 8 + 8 > bytes
)
2604 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
2605 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
2609 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2610 if (tmpmode
== BLKmode
)
2612 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2613 gen_rtx_REG (tmpmode
, *intreg
),
2617 case X86_64_SSESF_CLASS
:
2618 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2619 gen_rtx_REG (SFmode
,
2620 SSE_REGNO (sse_regno
)),
2624 case X86_64_SSEDF_CLASS
:
2625 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2626 gen_rtx_REG (DFmode
,
2627 SSE_REGNO (sse_regno
)),
2631 case X86_64_SSE_CLASS
:
2632 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
2636 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2637 gen_rtx_REG (tmpmode
,
2638 SSE_REGNO (sse_regno
)),
2640 if (tmpmode
== TImode
)
2648 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2649 for (i
= 0; i
< nexps
; i
++)
2650 XVECEXP (ret
, 0, i
) = exp
[i
];
2654 /* Update the data in CUM to advance over an argument
2655 of mode MODE and data type TYPE.
2656 (TYPE is null for libcalls where that information may not be available.) */
2659 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
2660 tree type
, int named
)
2663 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2664 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2666 if (TARGET_DEBUG_ARG
)
2667 fprintf (stderr
, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
2668 "mode=%s, named=%d)\n\n",
2669 words
, cum
->words
, cum
->nregs
, cum
->sse_nregs
,
2670 GET_MODE_NAME (mode
), named
);
2673 int int_nregs
, sse_nregs
;
2674 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
2675 cum
->words
+= words
;
2676 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2678 cum
->nregs
-= int_nregs
;
2679 cum
->sse_nregs
-= sse_nregs
;
2680 cum
->regno
+= int_nregs
;
2681 cum
->sse_regno
+= sse_nregs
;
2684 cum
->words
+= words
;
2688 if (TARGET_SSE
&& SSE_REG_MODE_P (mode
)
2689 && (!type
|| !AGGREGATE_TYPE_P (type
)))
2691 cum
->sse_words
+= words
;
2692 cum
->sse_nregs
-= 1;
2693 cum
->sse_regno
+= 1;
2694 if (cum
->sse_nregs
<= 0)
2700 else if (TARGET_MMX
&& MMX_REG_MODE_P (mode
)
2701 && (!type
|| !AGGREGATE_TYPE_P (type
)))
2703 cum
->mmx_words
+= words
;
2704 cum
->mmx_nregs
-= 1;
2705 cum
->mmx_regno
+= 1;
2706 if (cum
->mmx_nregs
<= 0)
2714 cum
->words
+= words
;
2715 cum
->nregs
-= words
;
2716 cum
->regno
+= words
;
2718 if (cum
->nregs
<= 0)
2728 /* Define where to put the arguments to a function.
2729 Value is zero to push the argument on the stack,
2730 or a hard register in which to store the argument.
2732 MODE is the argument's machine mode.
2733 TYPE is the data type of the argument (as a tree).
2734 This is null for libcalls where that information may not be available.
2736 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2737 the preceding args and about the function being called.
2738 NAMED is nonzero if this argument is a named parameter
2739 (otherwise it is an extra parameter matching an ellipsis). */
2742 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode orig_mode
,
2743 tree type
, int named
)
2745 enum machine_mode mode
= orig_mode
;
2748 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2749 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2750 static bool warnedsse
, warnedmmx
;
2752 /* To simplify the code below, represent vector types with a vector mode
2753 even if MMX/SSE are not active. */
2754 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
2755 mode
= type_natural_mode (type
);
2757 /* Handle a hidden AL argument containing number of registers for varargs
2758 x86-64 functions. For i386 ABI just return constm1_rtx to avoid any AL settings. */
2760 if (mode
== VOIDmode
)
2763 return GEN_INT (cum
->maybe_vaarg
2764 ? (cum
->sse_nregs
< 0
2772 ret
= construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
2774 &x86_64_int_parameter_registers
[cum
->regno
],
2779 /* For now, pass fp/complex values on the stack. */
2791 if (words
<= cum
->nregs
)
2793 int regno
= cum
->regno
;
2795 /* Fastcall allocates the first two DWORD (SImode) or
2796 smaller arguments to ECX and EDX. */
2799 if (mode
== BLKmode
|| mode
== DImode
)
2802 /* ECX not EAX is the first allocated register. */
2806 ret
= gen_rtx_REG (mode
, regno
);
2816 if (!type
|| !AGGREGATE_TYPE_P (type
))
2818 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
2821 warning ("SSE vector argument without SSE enabled "
2825 ret
= gen_reg_or_parallel (mode
, orig_mode
,
2826 cum
->sse_regno
+ FIRST_SSE_REG
);
2833 if (!type
|| !AGGREGATE_TYPE_P (type
))
2835 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
2838 warning ("MMX vector argument without MMX enabled "
2842 ret
= gen_reg_or_parallel (mode
, orig_mode
,
2843 cum
->mmx_regno
+ FIRST_MMX_REG
);
2848 if (TARGET_DEBUG_ARG
)
2851 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2852 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2855 print_simple_rtl (stderr
, ret
);
2857 fprintf (stderr
, ", stack");
2859 fprintf (stderr
, " )\n");
2865 /* A C expression that indicates when an argument must be passed by
2866 reference. If nonzero for an argument, a copy of that argument is
2867 made in memory and a pointer to the argument is passed instead of
2868 the argument itself. The pointer is passed in whatever way is
2869 appropriate for passing a pointer to that type. */
2872 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
2873 enum machine_mode mode ATTRIBUTE_UNUSED
,
2874 tree type
, bool named ATTRIBUTE_UNUSED
)
2879 if (type
&& int_size_in_bytes (type
) == -1)
2881 if (TARGET_DEBUG_ARG
)
2882 fprintf (stderr
, "function_arg_pass_by_reference\n");
2889 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2890 ABI. Only called if TARGET_SSE. */
2892 contains_128bit_aligned_vector_p (tree type
)
2894 enum machine_mode mode
= TYPE_MODE (type
);
2895 if (SSE_REG_MODE_P (mode
)
2896 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
2898 if (TYPE_ALIGN (type
) < 128)
2901 if (AGGREGATE_TYPE_P (type
))
2903 /* Walk the aggregates recursively. */
2904 if (TREE_CODE (type
) == RECORD_TYPE
2905 || TREE_CODE (type
) == UNION_TYPE
2906 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
2910 if (TYPE_BINFO (type
))
2912 tree binfo
, base_binfo
;
2915 for (binfo
= TYPE_BINFO (type
), i
= 0;
2916 BINFO_BASE_ITERATE (binfo
, i
, base_binfo
); i
++)
2917 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo
)))
2920 /* And now check the fields of the structure. */
2921 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2923 if (TREE_CODE (field
) == FIELD_DECL
2924 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
2928 /* Just in case some language passes arrays by value. */
2929 else if (TREE_CODE (type
) == ARRAY_TYPE
)
2931 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
2940 /* Gives the alignment boundary, in bits, of an argument with the
2941 specified mode and type. */
2944 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
2948 align
= TYPE_ALIGN (type
);
2950 align
= GET_MODE_ALIGNMENT (mode
);
2951 if (align
< PARM_BOUNDARY
)
2952 align
= PARM_BOUNDARY
;
2955 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2956 make an exception for SSE modes since these require 128bit alignment.
2959 The handling here differs from field_alignment. ICC aligns MMX
2960 arguments to 4 byte boundaries, while structure fields are aligned
2961 to 8 byte boundaries. */
2963 align
= PARM_BOUNDARY
;
2966 if (!SSE_REG_MODE_P (mode
))
2967 align
= PARM_BOUNDARY
;
2971 if (!contains_128bit_aligned_vector_p (type
))
2972 align
= PARM_BOUNDARY
;
2980 /* Return true if REGNO is a possible register number of a function value. */
2982 ix86_function_value_regno_p (int regno
)
2986 return ((regno
) == 0
2987 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
2988 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
2990 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
2991 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
2992 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
2995 /* Define how to find the value returned by a function.
2996 VALTYPE is the data type of the value (as a tree).
2997 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2998 otherwise, FUNC is 0. */
3000 ix86_function_value (tree valtype
)
3004 rtx ret
= construct_container (type_natural_mode (valtype
),
3005 TYPE_MODE (valtype
), valtype
,
3006 1, REGPARM_MAX
, SSE_REGPARM_MAX
,
3007 x86_64_int_return_registers
, 0);
3008 /* For zero sized structures, construct_container returns NULL, but we
3009 need to keep the rest of the compiler happy by returning a meaningful value. */
3011 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
3015 return gen_rtx_REG (TYPE_MODE (valtype
),
3016 ix86_value_regno (TYPE_MODE (valtype
)));
3019 /* Return nonzero iff TYPE is returned in memory. */
3021 ix86_return_in_memory (tree type
)
3023 int needed_intregs
, needed_sseregs
, size
;
3024 enum machine_mode mode
= TYPE_MODE (type
);
3027 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
3029 if (mode
== BLKmode
)
3032 size
= int_size_in_bytes (type
);
3034 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
3037 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
3039 /* User-created vectors small enough to fit in EAX. */
3043 /* MMX/3dNow values are returned on the stack, since we've
3044 got to EMMS/FEMMS before returning. */
3048 /* SSE values are returned in XMM0, except when it doesn't exist. */
3050 return (TARGET_SSE
? 0 : 1);
3061 /* When returning SSE vector types, we have a choice of either
3062 (1) being abi incompatible with a -march switch, or
3063 (2) generating an error.
3064 Given no good solution, I think the safest thing is one warning.
3065 The user won't be able to use -Werror, but....
3067 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3068 called in response to actually generating a caller or callee that
3069 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3070 via aggregate_value_p for general type probing from tree-ssa. */
3073 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
3077 if (!TARGET_SSE
&& type
&& !warned
)
3079 /* Look at the return type of the function, not the function type. */
3080 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
3083 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
3086 warning ("SSE vector return without SSE enabled changes the ABI");
3093 /* Define how to find the value returned by a library function
3094 assuming the value has mode MODE. */
3096 ix86_libcall_value (enum machine_mode mode
)
3107 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
3110 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
3114 return gen_rtx_REG (mode
, 0);
3118 return gen_rtx_REG (mode
, ix86_value_regno (mode
));
3121 /* Given a mode, return the register to use for a return value. */
3124 ix86_value_regno (enum machine_mode mode
)
3126 /* Floating point return values in %st(0). */
3127 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
&& TARGET_FLOAT_RETURNS_IN_80387
)
3128 return FIRST_FLOAT_REG
;
3129 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3130 we prevent this case when sse is not available. */
3131 if (mode
== TImode
|| (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
3132 return FIRST_SSE_REG
;
3133 /* Everything else in %eax. */
3137 /* Create the va_list data type. */
3140 ix86_build_builtin_va_list (void)
3142 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
3144 /* For i386 we use plain pointer to argument area. */
3146 return build_pointer_type (char_type_node
);
3148 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
3149 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
3151 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
3152 unsigned_type_node
);
3153 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
3154 unsigned_type_node
);
3155 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
3157 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
3160 DECL_FIELD_CONTEXT (f_gpr
) = record
;
3161 DECL_FIELD_CONTEXT (f_fpr
) = record
;
3162 DECL_FIELD_CONTEXT (f_ovf
) = record
;
3163 DECL_FIELD_CONTEXT (f_sav
) = record
;
3165 TREE_CHAIN (record
) = type_decl
;
3166 TYPE_NAME (record
) = type_decl
;
3167 TYPE_FIELDS (record
) = f_gpr
;
3168 TREE_CHAIN (f_gpr
) = f_fpr
;
3169 TREE_CHAIN (f_fpr
) = f_ovf
;
3170 TREE_CHAIN (f_ovf
) = f_sav
;
3172 layout_type (record
);
3174 /* The correct type is an array type of one element. */
3175 return build_array_type (record
, build_index_type (size_zero_node
));
3178 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3181 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3182 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
3185 CUMULATIVE_ARGS next_cum
;
3186 rtx save_area
= NULL_RTX
, mem
;
3199 /* Indicate to allocate space on the stack for varargs save area. */
3200 ix86_save_varrargs_registers
= 1;
3202 cfun
->stack_alignment_needed
= 128;
3204 fntype
= TREE_TYPE (current_function_decl
);
3205 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
3206 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
3207 != void_type_node
));
3209 /* For varargs, we do not want to skip the dummy va_dcl argument.
3210 For stdargs, we do want to skip the last named argument. */
3213 function_arg_advance (&next_cum
, mode
, type
, 1);
3216 save_area
= frame_pointer_rtx
;
3218 set
= get_varargs_alias_set ();
3220 for (i
= next_cum
.regno
; i
< ix86_regparm
; i
++)
3222 mem
= gen_rtx_MEM (Pmode
,
3223 plus_constant (save_area
, i
* UNITS_PER_WORD
));
3224 set_mem_alias_set (mem
, set
);
3225 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
3226 x86_64_int_parameter_registers
[i
]));
3229 if (next_cum
.sse_nregs
)
3231 /* Now emit code to save SSE registers. The AX parameter contains number
3232 of SSE parameter registers used to call this function. We use
3233 sse_prologue_save insn template that produces computed jump across
3234 SSE saves. We need some preparation work to get this working. */
3236 label
= gen_label_rtx ();
3237 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
3239 /* Compute address to jump to :
3240 label - 5*eax + nnamed_sse_arguments*5 */
3241 tmp_reg
= gen_reg_rtx (Pmode
);
3242 nsse_reg
= gen_reg_rtx (Pmode
);
3243 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
3244 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
3245 gen_rtx_MULT (Pmode
, nsse_reg
,
3247 if (next_cum
.sse_regno
)
3250 gen_rtx_CONST (DImode
,
3251 gen_rtx_PLUS (DImode
,
3253 GEN_INT (next_cum
.sse_regno
* 4))));
3255 emit_move_insn (nsse_reg
, label_ref
);
3256 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
3258 /* Compute address of memory block we save into. We always use pointer
3259 pointing 127 bytes after first byte to store - this is needed to keep
3260 instruction size limited by 4 bytes. */
3261 tmp_reg
= gen_reg_rtx (Pmode
);
3262 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
3263 plus_constant (save_area
,
3264 8 * REGPARM_MAX
+ 127)));
3265 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
3266 set_mem_alias_set (mem
, set
);
3267 set_mem_align (mem
, BITS_PER_WORD
);
3269 /* And finally do the dirty job! */
3270 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
3271 GEN_INT (next_cum
.sse_regno
), label
));
3276 /* Implement va_start. */
3279 ix86_va_start (tree valist
, rtx nextarg
)
3281 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
3282 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
3283 tree gpr
, fpr
, ovf
, sav
, t
;
3285 /* Only 64bit target needs something special. */
3288 std_expand_builtin_va_start (valist
, nextarg
);
3292 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3293 f_fpr
= TREE_CHAIN (f_gpr
);
3294 f_ovf
= TREE_CHAIN (f_fpr
);
3295 f_sav
= TREE_CHAIN (f_ovf
);
3297 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
3298 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
3299 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
3300 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
3301 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
3303 /* Count number of gp and fp argument registers used. */
3304 words
= current_function_args_info
.words
;
3305 n_gpr
= current_function_args_info
.regno
;
3306 n_fpr
= current_function_args_info
.sse_regno
;
3308 if (TARGET_DEBUG_ARG
)
3309 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3310 (int) words
, (int) n_gpr
, (int) n_fpr
);
3312 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
3313 build_int_cst (NULL_TREE
, n_gpr
* 8));
3314 TREE_SIDE_EFFECTS (t
) = 1;
3315 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3317 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
3318 build_int_cst (NULL_TREE
, n_fpr
* 16 + 8*REGPARM_MAX
));
3319 TREE_SIDE_EFFECTS (t
) = 1;
3320 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3322 /* Find the overflow area. */
3323 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
3325 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
3326 build_int_cst (NULL_TREE
, words
* UNITS_PER_WORD
));
3327 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
3328 TREE_SIDE_EFFECTS (t
) = 1;
3329 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3331 /* Find the register save area.
3332 The prologue of the function saves it right above the stack frame. */
3333 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
3334 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
3335 TREE_SIDE_EFFECTS (t
) = 1;
3336 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3339 /* Implement va_arg. */
3342 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
3344 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
3345 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
3346 tree gpr
, fpr
, ovf
, sav
, t
;
3348 tree lab_false
, lab_over
= NULL_TREE
;
3353 enum machine_mode nat_mode
;
3355 /* Only 64bit target needs something special. */
3357 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
3359 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3360 f_fpr
= TREE_CHAIN (f_gpr
);
3361 f_ovf
= TREE_CHAIN (f_fpr
);
3362 f_sav
= TREE_CHAIN (f_ovf
);
3364 valist
= build_va_arg_indirect_ref (valist
);
3365 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
3366 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
3367 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
3368 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
3370 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
3372 type
= build_pointer_type (type
);
3373 size
= int_size_in_bytes (type
);
3374 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3376 nat_mode
= type_natural_mode (type
);
3377 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
3378 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
3380 /* Pull the value out of the saved registers. */
3382 addr
= create_tmp_var (ptr_type_node
, "addr");
3383 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
3387 int needed_intregs
, needed_sseregs
;
3389 tree int_addr
, sse_addr
;
3391 lab_false
= create_artificial_label ();
3392 lab_over
= create_artificial_label ();
3394 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
3396 need_temp
= (!REG_P (container
)
3397 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
3398 || TYPE_ALIGN (type
) > 128));
3400 /* In case we are passing a structure, verify that it is a consecutive block
3401 in the register save area. If not, we need to do moves. */
3402 if (!need_temp
&& !REG_P (container
))
3404 /* Verify that all registers are strictly consecutive */
3405 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
3409 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
3411 rtx slot
= XVECEXP (container
, 0, i
);
3412 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
3413 || INTVAL (XEXP (slot
, 1)) != i
* 16)
3421 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
3423 rtx slot
= XVECEXP (container
, 0, i
);
3424 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
3425 || INTVAL (XEXP (slot
, 1)) != i
* 8)
3437 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
3438 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
3439 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
3440 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
3443 /* First ensure that we fit completely in registers. */
3446 t
= build_int_cst (TREE_TYPE (gpr
),
3447 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
3448 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
3449 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
3450 t
= build (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
3451 gimplify_and_add (t
, pre_p
);
3455 t
= build_int_cst (TREE_TYPE (fpr
),
3456 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
3458 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
3459 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
3460 t
= build (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
3461 gimplify_and_add (t
, pre_p
);
3464 /* Compute index to start of area used for integer regs. */
3467 /* int_addr = gpr + sav; */
3468 t
= fold_convert (ptr_type_node
, gpr
);
3469 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
3470 t
= build2 (MODIFY_EXPR
, void_type_node
, int_addr
, t
);
3471 gimplify_and_add (t
, pre_p
);
3475 /* sse_addr = fpr + sav; */
3476 t
= fold_convert (ptr_type_node
, fpr
);
3477 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
3478 t
= build2 (MODIFY_EXPR
, void_type_node
, sse_addr
, t
);
3479 gimplify_and_add (t
, pre_p
);
3484 tree temp
= create_tmp_var (type
, "va_arg_tmp");
3487 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
3488 t
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
3489 gimplify_and_add (t
, pre_p
);
3491 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
3493 rtx slot
= XVECEXP (container
, 0, i
);
3494 rtx reg
= XEXP (slot
, 0);
3495 enum machine_mode mode
= GET_MODE (reg
);
3496 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
3497 tree addr_type
= build_pointer_type (piece_type
);
3500 tree dest_addr
, dest
;
3502 if (SSE_REGNO_P (REGNO (reg
)))
3504 src_addr
= sse_addr
;
3505 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
3509 src_addr
= int_addr
;
3510 src_offset
= REGNO (reg
) * 8;
3512 src_addr
= fold_convert (addr_type
, src_addr
);
3513 src_addr
= fold (build2 (PLUS_EXPR
, addr_type
, src_addr
,
3514 size_int (src_offset
)));
3515 src
= build_va_arg_indirect_ref (src_addr
);
3517 dest_addr
= fold_convert (addr_type
, addr
);
3518 dest_addr
= fold (build2 (PLUS_EXPR
, addr_type
, dest_addr
,
3519 size_int (INTVAL (XEXP (slot
, 1)))));
3520 dest
= build_va_arg_indirect_ref (dest_addr
);
3522 t
= build2 (MODIFY_EXPR
, void_type_node
, dest
, src
);
3523 gimplify_and_add (t
, pre_p
);
3529 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
3530 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
3531 t
= build2 (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
3532 gimplify_and_add (t
, pre_p
);
3536 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
3537 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
3538 t
= build2 (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
3539 gimplify_and_add (t
, pre_p
);
3542 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
3543 gimplify_and_add (t
, pre_p
);
3545 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
3546 append_to_statement_list (t
, pre_p
);
3549 /* ... otherwise out of the overflow area. */
3551 /* Care for on-stack alignment if needed. */
3552 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
3556 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
3557 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
3558 build_int_cst (TREE_TYPE (ovf
), align
- 1));
3559 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
3560 build_int_cst (TREE_TYPE (t
), -align
));
3562 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
3564 t2
= build2 (MODIFY_EXPR
, void_type_node
, addr
, t
);
3565 gimplify_and_add (t2
, pre_p
);
3567 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
3568 build_int_cst (TREE_TYPE (t
), rsize
* UNITS_PER_WORD
));
3569 t
= build2 (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
3570 gimplify_and_add (t
, pre_p
);
3574 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
3575 append_to_statement_list (t
, pre_p
);
3578 ptrtype
= build_pointer_type (type
);
3579 addr
= fold_convert (ptrtype
, addr
);
3582 addr
= build_va_arg_indirect_ref (addr
);
3583 return build_va_arg_indirect_ref (addr
);
3586 /* Return nonzero if OPNUM's MEM should be matched
3587 in movabs* patterns. */
3590 ix86_check_movabs (rtx insn
, int opnum
)
3594 set
= PATTERN (insn
);
3595 if (GET_CODE (set
) == PARALLEL
)
3596 set
= XVECEXP (set
, 0, 0);
3597 if (GET_CODE (set
) != SET
)
3599 mem
= XEXP (set
, opnum
);
3600 while (GET_CODE (mem
) == SUBREG
)
3601 mem
= SUBREG_REG (mem
);
3602 if (GET_CODE (mem
) != MEM
)
3604 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
3607 /* Initialize the table of extra 80387 mathematical constants. */
3610 init_ext_80387_constants (void)
3612 static const char * cst
[5] =
3614 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3615 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3616 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3617 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3618 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3622 for (i
= 0; i
< 5; i
++)
3624 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
3625 /* Ensure each constant is rounded to XFmode precision. */
3626 real_convert (&ext_80387_constants_table
[i
],
3627 XFmode
, &ext_80387_constants_table
[i
]);
3630 ext_80387_constants_init
= 1;
3633 /* Return true if the constant is something that can be loaded with
3634 a special instruction. */
3637 standard_80387_constant_p (rtx x
)
3639 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
3642 if (x
== CONST0_RTX (GET_MODE (x
)))
3644 if (x
== CONST1_RTX (GET_MODE (x
)))
3647 /* For XFmode constants, try to find a special 80387 instruction when
3648 optimizing for size or on those CPUs that benefit from them. */
3649 if (GET_MODE (x
) == XFmode
3650 && (optimize_size
|| x86_ext_80387_constants
& TUNEMASK
))
3655 if (! ext_80387_constants_init
)
3656 init_ext_80387_constants ();
3658 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3659 for (i
= 0; i
< 5; i
++)
3660 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
3667 /* Return the opcode of the special instruction to be used to load the constant X. */
3671 standard_80387_constant_opcode (rtx x
)
3673 switch (standard_80387_constant_p (x
))
3693 /* Return the CONST_DOUBLE representing the 80387 constant that is
3694 loaded by the specified special instruction. The argument IDX
3695 matches the return value from standard_80387_constant_p. */
3698 standard_80387_constant_rtx (int idx
)
3702 if (! ext_80387_constants_init
)
3703 init_ext_80387_constants ();
3719 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
3723 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
3726 standard_sse_constant_p (rtx x
)
3728 if (x
== const0_rtx
)
3730 return (x
== CONST0_RTX (GET_MODE (x
)));
3733 /* Returns 1 if OP contains a symbol reference */
3736 symbolic_reference_mentioned_p (rtx op
)
3741 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
3744 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
3745 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
3751 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
3752 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
3756 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
3763 /* Return 1 if it is appropriate to emit `ret' instructions in the
3764 body of a function. Do this only if the epilogue is simple, needing a
3765 couple of insns. Prior to reloading, we can't tell how many registers
3766 must be saved, so return 0 then. Return 0 if there is no frame
3767 marker to de-allocate. */
3770 ix86_can_use_return_insn_p (void)
3772 struct ix86_frame frame
;
3774 if (! reload_completed
|| frame_pointer_needed
)
3777 /* Don't allow more than 32767 bytes of arguments to be popped, since
3778 that's all we can do with one instruction. */
3779 if (current_function_pops_args
3780 && current_function_args_size
>= 32768)
3783 ix86_compute_frame_layout (&frame
);
3784 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
3787 /* Value should be nonzero if functions must have frame pointers.
3788 Zero means the frame pointer need not be set up (and parms may
3789 be accessed via the stack pointer) in functions that seem suitable. */
3792 ix86_frame_pointer_required (void)
3794 /* If we accessed previous frames, then the generated code expects
3795 to be able to access the saved ebp value in our frame. */
3796 if (cfun
->machine
->accesses_prev_frame
)
3799 /* Several x86 OSes need a frame pointer for other reasons,
3800 usually pertaining to setjmp. */
3801 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
3804 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3805 the frame pointer by default. Turn it back on now if we've not
3806 got a leaf function. */
3807 if (TARGET_OMIT_LEAF_FRAME_POINTER
3808 && (!current_function_is_leaf
))
3811 if (current_function_profile
)
3817 /* Record that the current function accesses previous call frames. */
3820 ix86_setup_frame_addresses (void)
3822 cfun
->machine
->accesses_prev_frame
= 1;
3825 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3826 # define USE_HIDDEN_LINKONCE 1
3828 # define USE_HIDDEN_LINKONCE 0
3831 static int pic_labels_used
;
3833 /* Fills in the label name that should be used for a pc thunk for
3834 the given register. */
3837 get_pc_thunk_name (char name
[32], unsigned int regno
)
3839 if (USE_HIDDEN_LINKONCE
)
3840 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
3842 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
3846 /* This function generates code for -fpic: for each register marked in
3847 pic_labels_used, a thunk that loads that register with the return address of the caller and then returns. */
3850 ix86_file_end (void)
3855 for (regno
= 0; regno
< 8; ++regno
)
3859 if (! ((pic_labels_used
>> regno
) & 1))
3862 get_pc_thunk_name (name
, regno
);
3864 if (USE_HIDDEN_LINKONCE
)
3868 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
3870 TREE_PUBLIC (decl
) = 1;
3871 TREE_STATIC (decl
) = 1;
3872 DECL_ONE_ONLY (decl
) = 1;
3874 (*targetm
.asm_out
.unique_section
) (decl
, 0);
3875 named_section (decl
, NULL
, 0);
3877 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
3878 fputs ("\t.hidden\t", asm_out_file
);
3879 assemble_name (asm_out_file
, name
);
3880 fputc ('\n', asm_out_file
);
3881 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
3886 ASM_OUTPUT_LABEL (asm_out_file
, name
);
3889 xops
[0] = gen_rtx_REG (SImode
, regno
);
3890 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
3891 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
3892 output_asm_insn ("ret", xops
);
3895 if (NEED_INDICATE_EXEC_STACK
)
3896 file_end_indicate_exec_stack ();
3899 /* Emit code for the SET_GOT patterns. */
3902 output_set_got (rtx dest
)
3907 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
3909 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
3911 xops
[2] = gen_rtx_LABEL_REF (Pmode
, gen_label_rtx ());
3914 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
3916 output_asm_insn ("call\t%a2", xops
);
3919 /* Output the "canonical" label name ("Lxx$pb") here too. This
3920 is what will be referred to by the Mach-O PIC subsystem. */
3921 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
3923 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
3924 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
3927 output_asm_insn ("pop{l}\t%0", xops
);
3932 get_pc_thunk_name (name
, REGNO (dest
));
3933 pic_labels_used
|= 1 << REGNO (dest
);
3935 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
3936 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
3937 output_asm_insn ("call\t%X2", xops
);
3940 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
3941 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
3942 else if (!TARGET_MACHO
)
3943 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops
);
3948 /* Generate a "push" pattern for input ARG. */
3953 return gen_rtx_SET (VOIDmode
,
3955 gen_rtx_PRE_DEC (Pmode
,
3956 stack_pointer_rtx
)),
3960 /* Return >= 0 if there is an unused call-clobbered register available
3961 for the entire function. */
3964 ix86_select_alt_pic_regnum (void)
3966 if (current_function_is_leaf
&& !current_function_profile
)
3969 for (i
= 2; i
>= 0; --i
)
3970 if (!regs_ever_live
[i
])
3974 return INVALID_REGNUM
;
3977 /* Return 1 if we need to save REGNO. */
3979 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
3981 if (pic_offset_table_rtx
3982 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
3983 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
3984 || current_function_profile
3985 || current_function_calls_eh_return
3986 || current_function_uses_const_pool
))
3988 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
3993 if (current_function_calls_eh_return
&& maybe_eh_return
)
3998 unsigned test
= EH_RETURN_DATA_REGNO (i
);
3999 if (test
== INVALID_REGNUM
)
4006 return (regs_ever_live
[regno
]
4007 && !call_used_regs
[regno
]
4008 && !fixed_regs
[regno
]
4009 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
4012 /* Return number of registers to be saved on the stack. */
4015 ix86_nsaved_regs (void)
4020 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4021 if (ix86_save_reg (regno
, true))
4026 /* Return the offset between two registers, one to be eliminated, and the other
4027 its replacement, at the start of a routine. */
4030 ix86_initial_elimination_offset (int from
, int to
)
4032 struct ix86_frame frame
;
4033 ix86_compute_frame_layout (&frame
);
4035 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4036 return frame
.hard_frame_pointer_offset
;
4037 else if (from
== FRAME_POINTER_REGNUM
4038 && to
== HARD_FRAME_POINTER_REGNUM
)
4039 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
4042 if (to
!= STACK_POINTER_REGNUM
)
4044 else if (from
== ARG_POINTER_REGNUM
)
4045 return frame
.stack_pointer_offset
;
4046 else if (from
!= FRAME_POINTER_REGNUM
)
4049 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
4053 /* Fill in structure ix86_frame describing the frame of the function currently being compiled. */
4056 ix86_compute_frame_layout (struct ix86_frame
*frame
)
4058 HOST_WIDE_INT total_size
;
4059 unsigned int stack_alignment_needed
;
4060 HOST_WIDE_INT offset
;
4061 unsigned int preferred_alignment
;
4062 HOST_WIDE_INT size
= get_frame_size ();
4064 frame
->nregs
= ix86_nsaved_regs ();
4067 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
4068 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
4070 /* During reload iteration the number of registers saved can change.
4071 Recompute the value as needed. Do not recompute when the number of registers
4072 didn't change, as reload makes multiple calls to this function and does not
4073 expect the decision to change within a single iteration. */
4075 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
4077 int count
= frame
->nregs
;
4079 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
4080 /* The fast prologue uses move instead of push to save registers. This
4081 is significantly longer, but also executes faster as modern hardware
4082 can execute the moves in parallel, but can't do that for push/pop.
4084 Be careful about choosing which prologue to emit: when the function takes
4085 many instructions to execute we may use the slow version, and likewise
4086 when the function is known to be outside a hot spot (this is known with
4087 feedback only). Weigh the size of the function by the number of registers
4088 to save, as it is cheap to use one or two push instructions but very
4089 slow to use many of them. */
4091 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
4092 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
4093 || (flag_branch_probabilities
4094 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
4095 cfun
->machine
->use_fast_prologue_epilogue
= false;
4097 cfun
->machine
->use_fast_prologue_epilogue
4098 = !expensive_function_p (count
);
4100 if (TARGET_PROLOGUE_USING_MOVE
4101 && cfun
->machine
->use_fast_prologue_epilogue
)
4102 frame
->save_regs_using_mov
= true;
4104 frame
->save_regs_using_mov
= false;
4107 /* Skip return address and saved base pointer. */
4108 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
4110 frame
->hard_frame_pointer_offset
= offset
;
4112 /* Do some sanity checking of stack_alignment_needed and
4113 preferred_alignment, since the i386 port is the only one using those
4114 features, which may break easily. */
4116 if (size
&& !stack_alignment_needed
)
4118 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4120 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4122 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4125 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4126 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
4128 /* Register save area */
4129 offset
+= frame
->nregs
* UNITS_PER_WORD
;
4132 if (ix86_save_varrargs_registers
)
4134 offset
+= X86_64_VARARGS_SIZE
;
4135 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
4138 frame
->va_arg_size
= 0;
4140 /* Align start of frame for local function. */
4141 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
4142 & -stack_alignment_needed
) - offset
;
4144 offset
+= frame
->padding1
;
4146 /* Frame pointer points here. */
4147 frame
->frame_pointer_offset
= offset
;
4151 /* Add outgoing arguments area. Can be skipped if we eliminated
4152 all the function calls as dead code.
4153 Skipping is however impossible when the function calls alloca. The alloca
4154 expander assumes that the last current_function_outgoing_args_size
4155 bytes of the stack frame are unused. */
4156 if (ACCUMULATE_OUTGOING_ARGS
4157 && (!current_function_is_leaf
|| current_function_calls_alloca
))
4159 offset
+= current_function_outgoing_args_size
;
4160 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
4163 frame
->outgoing_arguments_size
= 0;
4165 /* Align stack boundary. Only needed if we're calling another function
4166 or using alloca. */
4167 if (!current_function_is_leaf
|| current_function_calls_alloca
)
4168 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
4169 & -preferred_alignment
) - offset
;
4171 frame
->padding2
= 0;
4173 offset
+= frame
->padding2
;
4175 /* We've reached end of stack frame. */
4176 frame
->stack_pointer_offset
= offset
;
4178 /* Size prologue needs to allocate. */
4179 frame
->to_allocate
=
4180 (size
+ frame
->padding1
+ frame
->padding2
4181 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
4183 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
4184 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
4185 frame
->save_regs_using_mov
= false;
4187 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
4188 && current_function_is_leaf
)
4190 frame
->red_zone_size
= frame
->to_allocate
;
4191 if (frame
->save_regs_using_mov
)
4192 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
4193 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
4194 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
4197 frame
->red_zone_size
= 0;
4198 frame
->to_allocate
-= frame
->red_zone_size
;
4199 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
4201 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
4202 fprintf (stderr
, "size: %i\n", size
);
4203 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
4204 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
4205 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
4206 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
4207 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
4208 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
4209 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
4210 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
4211 frame
->hard_frame_pointer_offset
);
4212 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
4216 /* Emit code to save registers in the prologue. */
4219 ix86_emit_save_regs (void)
4224 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4225 if (ix86_save_reg (regno
, true))
4227 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
4228 RTX_FRAME_RELATED_P (insn
) = 1;
4232 /* Emit code to save registers using MOV insns. First register
4233 is stored at POINTER + OFFSET. */
4235 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
4240 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4241 if (ix86_save_reg (regno
, true))
4243 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
4245 gen_rtx_REG (Pmode
, regno
));
4246 RTX_FRAME_RELATED_P (insn
) = 1;
4247 offset
+= UNITS_PER_WORD
;
4251 /* Expand prologue or epilogue stack adjustment.
4252 The pattern exists to put a dependency on all ebp-based memory accesses.
4253 STYLE should be negative if instructions should be marked as frame related,
4254 zero if %r11 register is live and cannot be freely used and positive
4255 otherwise. */
4258 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
4263 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
4264 else if (x86_64_immediate_operand (offset
, DImode
))
4265 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
4269 /* r11 is used by indirect sibcall return as well, set before the
4270 epilogue and used after the epilogue. ATM indirect sibcall
4271 shouldn't be used together with huge frame sizes in one
4272 function because of the frame_size check in sibcall.c. */
4275 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
4276 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
4278 RTX_FRAME_RELATED_P (insn
) = 1;
4279 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
4283 RTX_FRAME_RELATED_P (insn
) = 1;
4286 /* Expand the prologue into a bunch of separate insns. */
4289 ix86_expand_prologue (void)
4293 struct ix86_frame frame
;
4294 HOST_WIDE_INT allocate
;
4296 ix86_compute_frame_layout (&frame
);
4298 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4299 slower on all targets. Also sdb doesn't like it. */
4301 if (frame_pointer_needed
)
4303 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
4304 RTX_FRAME_RELATED_P (insn
) = 1;
4306 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
4307 RTX_FRAME_RELATED_P (insn
) = 1;
4310 allocate
= frame
.to_allocate
;
4312 if (!frame
.save_regs_using_mov
)
4313 ix86_emit_save_regs ();
4315 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
4317 /* When using red zone we may start register saving before allocating
4318 the stack frame saving one cycle of the prologue. */
4319 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
4320 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
4321 : stack_pointer_rtx
,
4322 -frame
.nregs
* UNITS_PER_WORD
);
4326 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
4327 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
4328 GEN_INT (-allocate
), -1);
4331 /* Only valid for Win32. */
4332 rtx eax
= gen_rtx_REG (SImode
, 0);
4333 bool eax_live
= ix86_eax_live_at_start_p ();
4341 emit_insn (gen_push (eax
));
4345 emit_move_insn (eax
, GEN_INT (allocate
));
4347 insn
= emit_insn (gen_allocate_stack_worker (eax
));
4348 RTX_FRAME_RELATED_P (insn
) = 1;
4349 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
4350 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
4351 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
4352 t
, REG_NOTES (insn
));
4356 if (frame_pointer_needed
)
4357 t
= plus_constant (hard_frame_pointer_rtx
,
4360 - frame
.nregs
* UNITS_PER_WORD
);
4362 t
= plus_constant (stack_pointer_rtx
, allocate
);
4363 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
4367 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
4369 if (!frame_pointer_needed
|| !frame
.to_allocate
)
4370 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
4372 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
4373 -frame
.nregs
* UNITS_PER_WORD
);
4376 pic_reg_used
= false;
4377 if (pic_offset_table_rtx
4378 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4379 || current_function_profile
))
4381 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
4383 if (alt_pic_reg_used
!= INVALID_REGNUM
)
4384 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
4386 pic_reg_used
= true;
4391 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
4393 /* Even with accurate pre-reload life analysis, we can wind up
4394 deleting all references to the pic register after reload.
4395 Consider if cross-jumping unifies two sides of a branch
4396 controlled by a comparison vs the only read from a global.
4397 In which case, allow the set_got to be deleted, though we're
4398 too late to do anything about the ebx save in the prologue. */
4399 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
4402 /* Prevent function calls from being scheduled before the call to mcount.
4403 In the pic_reg_used case, make sure that the got load isn't deleted. */
4404 if (current_function_profile
)
4405 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
4408 /* Emit code to restore saved registers using MOV insns. First register
4409 is restored from POINTER + OFFSET. */
4411 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
4412 int maybe_eh_return
)
4415 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
4417 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4418 if (ix86_save_reg (regno
, maybe_eh_return
))
4420 /* Ensure that adjust_address won't be forced to produce pointer
4421 out of range allowed by x86-64 instruction set. */
4422 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
4426 r11
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
4427 emit_move_insn (r11
, GEN_INT (offset
));
4428 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
4429 base_address
= gen_rtx_MEM (Pmode
, r11
);
4432 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
4433 adjust_address (base_address
, Pmode
, offset
));
4434 offset
+= UNITS_PER_WORD
;
4438 /* Restore function stack, frame, and registers. */
4441 ix86_expand_epilogue (int style
)
4444 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
4445 struct ix86_frame frame
;
4446 HOST_WIDE_INT offset
;
4448 ix86_compute_frame_layout (&frame
);
4450 /* Calculate start of saved registers relative to ebp. Special care
4451 must be taken for the normal return case of a function using
4452 eh_return: the eax and edx registers are marked as saved, but not
4453 restored along this path. */
4454 offset
= frame
.nregs
;
4455 if (current_function_calls_eh_return
&& style
!= 2)
4457 offset
*= -UNITS_PER_WORD
;
4459 /* If we're only restoring one register and sp is not valid then
4460 use a move instruction to restore the register since it's
4461 less work than reloading sp and popping the register.
4463 The default code results in stack adjustment using add/lea instruction,
4464 while this code results in LEAVE instruction (or discrete equivalent),
4465 so it is profitable in some other cases as well. Especially when there
4466 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4467 and there is exactly one register to pop. This heuristic may need some
4468 tuning in future. */
4469 if ((!sp_valid
&& frame
.nregs
<= 1)
4470 || (TARGET_EPILOGUE_USING_MOVE
4471 && cfun
->machine
->use_fast_prologue_epilogue
4472 && (frame
.nregs
> 1 || frame
.to_allocate
))
4473 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
4474 || (frame_pointer_needed
&& TARGET_USE_LEAVE
4475 && cfun
->machine
->use_fast_prologue_epilogue
4476 && frame
.nregs
== 1)
4477 || current_function_calls_eh_return
)
4479 /* Restore registers. We can use ebp or esp to address the memory
4480 locations. If both are available, default to ebp, since offsets
4481 are known to be small. The only exception is esp pointing directly to the
4482 end of block of saved registers, where we may simplify addressing
4483 mode. */
4485 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
4486 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
4487 frame
.to_allocate
, style
== 2);
4489 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
4490 offset
, style
== 2);
4492 /* eh_return epilogues need %ecx added to the stack pointer. */
4495 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
4497 if (frame_pointer_needed
)
4499 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
4500 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
4501 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
4503 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
4504 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
4506 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
4511 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
4512 tmp
= plus_constant (tmp
, (frame
.to_allocate
4513 + frame
.nregs
* UNITS_PER_WORD
));
4514 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
4517 else if (!frame_pointer_needed
)
4518 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
4519 GEN_INT (frame
.to_allocate
4520 + frame
.nregs
* UNITS_PER_WORD
),
4522 /* If not an i386, mov & pop is faster than "leave". */
4523 else if (TARGET_USE_LEAVE
|| optimize_size
4524 || !cfun
->machine
->use_fast_prologue_epilogue
)
4525 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4528 pro_epilogue_adjust_stack (stack_pointer_rtx
,
4529 hard_frame_pointer_rtx
,
4532 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4534 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4539 /* First step is to deallocate the stack frame so that we can
4540 pop the registers. */
4543 if (!frame_pointer_needed
)
4545 pro_epilogue_adjust_stack (stack_pointer_rtx
,
4546 hard_frame_pointer_rtx
,
4547 GEN_INT (offset
), style
);
4549 else if (frame
.to_allocate
)
4550 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
4551 GEN_INT (frame
.to_allocate
), style
);
4553 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4554 if (ix86_save_reg (regno
, false))
4557 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
4559 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
4561 if (frame_pointer_needed
)
4563 /* Leave results in shorter dependency chains on CPUs that are
4564 able to grok it fast. */
4565 if (TARGET_USE_LEAVE
)
4566 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4567 else if (TARGET_64BIT
)
4568 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4570 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4574 /* Sibcall epilogues don't want a return instruction. */
4578 if (current_function_pops_args
&& current_function_args_size
)
4580 rtx popc
= GEN_INT (current_function_pops_args
);
4582 /* i386 can only pop 64K bytes. If asked to pop more, pop
4583 return address, do explicit add, and jump indirectly to the
4584 caller. */
4586 if (current_function_pops_args
>= 65536)
4588 rtx ecx
= gen_rtx_REG (SImode
, 2);
4590 /* There is no "pascal" calling convention in 64bit ABI. */
4594 emit_insn (gen_popsi1 (ecx
));
4595 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
4596 emit_jump_insn (gen_return_indirect_internal (ecx
));
4599 emit_jump_insn (gen_return_pop_internal (popc
));
4602 emit_jump_insn (gen_return_internal ());
4605 /* Reset from the function's potential modifications. */
4608 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
4609 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
4611 if (pic_offset_table_rtx
)
4612 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
4615 /* Extract the parts of an RTL expression that is a valid memory address
4616 for an instruction. Return 0 if the structure of the address is
4617 grossly off. Return -1 if the address contains ASHIFT, so it is not
4618 strictly valid, but still used for computing length of lea instruction. */
4621 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
4623 rtx base
= NULL_RTX
;
4624 rtx index
= NULL_RTX
;
4625 rtx disp
= NULL_RTX
;
4626 HOST_WIDE_INT scale
= 1;
4627 rtx scale_rtx
= NULL_RTX
;
4629 enum ix86_address_seg seg
= SEG_DEFAULT
;
4631 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == SUBREG
)
4633 else if (GET_CODE (addr
) == PLUS
)
4643 addends
[n
++] = XEXP (op
, 1);
4646 while (GET_CODE (op
) == PLUS
);
4651 for (i
= n
; i
>= 0; --i
)
4654 switch (GET_CODE (op
))
4659 index
= XEXP (op
, 0);
4660 scale_rtx
= XEXP (op
, 1);
4664 if (XINT (op
, 1) == UNSPEC_TP
4665 && TARGET_TLS_DIRECT_SEG_REFS
4666 && seg
== SEG_DEFAULT
)
4667 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
4696 else if (GET_CODE (addr
) == MULT
)
4698 index
= XEXP (addr
, 0); /* index*scale */
4699 scale_rtx
= XEXP (addr
, 1);
4701 else if (GET_CODE (addr
) == ASHIFT
)
4705 /* We're called for lea too, which implements ashift on occasion. */
4706 index
= XEXP (addr
, 0);
4707 tmp
= XEXP (addr
, 1);
4708 if (GET_CODE (tmp
) != CONST_INT
)
4710 scale
= INTVAL (tmp
);
4711 if ((unsigned HOST_WIDE_INT
) scale
> 3)
4717 disp
= addr
; /* displacement */
4719 /* Extract the integral value of scale. */
4722 if (GET_CODE (scale_rtx
) != CONST_INT
)
4724 scale
= INTVAL (scale_rtx
);
4727 /* Allow arg pointer and stack pointer as index if there is not scaling. */
4728 if (base
&& index
&& scale
== 1
4729 && (index
== arg_pointer_rtx
4730 || index
== frame_pointer_rtx
4731 || (REG_P (index
) && REGNO (index
) == STACK_POINTER_REGNUM
)))
4738 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4739 if ((base
== hard_frame_pointer_rtx
4740 || base
== frame_pointer_rtx
4741 || base
== arg_pointer_rtx
) && !disp
)
4744 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4745 Avoid this by transforming to [%esi+0]. */
4746 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
4747 && base
&& !index
&& !disp
4749 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
4752 /* Special case: encode reg+reg instead of reg*2. */
4753 if (!base
&& index
&& scale
&& scale
== 2)
4754 base
= index
, scale
= 1;
4756 /* Special case: scaling cannot be encoded without base or displacement. */
4757 if (!base
&& !disp
&& index
&& scale
!= 1)
4769 /* Return cost of the memory address x.
4770 For i386, it is better to use a complex address than let gcc copy
4771 the address into a reg and make a new pseudo. But not if the address
4772 requires two regs - that would mean more pseudos with longer
4773 lifetimes. */
4775 ix86_address_cost (rtx x
)
4777 struct ix86_address parts
;
4780 if (!ix86_decompose_address (x
, &parts
))
4783 /* More complex memory references are better. */
4784 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
4786 if (parts
.seg
!= SEG_DEFAULT
)
4789 /* Attempt to minimize number of registers in the address. */
4791 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
4793 && (!REG_P (parts
.index
)
4794 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
4798 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
4800 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
4801 && parts
.base
!= parts
.index
)
4804 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4805 since its predecode logic can't detect the length of instructions
4806 and it degenerates to vector decoded. Increase cost of such
4807 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4808 to split such addresses or even refuse such addresses at all.
4810 Following addressing modes are affected:
4811 [base+scale*index]
4812 [scale*index+disp]
4813 [base+index]
4815 The first and last case may be avoidable by explicitly coding the zero in
4816 memory address, but I don't have AMD-K6 machine handy to check this
4817 theory. */
4820 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4821 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4822 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
4828 /* If X is a machine specific address (i.e. a symbol or label being
4829 referenced as a displacement from the GOT implemented using an
4830 UNSPEC), then return the base term. Otherwise return X. */
4833 ix86_find_base_term (rtx x
)
4839 if (GET_CODE (x
) != CONST
)
4842 if (GET_CODE (term
) == PLUS
4843 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
4844 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
4845 term
= XEXP (term
, 0);
4846 if (GET_CODE (term
) != UNSPEC
4847 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
4850 term
= XVECEXP (term
, 0, 0);
4852 if (GET_CODE (term
) != SYMBOL_REF
4853 && GET_CODE (term
) != LABEL_REF
)
4859 term
= ix86_delegitimize_address (x
);
4861 if (GET_CODE (term
) != SYMBOL_REF
4862 && GET_CODE (term
) != LABEL_REF
)
4868 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
4869 this is used to form addresses to local data when -fPIC is in
4870 use. */
4873 darwin_local_data_pic (rtx disp
)
4875 if (GET_CODE (disp
) == MINUS
)
4877 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
4878 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
4879 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
4881 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
4882 if (! strcmp (sym_name
, "<pic base>"))
4890 /* Determine if a given RTX is a valid constant. We already know this
4891 satisfies CONSTANT_P. */
4894 legitimate_constant_p (rtx x
)
4896 switch (GET_CODE (x
))
4901 if (GET_CODE (x
) == PLUS
)
4903 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
4908 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
4911 /* Only some unspecs are valid as "constants". */
4912 if (GET_CODE (x
) == UNSPEC
)
4913 switch (XINT (x
, 1))
4917 return local_exec_symbolic_operand (XVECEXP (x
, 0, 0), Pmode
);
4919 return local_dynamic_symbolic_operand (XVECEXP (x
, 0, 0), Pmode
);
4924 /* We must have drilled down to a symbol. */
4925 if (!symbolic_operand (x
, Pmode
))
4930 /* TLS symbols are never valid. */
4931 if (tls_symbolic_operand (x
, Pmode
))
4939 /* Otherwise we handle everything else in the move patterns. */
4943 /* Determine if it's legal to put X into the constant pool. This
4944 is not possible for the address of thread-local symbols, which
4945 is checked above. */
4948 ix86_cannot_force_const_mem (rtx x
)
4950 return !legitimate_constant_p (x
);
4953 /* Determine if a given RTX is a valid constant address. */
4956 constant_address_p (rtx x
)
4958 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
4961 /* Nonzero if the constant value X is a legitimate general operand
4962 when generating PIC code. It is given that flag_pic is on and
4963 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4966 legitimate_pic_operand_p (rtx x
)
4970 switch (GET_CODE (x
))
4973 inner
= XEXP (x
, 0);
4975 /* Only some unspecs are valid as "constants". */
4976 if (GET_CODE (inner
) == UNSPEC
)
4977 switch (XINT (inner
, 1))
4980 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
4988 return legitimate_pic_address_disp_p (x
);
4995 /* Determine if a given CONST RTX is a valid memory displacement
4996 in PIC mode. */
4999 legitimate_pic_address_disp_p (rtx disp
)
5003 /* In 64bit mode we can allow direct addresses of symbols and labels
5004 when they are not dynamic symbols. */
5007 /* TLS references should always be enclosed in UNSPEC. */
5008 if (tls_symbolic_operand (disp
, GET_MODE (disp
)))
5010 if (GET_CODE (disp
) == SYMBOL_REF
5011 && ix86_cmodel
== CM_SMALL_PIC
5012 && SYMBOL_REF_LOCAL_P (disp
))
5014 if (GET_CODE (disp
) == LABEL_REF
)
5016 if (GET_CODE (disp
) == CONST
5017 && GET_CODE (XEXP (disp
, 0)) == PLUS
)
5019 rtx op0
= XEXP (XEXP (disp
, 0), 0);
5020 rtx op1
= XEXP (XEXP (disp
, 0), 1);
5022 /* TLS references should always be enclosed in UNSPEC. */
5023 if (tls_symbolic_operand (op0
, GET_MODE (op0
)))
5025 if (((GET_CODE (op0
) == SYMBOL_REF
5026 && ix86_cmodel
== CM_SMALL_PIC
5027 && SYMBOL_REF_LOCAL_P (op0
))
5028 || GET_CODE (op0
) == LABEL_REF
)
5029 && GET_CODE (op1
) == CONST_INT
5030 && INTVAL (op1
) < 16*1024*1024
5031 && INTVAL (op1
) >= -16*1024*1024)
5035 if (GET_CODE (disp
) != CONST
)
5037 disp
= XEXP (disp
, 0);
5041 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
5042 of GOT tables. We should not need these anyway. */
5043 if (GET_CODE (disp
) != UNSPEC
5044 || XINT (disp
, 1) != UNSPEC_GOTPCREL
)
5047 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
5048 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
5054 if (GET_CODE (disp
) == PLUS
)
5056 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
5058 disp
= XEXP (disp
, 0);
5062 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
5065 if (GET_CODE (disp
) != UNSPEC
)
5068 switch (XINT (disp
, 1))
5073 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
5075 if (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
5076 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
5077 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5079 case UNSPEC_GOTTPOFF
:
5080 case UNSPEC_GOTNTPOFF
:
5081 case UNSPEC_INDNTPOFF
:
5084 return initial_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5086 return local_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5088 return local_dynamic_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5094 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5095 memory address for an instruction. The MODE argument is the machine mode
5096 for the MEM expression that wants to use this address.
5098 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5099 convert common non-canonical forms to canonical form so that they will
5100 be recognized. */
5103 legitimate_address_p (enum machine_mode mode
, rtx addr
, int strict
)
5105 struct ix86_address parts
;
5106 rtx base
, index
, disp
;
5107 HOST_WIDE_INT scale
;
5108 const char *reason
= NULL
;
5109 rtx reason_rtx
= NULL_RTX
;
5111 if (TARGET_DEBUG_ADDR
)
5114 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5115 GET_MODE_NAME (mode
), strict
);
5119 if (ix86_decompose_address (addr
, &parts
) <= 0)
5121 reason
= "decomposition failed";
5126 index
= parts
.index
;
5128 scale
= parts
.scale
;
5130 /* Validate base register.
5132 Don't allow SUBREG's here, it can lead to spill failures when the base
5133 is one word out of a two word structure, which is represented internally
5134 as a DImode int. */
5140 if (GET_CODE (base
) != REG
)
5142 reason
= "base is not a register";
5146 if (GET_MODE (base
) != Pmode
)
5148 reason
= "base is not in Pmode";
5152 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (base
))
5153 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (base
)))
5155 reason
= "base is not valid";
5160 /* Validate index register.
5162 Don't allow SUBREG's here, it can lead to spill failures when the index
5163 is one word out of a two word structure, which is represented internally
5164 as a DImode int. */
5170 if (GET_CODE (index
) != REG
)
5172 reason
= "index is not a register";
5176 if (GET_MODE (index
) != Pmode
)
5178 reason
= "index is not in Pmode";
5182 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (index
))
5183 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (index
)))
5185 reason
= "index is not valid";
5190 /* Validate scale factor. */
5193 reason_rtx
= GEN_INT (scale
);
5196 reason
= "scale without index";
5200 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
5202 reason
= "scale is not a valid multiplier";
5207 /* Validate displacement. */
5212 if (GET_CODE (disp
) == CONST
5213 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
5214 switch (XINT (XEXP (disp
, 0), 1))
5218 case UNSPEC_GOTPCREL
:
5221 goto is_legitimate_pic
;
5223 case UNSPEC_GOTTPOFF
:
5224 case UNSPEC_GOTNTPOFF
:
5225 case UNSPEC_INDNTPOFF
:
5231 reason
= "invalid address unspec";
5235 else if (flag_pic
&& (SYMBOLIC_CONST (disp
)
5237 && !machopic_operand_p (disp
)
5242 if (TARGET_64BIT
&& (index
|| base
))
5244 /* foo@dtpoff(%rX) is ok. */
5245 if (GET_CODE (disp
) != CONST
5246 || GET_CODE (XEXP (disp
, 0)) != PLUS
5247 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
5248 || GET_CODE (XEXP (XEXP (disp
, 0), 1)) != CONST_INT
5249 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
5250 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
5252 reason
= "non-constant pic memory reference";
5256 else if (! legitimate_pic_address_disp_p (disp
))
5258 reason
= "displacement is an invalid pic construct";
5262 /* This code used to verify that a symbolic pic displacement
5263 includes the pic_offset_table_rtx register.
5265 While this is good idea, unfortunately these constructs may
5266 be created by "adds using lea" optimization for incorrect
5275 This code is nonsensical, but results in addressing
5276 GOT table with pic_offset_table_rtx base. We can't
5277 just refuse it easily, since it gets matched by
5278 "addsi3" pattern, that later gets split to lea in the
5279 case output register differs from input. While this
5280 can be handled by separate addsi pattern for this case
5281 that never results in lea, this seems to be easier and
5282 correct fix for crash to disable this test. */
5284 else if (GET_CODE (disp
) != LABEL_REF
5285 && GET_CODE (disp
) != CONST_INT
5286 && (GET_CODE (disp
) != CONST
5287 || !legitimate_constant_p (disp
))
5288 && (GET_CODE (disp
) != SYMBOL_REF
5289 || !legitimate_constant_p (disp
)))
5291 reason
= "displacement is not constant";
5294 else if (TARGET_64BIT
5295 && !x86_64_immediate_operand (disp
, VOIDmode
))
5297 reason
= "displacement is out of range";
5302 /* Everything looks valid. */
5303 if (TARGET_DEBUG_ADDR
)
5304 fprintf (stderr
, "Success.\n");
5308 if (TARGET_DEBUG_ADDR
)
5310 fprintf (stderr
, "Error: %s\n", reason
);
5311 debug_rtx (reason_rtx
);
5316 /* Return a unique alias set for the GOT. */
5318 static HOST_WIDE_INT
5319 ix86_GOT_alias_set (void)
5321 static HOST_WIDE_INT set
= -1;
5323 set
= new_alias_set ();
5327 /* Return a legitimate reference for ORIG (an address) using the
5328 register REG. If REG is 0, a new pseudo is generated.
5330 There are two types of references that must be handled:
5332 1. Global data references must load the address from the GOT, via
5333 the PIC reg. An insn is emitted to do this load, and the reg is
5336 2. Static data references, constant pool addresses, and code labels
5337 compute the address as an offset from the GOT, whose base is in
5338 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5339 differentiate them from global data objects. The returned
5340 address is the PIC reg + an unspec constant.
5342 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5343 reg also appears in the address. */
5346 legitimize_pic_address (rtx orig
, rtx reg
)
5354 reg
= gen_reg_rtx (Pmode
);
5355 /* Use the generic Mach-O PIC machinery. */
5356 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
5359 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
5361 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
5363 /* This symbol may be referenced via a displacement from the PIC
5364 base address (@GOTOFF). */
5366 if (reload_in_progress
)
5367 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5368 if (GET_CODE (addr
) == CONST
)
5369 addr
= XEXP (addr
, 0);
5370 if (GET_CODE (addr
) == PLUS
)
5372 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
5373 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
5376 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
5377 new = gen_rtx_CONST (Pmode
, new);
5378 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5382 emit_move_insn (reg
, new);
5386 else if (GET_CODE (addr
) == SYMBOL_REF
)
5390 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
5391 new = gen_rtx_CONST (Pmode
, new);
5392 new = gen_const_mem (Pmode
, new);
5393 set_mem_alias_set (new, ix86_GOT_alias_set ());
5396 reg
= gen_reg_rtx (Pmode
);
5397 /* Use gen_movsi directly; otherwise the address is loaded
5398 into a register for CSE. We don't want to CSE these addresses;
5399 instead we CSE addresses from the GOT table, so skip this. */
5400 emit_insn (gen_movsi (reg
, new));
5405 /* This symbol must be referenced via a load from the
5406 Global Offset Table (@GOT). */
5408 if (reload_in_progress
)
5409 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5410 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
5411 new = gen_rtx_CONST (Pmode
, new);
5412 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5413 new = gen_const_mem (Pmode
, new);
5414 set_mem_alias_set (new, ix86_GOT_alias_set ());
5417 reg
= gen_reg_rtx (Pmode
);
5418 emit_move_insn (reg
, new);
5424 if (GET_CODE (addr
) == CONST
)
5426 addr
= XEXP (addr
, 0);
5428 /* We must match stuff we generate before. Assume the only
5429 unspecs that can get here are ours. Not that we could do
5430 anything with them anyway.... */
5431 if (GET_CODE (addr
) == UNSPEC
5432 || (GET_CODE (addr
) == PLUS
5433 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
5435 if (GET_CODE (addr
) != PLUS
)
5438 if (GET_CODE (addr
) == PLUS
)
5440 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
5442 /* Check first to see if this is a constant offset from a @GOTOFF
5443 symbol reference. */
5444 if (local_symbolic_operand (op0
, Pmode
)
5445 && GET_CODE (op1
) == CONST_INT
)
5449 if (reload_in_progress
)
5450 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5451 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
5453 new = gen_rtx_PLUS (Pmode
, new, op1
);
5454 new = gen_rtx_CONST (Pmode
, new);
5455 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5459 emit_move_insn (reg
, new);
5465 if (INTVAL (op1
) < -16*1024*1024
5466 || INTVAL (op1
) >= 16*1024*1024)
5467 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
5472 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
5473 new = legitimize_pic_address (XEXP (addr
, 1),
5474 base
== reg
? NULL_RTX
: reg
);
5476 if (GET_CODE (new) == CONST_INT
)
5477 new = plus_constant (base
, INTVAL (new));
5480 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
5482 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
5483 new = XEXP (new, 1);
5485 new = gen_rtx_PLUS (Pmode
, base
, new);
5493 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5496 get_thread_pointer (int to_reg
)
5500 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
5504 reg
= gen_reg_rtx (Pmode
);
5505 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
5506 insn
= emit_insn (insn
);
5511 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5512 false if we expect this to be used for a memory address and true if
5513 we expect to load the address into a register. */
5516 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
5518 rtx dest
, base
, off
, pic
;
5523 case TLS_MODEL_GLOBAL_DYNAMIC
:
5524 dest
= gen_reg_rtx (Pmode
);
5527 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
5530 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
5531 insns
= get_insns ();
5534 emit_libcall_block (insns
, dest
, rax
, x
);
5537 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
5540 case TLS_MODEL_LOCAL_DYNAMIC
:
5541 base
= gen_reg_rtx (Pmode
);
5544 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
5547 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
5548 insns
= get_insns ();
5551 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
5552 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
5553 emit_libcall_block (insns
, base
, rax
, note
);
5556 emit_insn (gen_tls_local_dynamic_base_32 (base
));
5558 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
5559 off
= gen_rtx_CONST (Pmode
, off
);
5561 return gen_rtx_PLUS (Pmode
, base
, off
);
5563 case TLS_MODEL_INITIAL_EXEC
:
5567 type
= UNSPEC_GOTNTPOFF
;
5571 if (reload_in_progress
)
5572 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5573 pic
= pic_offset_table_rtx
;
5574 type
= TARGET_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
5576 else if (!TARGET_GNU_TLS
)
5578 pic
= gen_reg_rtx (Pmode
);
5579 emit_insn (gen_set_got (pic
));
5580 type
= UNSPEC_GOTTPOFF
;
5585 type
= UNSPEC_INDNTPOFF
;
5588 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
5589 off
= gen_rtx_CONST (Pmode
, off
);
5591 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
5592 off
= gen_const_mem (Pmode
, off
);
5593 set_mem_alias_set (off
, ix86_GOT_alias_set ());
5595 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
5597 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
5598 off
= force_reg (Pmode
, off
);
5599 return gen_rtx_PLUS (Pmode
, base
, off
);
5603 base
= get_thread_pointer (true);
5604 dest
= gen_reg_rtx (Pmode
);
5605 emit_insn (gen_subsi3 (dest
, base
, off
));
5609 case TLS_MODEL_LOCAL_EXEC
:
5610 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
5611 (TARGET_64BIT
|| TARGET_GNU_TLS
)
5612 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
5613 off
= gen_rtx_CONST (Pmode
, off
);
5615 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
5617 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
5618 return gen_rtx_PLUS (Pmode
, base
, off
);
5622 base
= get_thread_pointer (true);
5623 dest
= gen_reg_rtx (Pmode
);
5624 emit_insn (gen_subsi3 (dest
, base
, off
));
5635 /* Try machine-dependent ways of modifying an illegitimate address
5636 to be legitimate. If we find one, return the new, valid address.
5637 This macro is used in only one place: `memory_address' in explow.c.
5639 OLDX is the address as it was before break_out_memory_refs was called.
5640 In some cases it is useful to look at this to decide what needs to be done.
5642 MODE and WIN are passed so that this macro can use
5643 GO_IF_LEGITIMATE_ADDRESS.
5645 It is always safe for this macro to do nothing. It exists to recognize
5646 opportunities to optimize the output.
5648 For the 80386, we handle X+REG by loading X into a register R and
5649 using R+REG. R will go in a general reg and indexing will be used.
5650 However, if REG is a broken-out memory address or multiplication,
5651 nothing needs to be done because REG can certainly go in a general reg.
5653 When -fpic is used, special handling is needed for symbolic references.
5654 See comments by legitimize_pic_address in i386.c for details. */
5657 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
5662 if (TARGET_DEBUG_ADDR
)
5664 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5665 GET_MODE_NAME (mode
));
5669 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
5671 return legitimize_tls_address (x
, log
, false);
5672 if (GET_CODE (x
) == CONST
5673 && GET_CODE (XEXP (x
, 0)) == PLUS
5674 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
5675 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
5677 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
5678 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
5681 if (flag_pic
&& SYMBOLIC_CONST (x
))
5682 return legitimize_pic_address (x
, 0);
5684 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5685 if (GET_CODE (x
) == ASHIFT
5686 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5687 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
5690 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
5691 GEN_INT (1 << log
));
5694 if (GET_CODE (x
) == PLUS
)
5696 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5698 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
5699 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
5700 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
5703 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
5704 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
5705 GEN_INT (1 << log
));
5708 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
5709 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
5710 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
5713 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
5714 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
5715 GEN_INT (1 << log
));
5718 /* Put multiply first if it isn't already. */
5719 if (GET_CODE (XEXP (x
, 1)) == MULT
)
5721 rtx tmp
= XEXP (x
, 0);
5722 XEXP (x
, 0) = XEXP (x
, 1);
5727 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5728 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5729 created by virtual register instantiation, register elimination, and
5730 similar optimizations. */
5731 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
5734 x
= gen_rtx_PLUS (Pmode
,
5735 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
5736 XEXP (XEXP (x
, 1), 0)),
5737 XEXP (XEXP (x
, 1), 1));
5741 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5742 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5743 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
5744 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5745 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
5746 && CONSTANT_P (XEXP (x
, 1)))
5749 rtx other
= NULL_RTX
;
5751 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5753 constant
= XEXP (x
, 1);
5754 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5756 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
5758 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5759 other
= XEXP (x
, 1);
5767 x
= gen_rtx_PLUS (Pmode
,
5768 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
5769 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
5770 plus_constant (other
, INTVAL (constant
)));
5774 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
5777 if (GET_CODE (XEXP (x
, 0)) == MULT
)
5780 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
5783 if (GET_CODE (XEXP (x
, 1)) == MULT
)
5786 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
5790 && GET_CODE (XEXP (x
, 1)) == REG
5791 && GET_CODE (XEXP (x
, 0)) == REG
)
5794 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
5797 x
= legitimize_pic_address (x
, 0);
5800 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
5803 if (GET_CODE (XEXP (x
, 0)) == REG
)
5805 rtx temp
= gen_reg_rtx (Pmode
);
5806 rtx val
= force_operand (XEXP (x
, 1), temp
);
5808 emit_move_insn (temp
, val
);
5814 else if (GET_CODE (XEXP (x
, 1)) == REG
)
5816 rtx temp
= gen_reg_rtx (Pmode
);
5817 rtx val
= force_operand (XEXP (x
, 0), temp
);
5819 emit_move_insn (temp
, val
);
5829 /* Print an integer constant expression in assembler syntax. Addition
5830 and subtraction are the only arithmetic that may appear in these
5831 expressions. FILE is the stdio stream to write to, X is the rtx, and
5832 CODE is the operand print code from the output string. */
5835 output_pic_addr_const (FILE *file
, rtx x
, int code
)
5839 switch (GET_CODE (x
))
5849 /* Mark the decl as referenced so that cgraph will output the function. */
5850 if (SYMBOL_REF_DECL (x
))
5851 mark_decl_referenced (SYMBOL_REF_DECL (x
));
5853 assemble_name (file
, XSTR (x
, 0));
5854 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
5855 fputs ("@PLT", file
);
5862 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
5863 assemble_name (asm_out_file
, buf
);
5867 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
5871 /* This used to output parentheses around the expression,
5872 but that does not work on the 386 (either ATT or BSD assembler). */
5873 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5877 if (GET_MODE (x
) == VOIDmode
)
5879 /* We can use %d if the number is <32 bits and positive. */
5880 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
5881 fprintf (file
, "0x%lx%08lx",
5882 (unsigned long) CONST_DOUBLE_HIGH (x
),
5883 (unsigned long) CONST_DOUBLE_LOW (x
));
5885 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
5888 /* We can't handle floating point constants;
5889 PRINT_OPERAND must handle them. */
5890 output_operand_lossage ("floating constant misused");
5894 /* Some assemblers need integer constants to appear first. */
5895 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
5897 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5899 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5901 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5903 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5905 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5913 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
5914 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5916 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5918 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
5922 if (XVECLEN (x
, 0) != 1)
5924 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
5925 switch (XINT (x
, 1))
5928 fputs ("@GOT", file
);
5931 fputs ("@GOTOFF", file
);
5933 case UNSPEC_GOTPCREL
:
5934 fputs ("@GOTPCREL(%rip)", file
);
5936 case UNSPEC_GOTTPOFF
:
5937 /* FIXME: This might be @TPOFF in Sun ld too. */
5938 fputs ("@GOTTPOFF", file
);
5941 fputs ("@TPOFF", file
);
5945 fputs ("@TPOFF", file
);
5947 fputs ("@NTPOFF", file
);
5950 fputs ("@DTPOFF", file
);
5952 case UNSPEC_GOTNTPOFF
:
5954 fputs ("@GOTTPOFF(%rip)", file
);
5956 fputs ("@GOTNTPOFF", file
);
5958 case UNSPEC_INDNTPOFF
:
5959 fputs ("@INDNTPOFF", file
);
5962 output_operand_lossage ("invalid UNSPEC as operand");
5968 output_operand_lossage ("invalid expression as operand");
5972 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
5973 We need to emit DTP-relative relocations. */
5976 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
5978 fputs (ASM_LONG
, file
);
5979 output_addr_const (file
, x
);
5980 fputs ("@DTPOFF", file
);
5986 fputs (", 0", file
);
5993 /* In the name of slightly smaller debug output, and to cater to
5994 general assembler lossage, recognize PIC+GOTOFF and turn it back
5995 into a direct symbol reference. */
5998 ix86_delegitimize_address (rtx orig_x
)
6002 if (GET_CODE (x
) == MEM
)
6007 if (GET_CODE (x
) != CONST
6008 || GET_CODE (XEXP (x
, 0)) != UNSPEC
6009 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
6010 || GET_CODE (orig_x
) != MEM
)
6012 return XVECEXP (XEXP (x
, 0), 0, 0);
6015 if (GET_CODE (x
) != PLUS
6016 || GET_CODE (XEXP (x
, 1)) != CONST
)
6019 if (GET_CODE (XEXP (x
, 0)) == REG
6020 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
6021 /* %ebx + GOT/GOTOFF */
6023 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
6025 /* %ebx + %reg * scale + GOT/GOTOFF */
6027 if (GET_CODE (XEXP (y
, 0)) == REG
6028 && REGNO (XEXP (y
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
6030 else if (GET_CODE (XEXP (y
, 1)) == REG
6031 && REGNO (XEXP (y
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
6035 if (GET_CODE (y
) != REG
6036 && GET_CODE (y
) != MULT
6037 && GET_CODE (y
) != ASHIFT
)
6043 x
= XEXP (XEXP (x
, 1), 0);
6044 if (GET_CODE (x
) == UNSPEC
6045 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
6046 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
6049 return gen_rtx_PLUS (Pmode
, y
, XVECEXP (x
, 0, 0));
6050 return XVECEXP (x
, 0, 0);
6053 if (GET_CODE (x
) == PLUS
6054 && GET_CODE (XEXP (x
, 0)) == UNSPEC
6055 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6056 && ((XINT (XEXP (x
, 0), 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
6057 || (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTOFF
6058 && GET_CODE (orig_x
) != MEM
)))
6060 x
= gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
6062 return gen_rtx_PLUS (Pmode
, y
, x
);
6070 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
6075 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
6077 enum rtx_code second_code
, bypass_code
;
6078 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
6079 if (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
)
6081 code
= ix86_fp_compare_code_to_integer (code
);
6085 code
= reverse_condition (code
);
6096 if (mode
!= CCmode
&& mode
!= CCNOmode
&& mode
!= CCGCmode
)
6101 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6102 Those same assemblers have the same but opposite lossage on cmov. */
6105 suffix
= fp
? "nbe" : "a";
6108 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
6110 else if (mode
== CCmode
|| mode
== CCGCmode
)
6121 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
6123 else if (mode
== CCmode
|| mode
== CCGCmode
)
6132 suffix
= fp
? "nb" : "ae";
6135 if (mode
!= CCmode
&& mode
!= CCGCmode
&& mode
!= CCNOmode
)
6145 suffix
= fp
? "u" : "p";
6148 suffix
= fp
? "nu" : "np";
6153 fputs (suffix
, file
);
6156 /* Print the name of register X to FILE based on its machine mode and number.
6157 If CODE is 'w', pretend the mode is HImode.
6158 If CODE is 'b', pretend the mode is QImode.
6159 If CODE is 'k', pretend the mode is SImode.
6160 If CODE is 'q', pretend the mode is DImode.
6161 If CODE is 'h', pretend the reg is the `high' byte register.
6162 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
6165 print_reg (rtx x
, int code
, FILE *file
)
6167 if (REGNO (x
) == ARG_POINTER_REGNUM
6168 || REGNO (x
) == FRAME_POINTER_REGNUM
6169 || REGNO (x
) == FLAGS_REG
6170 || REGNO (x
) == FPSR_REG
)
6173 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
6176 if (code
== 'w' || MMX_REG_P (x
))
6178 else if (code
== 'b')
6180 else if (code
== 'k')
6182 else if (code
== 'q')
6184 else if (code
== 'y')
6186 else if (code
== 'h')
6189 code
= GET_MODE_SIZE (GET_MODE (x
));
6191 /* Irritatingly, AMD extended registers use a different naming convention
6192 from the normal registers. */
6193 if (REX_INT_REG_P (x
))
6200 error ("extended registers have no high halves");
6203 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6206 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6209 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6212 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6215 error ("unsupported operand size for extended register");
6223 if (STACK_TOP_P (x
))
6225 fputs ("st(0)", file
);
6232 if (! ANY_FP_REG_P (x
))
6233 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
6238 fputs (hi_reg_name
[REGNO (x
)], file
);
6241 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
6243 fputs (qi_reg_name
[REGNO (x
)], file
);
6246 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
6248 fputs (qi_high_reg_name
[REGNO (x
)], file
);
6255 /* Locate some local-dynamic symbol still in use by this function
6256 so that we can print its name in some tls_local_dynamic_base
6260 get_some_local_dynamic_name (void)
6264 if (cfun
->machine
->some_ld_name
)
6265 return cfun
->machine
->some_ld_name
;
6267 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
6269 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
6270 return cfun
->machine
->some_ld_name
;
6276 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
6280 if (GET_CODE (x
) == SYMBOL_REF
6281 && local_dynamic_symbolic_operand (x
, Pmode
))
6283 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
6291 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6292 C -- print opcode suffix for set/cmov insn.
6293 c -- like C, but print reversed condition
6294 F,f -- likewise, but for floating-point.
6295 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6297 R -- print the prefix for register names.
6298 z -- print the opcode suffix for the size of the current operand.
6299 * -- print a star (in certain assembler syntax)
6300 A -- print an absolute memory reference.
6301 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6302 s -- print a shift double count, followed by the assembler's argument
6304 b -- print the QImode name of the register for the indicated operand.
6305 %b0 would print %al if operands[0] is reg 0.
6306 w -- likewise, print the HImode name of the register.
6307 k -- likewise, print the SImode name of the register.
6308 q -- likewise, print the DImode name of the register.
6309 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6310 y -- print "st(0)" instead of "st" as a register.
6311 D -- print condition for SSE cmp instruction.
6312 P -- if PIC, print an @PLT suffix.
6313 X -- don't print any sort of PIC '@' suffix for a symbol.
6314 & -- print some in-use local-dynamic symbol name.
6315 H -- print a memory address offset by 8; used for sse high-parts
6319 print_operand (FILE *file
, rtx x
, int code
)
6326 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6331 assemble_name (file
, get_some_local_dynamic_name ());
6335 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6337 else if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6339 /* Intel syntax. For absolute addresses, registers should not
6340 be surrounded by braces. */
6341 if (GET_CODE (x
) != REG
)
6344 PRINT_OPERAND (file
, x
, 0);
6352 PRINT_OPERAND (file
, x
, 0);
6357 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6362 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6367 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6372 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6377 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6382 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6387 /* 387 opcodes don't get size suffixes if the operands are
6389 if (STACK_REG_P (x
))
6392 /* Likewise if using Intel opcodes. */
6393 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6396 /* This is the size of op from size of operand. */
6397 switch (GET_MODE_SIZE (GET_MODE (x
)))
6400 #ifdef HAVE_GAS_FILDS_FISTS
6406 if (GET_MODE (x
) == SFmode
)
6421 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
6423 #ifdef GAS_MNEMONICS
6449 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
6451 PRINT_OPERAND (file
, x
, 0);
6457 /* Little bit of braindamage here. The SSE compare instructions
6458 use completely different names for the comparisons than the
6459 fp conditional moves. */
6460 switch (GET_CODE (x
))
6475 fputs ("unord", file
);
6479 fputs ("neq", file
);
6483 fputs ("nlt", file
);
6487 fputs ("nle", file
);
6490 fputs ("ord", file
);
6498 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6499 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6501 switch (GET_MODE (x
))
6503 case HImode
: putc ('w', file
); break;
6505 case SFmode
: putc ('l', file
); break;
6507 case DFmode
: putc ('q', file
); break;
6515 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
6518 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6519 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6522 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
6525 /* Like above, but reverse condition */
6527 /* Check to see if argument to %c is really a constant
6528 and not a condition code which needs to be reversed. */
6529 if (!COMPARISON_P (x
))
6531 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6534 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
6537 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6538 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6541 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
6545 /* It doesn't actually matter what mode we use here, as we're
6546 only going to use this for printing. */
6547 x
= adjust_address_nv (x
, DImode
, 8);
6554 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
6557 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
6560 int pred_val
= INTVAL (XEXP (x
, 0));
6562 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
6563 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
6565 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
6566 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
6568 /* Emit hints only in the case default branch prediction
6569 heuristics would fail. */
6570 if (taken
!= cputaken
)
6572 /* We use 3e (DS) prefix for taken branches and
6573 2e (CS) prefix for not taken branches. */
6575 fputs ("ds ; ", file
);
6577 fputs ("cs ; ", file
);
6584 output_operand_lossage ("invalid operand code '%c'", code
);
6588 if (GET_CODE (x
) == REG
)
6589 print_reg (x
, code
, file
);
6591 else if (GET_CODE (x
) == MEM
)
6593 /* No `byte ptr' prefix for call instructions. */
6594 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
6597 switch (GET_MODE_SIZE (GET_MODE (x
)))
6599 case 1: size
= "BYTE"; break;
6600 case 2: size
= "WORD"; break;
6601 case 4: size
= "DWORD"; break;
6602 case 8: size
= "QWORD"; break;
6603 case 12: size
= "XWORD"; break;
6604 case 16: size
= "XMMWORD"; break;
6609 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6612 else if (code
== 'w')
6614 else if (code
== 'k')
6618 fputs (" PTR ", file
);
6622 /* Avoid (%rip) for call operands. */
6623 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
6624 && GET_CODE (x
) != CONST_INT
)
6625 output_addr_const (file
, x
);
6626 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
6627 output_operand_lossage ("invalid constraints for operand");
6632 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
6637 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
6638 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
6640 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6642 fprintf (file
, "0x%08lx", l
);
6645 /* These float cases don't actually occur as immediate operands. */
6646 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
6650 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
6651 fprintf (file
, "%s", dstr
);
6654 else if (GET_CODE (x
) == CONST_DOUBLE
6655 && GET_MODE (x
) == XFmode
)
6659 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
6660 fprintf (file
, "%s", dstr
);
6667 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
6669 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6672 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
6673 || GET_CODE (x
) == LABEL_REF
)
6675 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6678 fputs ("OFFSET FLAT:", file
);
6681 if (GET_CODE (x
) == CONST_INT
)
6682 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6684 output_pic_addr_const (file
, x
, code
);
6686 output_addr_const (file
, x
);
6690 /* Print a memory operand whose address is ADDR. */
6693 print_operand_address (FILE *file
, rtx addr
)
6695 struct ix86_address parts
;
6696 rtx base
, index
, disp
;
6699 if (! ix86_decompose_address (addr
, &parts
))
6703 index
= parts
.index
;
6705 scale
= parts
.scale
;
6713 if (USER_LABEL_PREFIX
[0] == 0)
6715 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
6721 if (!base
&& !index
)
6723 /* Displacement only requires special attention. */
6725 if (GET_CODE (disp
) == CONST_INT
)
6727 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
6729 if (USER_LABEL_PREFIX
[0] == 0)
6731 fputs ("ds:", file
);
6733 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
6736 output_pic_addr_const (file
, disp
, 0);
6738 output_addr_const (file
, disp
);
6740 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6742 && ((GET_CODE (disp
) == SYMBOL_REF
6743 && ! tls_symbolic_operand (disp
, GET_MODE (disp
)))
6744 || GET_CODE (disp
) == LABEL_REF
6745 || (GET_CODE (disp
) == CONST
6746 && GET_CODE (XEXP (disp
, 0)) == PLUS
6747 && (GET_CODE (XEXP (XEXP (disp
, 0), 0)) == SYMBOL_REF
6748 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) == LABEL_REF
)
6749 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)))
6750 fputs ("(%rip)", file
);
6754 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6759 output_pic_addr_const (file
, disp
, 0);
6760 else if (GET_CODE (disp
) == LABEL_REF
)
6761 output_asm_label (disp
);
6763 output_addr_const (file
, disp
);
6768 print_reg (base
, 0, file
);
6772 print_reg (index
, 0, file
);
6774 fprintf (file
, ",%d", scale
);
6780 rtx offset
= NULL_RTX
;
6784 /* Pull out the offset of a symbol; print any symbol itself. */
6785 if (GET_CODE (disp
) == CONST
6786 && GET_CODE (XEXP (disp
, 0)) == PLUS
6787 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
6789 offset
= XEXP (XEXP (disp
, 0), 1);
6790 disp
= gen_rtx_CONST (VOIDmode
,
6791 XEXP (XEXP (disp
, 0), 0));
6795 output_pic_addr_const (file
, disp
, 0);
6796 else if (GET_CODE (disp
) == LABEL_REF
)
6797 output_asm_label (disp
);
6798 else if (GET_CODE (disp
) == CONST_INT
)
6801 output_addr_const (file
, disp
);
6807 print_reg (base
, 0, file
);
6810 if (INTVAL (offset
) >= 0)
6812 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
6816 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
6823 print_reg (index
, 0, file
);
6825 fprintf (file
, "*%d", scale
);
6833 output_addr_const_extra (FILE *file
, rtx x
)
6837 if (GET_CODE (x
) != UNSPEC
)
6840 op
= XVECEXP (x
, 0, 0);
6841 switch (XINT (x
, 1))
6843 case UNSPEC_GOTTPOFF
:
6844 output_addr_const (file
, op
);
6845 /* FIXME: This might be @TPOFF in Sun ld. */
6846 fputs ("@GOTTPOFF", file
);
6849 output_addr_const (file
, op
);
6850 fputs ("@TPOFF", file
);
6853 output_addr_const (file
, op
);
6855 fputs ("@TPOFF", file
);
6857 fputs ("@NTPOFF", file
);
6860 output_addr_const (file
, op
);
6861 fputs ("@DTPOFF", file
);
6863 case UNSPEC_GOTNTPOFF
:
6864 output_addr_const (file
, op
);
6866 fputs ("@GOTTPOFF(%rip)", file
);
6868 fputs ("@GOTNTPOFF", file
);
6870 case UNSPEC_INDNTPOFF
:
6871 output_addr_const (file
, op
);
6872 fputs ("@INDNTPOFF", file
);
6882 /* Split one or more DImode RTL references into pairs of SImode
6883 references. The RTL can be REG, offsettable MEM, integer constant, or
6884 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6885 split and "num" is its length. lo_half and hi_half are output arrays
6886 that parallel "operands". */
6889 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
6893 rtx op
= operands
[num
];
6895 /* simplify_subreg refuses to split volatile memory addresses,
6896 but we still have to handle them. */
6897 if (GET_CODE (op
) == MEM
)
6899 lo_half
[num
] = adjust_address (op
, SImode
, 0);
6900 hi_half
[num
] = adjust_address (op
, SImode
, 4);
6904 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
6905 GET_MODE (op
) == VOIDmode
6906 ? DImode
: GET_MODE (op
), 0);
6907 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
6908 GET_MODE (op
) == VOIDmode
6909 ? DImode
: GET_MODE (op
), 4);
6913 /* Split one or more TImode RTL references into pairs of DImode
6914 references. The RTL can be REG, offsettable MEM, integer constant, or
6915 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6916 split and "num" is its length. lo_half and hi_half are output arrays
6917 that parallel "operands". */
6920 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
6924 rtx op
= operands
[num
];
6926 /* simplify_subreg refuses to split volatile memory addresses, but we
6927 still have to handle them. */
6928 if (GET_CODE (op
) == MEM
)
6930 lo_half
[num
] = adjust_address (op
, DImode
, 0);
6931 hi_half
[num
] = adjust_address (op
, DImode
, 8);
6935 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
6936 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
6941 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6942 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6943 is the expression of the binary operation. The output may either be
6944 emitted here, or returned to the caller, like all output_* functions.
6946 There is no guarantee that the operands are the same mode, as they
6947 might be within FLOAT or FLOAT_EXTEND expressions. */
6949 #ifndef SYSV386_COMPAT
6950 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6951 wants to fix the assemblers because that causes incompatibility
6952 with gcc. No-one wants to fix gcc because that causes
6953 incompatibility with assemblers... You can use the option of
6954 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6955 #define SYSV386_COMPAT 1
6959 output_387_binary_op (rtx insn
, rtx
*operands
)
6961 static char buf
[30];
6964 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
6966 #ifdef ENABLE_CHECKING
6967 /* Even if we do not want to check the inputs, this documents the input
6968 constraints, which helps in understanding the following code. */
6969 if (STACK_REG_P (operands
[0])
6970 && ((REG_P (operands
[1])
6971 && REGNO (operands
[0]) == REGNO (operands
[1])
6972 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
6973 || (REG_P (operands
[2])
6974 && REGNO (operands
[0]) == REGNO (operands
[2])
6975 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
6976 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
6982 switch (GET_CODE (operands
[3]))
6985 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6986 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6994 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6995 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7003 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7004 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7012 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7013 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7027 if (GET_MODE (operands
[0]) == SFmode
)
7028 strcat (buf
, "ss\t{%2, %0|%0, %2}");
7030 strcat (buf
, "sd\t{%2, %0|%0, %2}");
7035 switch (GET_CODE (operands
[3]))
7039 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
7041 rtx temp
= operands
[2];
7042 operands
[2] = operands
[1];
7046 /* know operands[0] == operands[1]. */
7048 if (GET_CODE (operands
[2]) == MEM
)
7054 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7056 if (STACK_TOP_P (operands
[0]))
7057 /* How is it that we are storing to a dead operand[2]?
7058 Well, presumably operands[1] is dead too. We can't
7059 store the result to st(0) as st(0) gets popped on this
7060 instruction. Instead store to operands[2] (which I
7061 think has to be st(1)). st(1) will be popped later.
7062 gcc <= 2.8.1 didn't have this check and generated
7063 assembly code that the Unixware assembler rejected. */
7064 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7066 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7070 if (STACK_TOP_P (operands
[0]))
7071 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7073 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7078 if (GET_CODE (operands
[1]) == MEM
)
7084 if (GET_CODE (operands
[2]) == MEM
)
7090 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7093 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7094 derived assemblers, confusingly reverse the direction of
7095 the operation for fsub{r} and fdiv{r} when the
7096 destination register is not st(0). The Intel assembler
7097 doesn't have this brain damage. Read !SYSV386_COMPAT to
7098 figure out what the hardware really does. */
7099 if (STACK_TOP_P (operands
[0]))
7100 p
= "{p\t%0, %2|rp\t%2, %0}";
7102 p
= "{rp\t%2, %0|p\t%0, %2}";
7104 if (STACK_TOP_P (operands
[0]))
7105 /* As above for fmul/fadd, we can't store to st(0). */
7106 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7108 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7113 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
7116 if (STACK_TOP_P (operands
[0]))
7117 p
= "{rp\t%0, %1|p\t%1, %0}";
7119 p
= "{p\t%1, %0|rp\t%0, %1}";
7121 if (STACK_TOP_P (operands
[0]))
7122 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7124 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7129 if (STACK_TOP_P (operands
[0]))
7131 if (STACK_TOP_P (operands
[1]))
7132 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7134 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7137 else if (STACK_TOP_P (operands
[1]))
7140 p
= "{\t%1, %0|r\t%0, %1}";
7142 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7148 p
= "{r\t%2, %0|\t%0, %2}";
7150 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7163 /* Output code to initialize control word copies used by trunc?f?i and
7164 rounding patterns. CURRENT_MODE is set to current control word,
7165 while NEW_MODE is set to new control word. */
7168 emit_i387_cw_initialization (rtx current_mode
, rtx new_mode
, int mode
)
7170 rtx reg
= gen_reg_rtx (HImode
);
7172 emit_insn (gen_x86_fnstcw_1 (current_mode
));
7173 emit_move_insn (reg
, current_mode
);
7175 if (!TARGET_PARTIAL_REG_STALL
&& !optimize_size
7181 /* round down toward -oo */
7182 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
7186 /* round up toward +oo */
7187 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
7191 /* round toward zero (truncate) */
7192 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
7195 case I387_CW_MASK_PM
:
7196 /* mask precision exception for nearbyint() */
7197 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
7209 /* round down toward -oo */
7210 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
7211 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
7215 /* round up toward +oo */
7216 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
7217 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
7221 /* round toward zero (truncate) */
7222 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
7225 case I387_CW_MASK_PM
:
7226 /* mask precision exception for nearbyint() */
7227 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
7235 emit_move_insn (new_mode
, reg
);
7238 /* Output code for INSN to convert a float to a signed int. OPERANDS
7239 are the insn operands. The output may be [HSD]Imode and the input
7240 operand may be [SDX]Fmode. */
7243 output_fix_trunc (rtx insn
, rtx
*operands
)
7245 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7246 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
7248 /* Jump through a hoop or two for DImode, since the hardware has no
7249 non-popping instruction. We used to do this a different way, but
7250 that was somewhat fragile and broke with post-reload splitters. */
7251 if (dimode_p
&& !stack_top_dies
)
7252 output_asm_insn ("fld\t%y1", operands
);
7254 if (!STACK_TOP_P (operands
[1]))
7257 if (GET_CODE (operands
[0]) != MEM
)
7260 output_asm_insn ("fldcw\t%3", operands
);
7261 if (stack_top_dies
|| dimode_p
)
7262 output_asm_insn ("fistp%z0\t%0", operands
);
7264 output_asm_insn ("fist%z0\t%0", operands
);
7265 output_asm_insn ("fldcw\t%2", operands
);
7270 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7271 should be used. UNORDERED_P is true when fucom should be used. */
7274 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
7277 rtx cmp_op0
, cmp_op1
;
7278 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
7282 cmp_op0
= operands
[0];
7283 cmp_op1
= operands
[1];
7287 cmp_op0
= operands
[1];
7288 cmp_op1
= operands
[2];
7293 if (GET_MODE (operands
[0]) == SFmode
)
7295 return "ucomiss\t{%1, %0|%0, %1}";
7297 return "comiss\t{%1, %0|%0, %1}";
7300 return "ucomisd\t{%1, %0|%0, %1}";
7302 return "comisd\t{%1, %0|%0, %1}";
7305 if (! STACK_TOP_P (cmp_op0
))
7308 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7310 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
7314 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
7315 return TARGET_USE_FFREEP
? "ffreep\t%y1" : "fstp\t%y1";
7318 return "ftst\n\tfnstsw\t%0";
7321 if (STACK_REG_P (cmp_op1
)
7323 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
7324 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
7326 /* If both the top of the 387 stack dies, and the other operand
7327 is also a stack register that dies, then this must be a
7328 `fcompp' float compare */
7332 /* There is no double popping fcomi variant. Fortunately,
7333 eflags is immune from the fstp's cc clobbering. */
7335 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
7337 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
7338 return TARGET_USE_FFREEP
? "ffreep\t%y0" : "fstp\t%y0";
7343 return "fucompp\n\tfnstsw\t%0";
7345 return "fcompp\n\tfnstsw\t%0";
7350 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7352 static const char * const alt
[16] =
7354 "fcom%z2\t%y2\n\tfnstsw\t%0",
7355 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7356 "fucom%z2\t%y2\n\tfnstsw\t%0",
7357 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7359 "ficom%z2\t%y2\n\tfnstsw\t%0",
7360 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7364 "fcomi\t{%y1, %0|%0, %y1}",
7365 "fcomip\t{%y1, %0|%0, %y1}",
7366 "fucomi\t{%y1, %0|%0, %y1}",
7367 "fucomip\t{%y1, %0|%0, %y1}",
7378 mask
= eflags_p
<< 3;
7379 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
7380 mask
|= unordered_p
<< 1;
7381 mask
|= stack_top_dies
;
7394 ix86_output_addr_vec_elt (FILE *file
, int value
)
7396 const char *directive
= ASM_LONG
;
7401 directive
= ASM_QUAD
;
7407 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
7411 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
7414 fprintf (file
, "%s%s%d-%s%d\n",
7415 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
7416 else if (HAVE_AS_GOTOFF_IN_DATA
)
7417 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
7419 else if (TARGET_MACHO
)
7421 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
7422 machopic_output_function_base_name (file
);
7423 fprintf(file
, "\n");
7427 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
7428 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
7431 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7435 ix86_expand_clear (rtx dest
)
7439 /* We play register width games, which are only valid after reload. */
7440 if (!reload_completed
)
7443 /* Avoid HImode and its attendant prefix byte. */
7444 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
7445 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
7447 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
7449 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7450 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
7452 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
7453 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
7459 /* X is an unchanging MEM. If it is a constant pool reference, return
7460 the constant pool rtx, else NULL. */
7463 maybe_get_pool_constant (rtx x
)
7465 x
= ix86_delegitimize_address (XEXP (x
, 0));
7467 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7468 return get_pool_constant (x
);
7474 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
7476 int strict
= (reload_in_progress
|| reload_completed
);
7478 enum tls_model model
;
7483 if (GET_CODE (op1
) == SYMBOL_REF
)
7485 model
= SYMBOL_REF_TLS_MODEL (op1
);
7488 op1
= legitimize_tls_address (op1
, model
, true);
7489 op1
= force_operand (op1
, op0
);
7494 else if (GET_CODE (op1
) == CONST
7495 && GET_CODE (XEXP (op1
, 0)) == PLUS
7496 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
7498 model
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1
, 0), 0));
7501 rtx addend
= XEXP (XEXP (op1
, 0), 1);
7502 op1
= legitimize_tls_address (XEXP (XEXP (op1
, 0), 0), model
, true);
7503 op1
= force_operand (op1
, NULL
);
7504 op1
= expand_simple_binop (Pmode
, PLUS
, op1
, addend
,
7505 op0
, 1, OPTAB_DIRECT
);
7511 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
7516 rtx temp
= ((reload_in_progress
7517 || ((op0
&& GET_CODE (op0
) == REG
)
7519 ? op0
: gen_reg_rtx (Pmode
));
7520 op1
= machopic_indirect_data_reference (op1
, temp
);
7521 op1
= machopic_legitimize_pic_address (op1
, mode
,
7522 temp
== op1
? 0 : temp
);
7524 else if (MACHOPIC_INDIRECT
)
7525 op1
= machopic_indirect_data_reference (op1
, 0);
7529 if (GET_CODE (op0
) == MEM
)
7530 op1
= force_reg (Pmode
, op1
);
7532 op1
= legitimize_address (op1
, op1
, Pmode
);
7533 #endif /* TARGET_MACHO */
7537 if (GET_CODE (op0
) == MEM
7538 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
7539 || !push_operand (op0
, mode
))
7540 && GET_CODE (op1
) == MEM
)
7541 op1
= force_reg (mode
, op1
);
7543 if (push_operand (op0
, mode
)
7544 && ! general_no_elim_operand (op1
, mode
))
7545 op1
= copy_to_mode_reg (mode
, op1
);
7547 /* Force large constants in 64bit compilation into register
7548 to get them CSEed. */
7549 if (TARGET_64BIT
&& mode
== DImode
7550 && immediate_operand (op1
, mode
)
7551 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
7552 && !register_operand (op0
, mode
)
7553 && optimize
&& !reload_completed
&& !reload_in_progress
)
7554 op1
= copy_to_mode_reg (mode
, op1
);
7556 if (FLOAT_MODE_P (mode
))
7558 /* If we are loading a floating point constant to a register,
7559 force the value to memory now, since we'll get better code
7560 out the back end. */
7564 else if (GET_CODE (op1
) == CONST_DOUBLE
)
7566 op1
= validize_mem (force_const_mem (mode
, op1
));
7567 if (!register_operand (op0
, mode
))
7569 rtx temp
= gen_reg_rtx (mode
);
7570 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
7571 emit_move_insn (op0
, temp
);
7578 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
7582 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
7584 rtx op0
= operands
[0], op1
= operands
[1];
7586 /* Force constants other than zero into memory. We do not know how
7587 the instructions used to build constants modify the upper 64 bits
7588 of the register; once we have that information, we may be able
7589 to handle some of them more efficiently. */
7590 if ((reload_in_progress
| reload_completed
) == 0
7591 && register_operand (op0
, mode
)
7592 && CONSTANT_P (op1
) && op1
!= CONST0_RTX (mode
))
7593 op1
= validize_mem (force_const_mem (mode
, op1
));
7595 /* Make operand1 a register if it isn't already. */
7597 && !register_operand (op0
, mode
)
7598 && !register_operand (op1
, mode
))
7600 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
7604 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
7607 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
7608 straight to ix86_expand_vector_move. */
7611 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
7620 /* If we're optimizing for size, movups is the smallest. */
7623 op0
= gen_lowpart (V4SFmode
, op0
);
7624 op1
= gen_lowpart (V4SFmode
, op1
);
7625 emit_insn (gen_sse_movups (op0
, op1
));
7629 /* ??? If we have typed data, then it would appear that using
7630 movdqu is the only way to get unaligned data loaded with
7632 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
7634 op0
= gen_lowpart (V16QImode
, op0
);
7635 op1
= gen_lowpart (V16QImode
, op1
);
7636 emit_insn (gen_sse2_movdqu (op0
, op1
));
7640 if (TARGET_SSE2
&& mode
== V2DFmode
)
7642 /* When SSE registers are split into halves, we can avoid
7643 writing to the top half twice. */
7644 if (TARGET_SSE_SPLIT_REGS
)
7646 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
7647 m
= adjust_address (op1
, DFmode
, 0);
7648 emit_insn (gen_sse2_loadlpd (op0
, op0
, m
));
7649 m
= adjust_address (op1
, DFmode
, 8);
7650 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
7654 /* ??? Not sure about the best option for the Intel chips.
7655 The following would seem to satisfy; the register is
7656 entirely cleared, breaking the dependency chain. We
7657 then store to the upper half, with a dependency depth
7658 of one. A rumor has it that Intel recommends two movsd
7659 followed by an unpacklpd, but this is unconfirmed. And
7660 given that the dependency depth of the unpacklpd would
7661 still be one, I'm not sure why this would be better. */
7662 m
= adjust_address (op1
, DFmode
, 0);
7663 emit_insn (gen_sse2_loadsd (op0
, m
));
7664 m
= adjust_address (op1
, DFmode
, 8);
7665 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
7670 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
7671 emit_move_insn (op0
, CONST0_RTX (mode
));
7673 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
7675 m
= adjust_address (op1
, V2SFmode
, 0);
7676 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
7677 m
= adjust_address (op1
, V2SFmode
, 8);
7678 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
7681 else if (MEM_P (op0
))
7683 /* If we're optimizing for size, movups is the smallest. */
7686 op0
= gen_lowpart (V4SFmode
, op0
);
7687 op1
= gen_lowpart (V4SFmode
, op1
);
7688 emit_insn (gen_sse_movups (op0
, op1
));
7692 /* ??? Similar to above, only less clear because of quote
7693 typeless stores unquote. */
7694 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
7695 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
7697 op0
= gen_lowpart (V16QImode
, op0
);
7698 op1
= gen_lowpart (V16QImode
, op1
);
7699 emit_insn (gen_sse2_movdqu (op0
, op1
));
7703 if (TARGET_SSE2
&& mode
== V2DFmode
)
7705 m
= adjust_address (op0
, DFmode
, 0);
7706 emit_insn (gen_sse2_storelpd (m
, op1
));
7707 m
= adjust_address (op0
, DFmode
, 8);
7708 emit_insn (gen_sse2_storehpd (m
, op1
));
7713 m
= adjust_address (op0
, V2SFmode
, 0);
7714 emit_insn (gen_sse_storelps (m
, op1
));
7715 m
= adjust_address (op0
, V2SFmode
, 8);
7716 emit_insn (gen_sse_storehps (m
, op1
));
7725 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
7726 destination to use for the operation. If different from the true
7727 destination in operands[0], a copy operation will be required. */
7730 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
7733 int matching_memory
;
7734 rtx src1
, src2
, dst
;
7740 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7741 if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
7742 && (rtx_equal_p (dst
, src2
)
7743 || immediate_operand (src1
, mode
)))
7750 /* If the destination is memory, and we do not have matching source
7751 operands, do things in registers. */
7752 matching_memory
= 0;
7753 if (GET_CODE (dst
) == MEM
)
7755 if (rtx_equal_p (dst
, src1
))
7756 matching_memory
= 1;
7757 else if (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
7758 && rtx_equal_p (dst
, src2
))
7759 matching_memory
= 2;
7761 dst
= gen_reg_rtx (mode
);
7764 /* Both source operands cannot be in memory. */
7765 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
7767 if (matching_memory
!= 2)
7768 src2
= force_reg (mode
, src2
);
7770 src1
= force_reg (mode
, src1
);
7773 /* If the operation is not commutable, source 1 cannot be a constant
7774 or non-matching memory. */
7775 if ((CONSTANT_P (src1
)
7776 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
7777 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
7778 src1
= force_reg (mode
, src1
);
7780 /* If optimizing, copy to regs to improve CSE */
7781 if (optimize
&& ! no_new_pseudos
)
7783 if (GET_CODE (dst
) == MEM
)
7784 dst
= gen_reg_rtx (mode
);
7785 if (GET_CODE (src1
) == MEM
)
7786 src1
= force_reg (mode
, src1
);
7787 if (GET_CODE (src2
) == MEM
)
7788 src2
= force_reg (mode
, src2
);
7791 src1
= operands
[1] = src1
;
7792 src2
= operands
[2] = src2
;
7796 /* Similarly, but assume that the destination has already been
7800 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
7801 enum machine_mode mode
, rtx operands
[])
7803 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
7804 gcc_assert (dst
== operands
[0]);
7807 /* Attempt to expand a binary operator. Make the expansion closer to the
7808 actual machine than just general_operand, which will allow 3 separate
7809 memory references (one output, two input) in a single insn. */
7812 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
7815 rtx src1
, src2
, dst
, op
, clob
;
7817 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
7821 /* Emit the instruction. */
7823 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
7824 if (reload_in_progress
)
7826 /* Reload doesn't know about the flags register, and doesn't know that
7827 it doesn't want to clobber it. We can only do this with PLUS. */
7834 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
7835 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
7838 /* Fix up the destination if needed. */
7839 if (dst
!= operands
[0])
7840 emit_move_insn (operands
[0], dst
);
7843 /* Return TRUE or FALSE depending on whether the binary operator meets the
7844 appropriate constraints. */
7847 ix86_binary_operator_ok (enum rtx_code code
,
7848 enum machine_mode mode ATTRIBUTE_UNUSED
,
7851 /* Both source operands cannot be in memory. */
7852 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
7854 /* If the operation is not commutable, source 1 cannot be a constant. */
7855 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
7857 /* If the destination is memory, we must have a matching source operand. */
7858 if (GET_CODE (operands
[0]) == MEM
7859 && ! (rtx_equal_p (operands
[0], operands
[1])
7860 || (GET_RTX_CLASS (code
) == RTX_COMM_ARITH
7861 && rtx_equal_p (operands
[0], operands
[2]))))
7863 /* If the operation is not commutable and the source 1 is memory, we must
7864 have a matching destination. */
7865 if (GET_CODE (operands
[1]) == MEM
7866 && GET_RTX_CLASS (code
) != RTX_COMM_ARITH
7867 && ! rtx_equal_p (operands
[0], operands
[1]))
7872 /* Attempt to expand a unary operator. Make the expansion closer to the
7873 actual machine than just general_operand, which will allow 2 separate
7874 memory references (one output, one input) in a single insn. */
7877 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
7880 int matching_memory
;
7881 rtx src
, dst
, op
, clob
;
7886 /* If the destination is memory, and we do not have matching source
7887 operands, do things in registers. */
7888 matching_memory
= 0;
7891 if (rtx_equal_p (dst
, src
))
7892 matching_memory
= 1;
7894 dst
= gen_reg_rtx (mode
);
7897 /* When source operand is memory, destination must match. */
7898 if (MEM_P (src
) && !matching_memory
)
7899 src
= force_reg (mode
, src
);
7901 /* If optimizing, copy to regs to improve CSE. */
7902 if (optimize
&& ! no_new_pseudos
)
7904 if (GET_CODE (dst
) == MEM
)
7905 dst
= gen_reg_rtx (mode
);
7906 if (GET_CODE (src
) == MEM
)
7907 src
= force_reg (mode
, src
);
7910 /* Emit the instruction. */
7912 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
7913 if (reload_in_progress
|| code
== NOT
)
7915 /* Reload doesn't know about the flags register, and doesn't know that
7916 it doesn't want to clobber it. */
7923 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
7924 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
7927 /* Fix up the destination if needed. */
7928 if (dst
!= operands
[0])
7929 emit_move_insn (operands
[0], dst
);
7932 /* Return TRUE or FALSE depending on whether the unary operator meets the
7933 appropriate constraints. */
7936 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
7937 enum machine_mode mode ATTRIBUTE_UNUSED
,
7938 rtx operands
[2] ATTRIBUTE_UNUSED
)
7940 /* If one of operands is memory, source and destination must match. */
7941 if ((GET_CODE (operands
[0]) == MEM
7942 || GET_CODE (operands
[1]) == MEM
)
7943 && ! rtx_equal_p (operands
[0], operands
[1]))
7948 /* Generate code for floating point ABS or NEG. */
7951 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
7954 rtx mask
, set
, use
, clob
, dst
, src
;
7955 bool matching_memory
;
7956 bool use_sse
= false;
7957 bool vector_mode
= VECTOR_MODE_P (mode
);
7958 enum machine_mode elt_mode
= mode
;
7959 enum machine_mode vec_mode
= VOIDmode
;
7963 elt_mode
= GET_MODE_INNER (mode
);
7967 if (TARGET_SSE_MATH
)
7972 vec_mode
= V4SFmode
;
7974 else if (mode
== DFmode
&& TARGET_SSE2
)
7977 vec_mode
= V2DFmode
;
7981 /* NEG and ABS performed with SSE use bitwise mask operations.
7982 Create the appropriate mask now. */
7985 HOST_WIDE_INT hi
, lo
;
7989 /* Find the sign bit, sign extended to 2*HWI. */
7990 if (elt_mode
== SFmode
)
7991 lo
= 0x80000000, hi
= lo
< 0;
7992 else if (HOST_BITS_PER_WIDE_INT
>= 64)
7993 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
7995 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
7997 /* If we're looking for the absolute value, then we want
8002 /* Force this value into the low part of a fp vector constant. */
8003 mask
= immed_double_const (lo
, hi
, mode
== SFmode
? SImode
: DImode
);
8004 mask
= gen_lowpart (mode
, mask
);
8009 v
= gen_rtvec (4, mask
, CONST0_RTX (SFmode
),
8010 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
8014 v
= gen_rtvec (2, mask
, CONST0_RTX (DFmode
));
8018 v
= gen_rtvec (4, mask
, mask
, mask
, mask
);
8022 v
= gen_rtvec (2, mask
, mask
);
8029 mask
= gen_rtx_CONST_VECTOR (vec_mode
, v
);
8030 mask
= force_reg (vec_mode
, mask
);
8034 /* When not using SSE, we don't use the mask, but prefer to keep the
8035 same general form of the insn pattern to reduce duplication when
8036 it comes time to split. */
8043 /* If the destination is memory, and we don't have matching source
8044 operands, do things in registers. */
8045 matching_memory
= false;
8048 if (rtx_equal_p (dst
, src
) && (!optimize
|| no_new_pseudos
))
8049 matching_memory
= true;
8051 dst
= gen_reg_rtx (mode
);
8053 if (MEM_P (src
) && !matching_memory
)
8054 src
= force_reg (mode
, src
);
8058 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
8059 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
8064 set
= gen_rtx_fmt_e (code
, mode
, src
);
8065 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
8066 use
= gen_rtx_USE (VOIDmode
, mask
);
8067 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
8068 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (3, set
, use
, clob
)));
8071 if (dst
!= operands
[0])
8072 emit_move_insn (operands
[0], dst
);
8075 /* Return TRUE or FALSE depending on whether the first SET in INSN
8076 has source and destination with matching CC modes, and that the
8077 CC mode is at least as constrained as REQ_MODE. */
8080 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
8083 enum machine_mode set_mode
;
8085 set
= PATTERN (insn
);
8086 if (GET_CODE (set
) == PARALLEL
)
8087 set
= XVECEXP (set
, 0, 0);
8088 if (GET_CODE (set
) != SET
)
8090 if (GET_CODE (SET_SRC (set
)) != COMPARE
)
8093 set_mode
= GET_MODE (SET_DEST (set
));
8097 if (req_mode
!= CCNOmode
8098 && (req_mode
!= CCmode
8099 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
8103 if (req_mode
== CCGCmode
)
8107 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
8111 if (req_mode
== CCZmode
)
8121 return (GET_MODE (SET_SRC (set
)) == set_mode
);
8124 /* Generate insn patterns to do an integer compare of OPERANDS. */
8127 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
8129 enum machine_mode cmpmode
;
8132 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
8133 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
8135 /* This is very simple, but making the interface the same as in the
8136 FP case makes the rest of the code easier. */
8137 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
8138 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
8140 /* Return the test that should be put into the flags user, i.e.
8141 the bcc, scc, or cmov instruction. */
8142 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
8145 /* Figure out whether to use ordered or unordered fp comparisons.
8146 Return the appropriate mode to use. */
8149 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
8151 /* ??? In order to make all comparisons reversible, we do all comparisons
8152 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8153 all forms of trapping and nontrapping comparisons, we can make inequality
8154 comparisons trapping again, since it results in better code when using
8155 FCOM based compares. */
8156 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
8160 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
8162 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8163 return ix86_fp_compare_mode (code
);
8166 /* Only zero flag is needed. */
8168 case NE
: /* ZF!=0 */
8170 /* Codes needing carry flag. */
8171 case GEU
: /* CF=0 */
8172 case GTU
: /* CF=0 & ZF=0 */
8173 case LTU
: /* CF=1 */
8174 case LEU
: /* CF=1 | ZF=1 */
8176 /* Codes possibly doable only with sign flag when
8177 comparing against zero. */
8178 case GE
: /* SF=OF or SF=0 */
8179 case LT
: /* SF<>OF or SF=1 */
8180 if (op1
== const0_rtx
)
8183 /* For other cases Carry flag is not required. */
8185 /* Codes doable only with the sign flag when comparing
8186 against zero, but there is no jump instruction for that,
8187 so we need to use relational tests that also read the
8188 overflow flag, which therefore needs to be zero. */
8189 case GT
: /* ZF=0 & SF=OF */
8190 case LE
: /* ZF=1 | SF<>OF */
8191 if (op1
== const0_rtx
)
8195 /* strcmp pattern do (use flags) and combine may ask us for proper
8204 /* Return the fixed registers used for condition codes. */
8207 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
8214 /* If two condition code modes are compatible, return a condition code
8215 mode which is compatible with both. Otherwise, return
8218 static enum machine_mode
8219 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
8224 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
8227 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
8228 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
8256 /* These are only compatible with themselves, which we already
8262 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8265 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
8267 enum rtx_code swapped_code
= swap_condition (code
);
8268 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
8269 || (ix86_fp_comparison_cost (swapped_code
)
8270 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
8273 /* Swap, force into registers, or otherwise massage the two operands
8274 to a fp comparison. The operands are updated in place; the new
8275 comparison code is returned. */
8277 static enum rtx_code
8278 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
8280 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
8281 rtx op0
= *pop0
, op1
= *pop1
;
8282 enum machine_mode op_mode
= GET_MODE (op0
);
8283 int is_sse
= SSE_REG_P (op0
) || SSE_REG_P (op1
);
8285 /* All of the unordered compare instructions only work on registers.
8286 The same is true of the fcomi compare instructions. The same is
8287 true of the XFmode compare instructions if not comparing with
8288 zero (ftst insn is used in this case). */
8291 && (fpcmp_mode
== CCFPUmode
8292 || (op_mode
== XFmode
8293 && ! (standard_80387_constant_p (op0
) == 1
8294 || standard_80387_constant_p (op1
) == 1))
8295 || ix86_use_fcomi_compare (code
)))
8297 op0
= force_reg (op_mode
, op0
);
8298 op1
= force_reg (op_mode
, op1
);
8302 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8303 things around if they appear profitable, otherwise force op0
8306 if (standard_80387_constant_p (op0
) == 0
8307 || (GET_CODE (op0
) == MEM
8308 && ! (standard_80387_constant_p (op1
) == 0
8309 || GET_CODE (op1
) == MEM
)))
8312 tmp
= op0
, op0
= op1
, op1
= tmp
;
8313 code
= swap_condition (code
);
8316 if (GET_CODE (op0
) != REG
)
8317 op0
= force_reg (op_mode
, op0
);
8319 if (CONSTANT_P (op1
))
8321 int tmp
= standard_80387_constant_p (op1
);
8323 op1
= validize_mem (force_const_mem (op_mode
, op1
));
8327 op1
= force_reg (op_mode
, op1
);
8330 op1
= force_reg (op_mode
, op1
);
8334 /* Try to rearrange the comparison to make it cheaper. */
8335 if (ix86_fp_comparison_cost (code
)
8336 > ix86_fp_comparison_cost (swap_condition (code
))
8337 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
8340 tmp
= op0
, op0
= op1
, op1
= tmp
;
8341 code
= swap_condition (code
);
8342 if (GET_CODE (op0
) != REG
)
8343 op0
= force_reg (op_mode
, op0
);
8351 /* Convert comparison codes we use to represent FP comparison to integer
8352 code that will result in proper branch. Return UNKNOWN if no such code
8356 ix86_fp_compare_code_to_integer (enum rtx_code code
)
8385 /* Split comparison code CODE into comparisons we can do using branch
8386 instructions. BYPASS_CODE is comparison code for branch that will
8387 branch around FIRST_CODE and SECOND_CODE. If some of branches
8388 is not required, set value to UNKNOWN.
8389 We never require more than two branches. */
8392 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
8393 enum rtx_code
*first_code
,
8394 enum rtx_code
*second_code
)
8397 *bypass_code
= UNKNOWN
;
8398 *second_code
= UNKNOWN
;
8400 /* The fcomi comparison sets flags as follows:
8410 case GT
: /* GTU - CF=0 & ZF=0 */
8411 case GE
: /* GEU - CF=0 */
8412 case ORDERED
: /* PF=0 */
8413 case UNORDERED
: /* PF=1 */
8414 case UNEQ
: /* EQ - ZF=1 */
8415 case UNLT
: /* LTU - CF=1 */
8416 case UNLE
: /* LEU - CF=1 | ZF=1 */
8417 case LTGT
: /* EQ - ZF=0 */
8419 case LT
: /* LTU - CF=1 - fails on unordered */
8421 *bypass_code
= UNORDERED
;
8423 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
8425 *bypass_code
= UNORDERED
;
8427 case EQ
: /* EQ - ZF=1 - fails on unordered */
8429 *bypass_code
= UNORDERED
;
8431 case NE
: /* NE - ZF=0 - fails on unordered */
8433 *second_code
= UNORDERED
;
8435 case UNGE
: /* GEU - CF=0 - fails on unordered */
8437 *second_code
= UNORDERED
;
8439 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
8441 *second_code
= UNORDERED
;
8446 if (!TARGET_IEEE_FP
)
8448 *second_code
= UNKNOWN
;
8449 *bypass_code
= UNKNOWN
;
8453 /* Return cost of comparison done using fcom + arithmetic operations on AX.
8454 All following functions use the number of instructions as the cost metric.
8455 In the future this should be tweaked to compute bytes for optimize_size and
8456 take into account performance of various instructions on various CPUs. */
8458 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
8460 if (!TARGET_IEEE_FP
)
8462 /* The cost of code output by ix86_expand_fp_compare. */
8490 /* Return cost of comparison done using fcomi operation.
8491 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8493 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
8495 enum rtx_code bypass_code
, first_code
, second_code
;
8496 /* Return arbitrarily high cost when instruction is not supported - this
8497 prevents gcc from using it. */
8500 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8501 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
8504 /* Return cost of comparison done using sahf operation.
8505 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8507 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
8509 enum rtx_code bypass_code
, first_code
, second_code
;
8510 /* Return arbitrarily high cost when instruction is not preferred - this
8511 keeps gcc from using it. */
8512 if (!TARGET_USE_SAHF
&& !optimize_size
)
8514 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8515 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
8518 /* Compute cost of the comparison done using any method.
8519 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8521 ix86_fp_comparison_cost (enum rtx_code code
)
8523 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
8526 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
8527 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
8529 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
8530 if (min
> sahf_cost
)
8532 if (min
> fcomi_cost
)
8537 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8540 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
8541 rtx
*second_test
, rtx
*bypass_test
)
8543 enum machine_mode fpcmp_mode
, intcmp_mode
;
8545 int cost
= ix86_fp_comparison_cost (code
);
8546 enum rtx_code bypass_code
, first_code
, second_code
;
8548 fpcmp_mode
= ix86_fp_compare_mode (code
);
8549 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
8552 *second_test
= NULL_RTX
;
8554 *bypass_test
= NULL_RTX
;
8556 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8558 /* Do fcomi/sahf based test when profitable. */
8559 if ((bypass_code
== UNKNOWN
|| bypass_test
)
8560 && (second_code
== UNKNOWN
|| second_test
)
8561 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
8565 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8566 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
8572 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8573 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8575 scratch
= gen_reg_rtx (HImode
);
8576 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8577 emit_insn (gen_x86_sahf_1 (scratch
));
8580 /* The FP codes work out to act like unsigned. */
8581 intcmp_mode
= fpcmp_mode
;
8583 if (bypass_code
!= UNKNOWN
)
8584 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
8585 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8587 if (second_code
!= UNKNOWN
)
8588 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
8589 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8594 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8595 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8596 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8598 scratch
= gen_reg_rtx (HImode
);
8599 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8601 /* In the unordered case, we have to check C2 for NaN's, which
8602 doesn't happen to work out to anything nice combination-wise.
8603 So do some bit twiddling on the value we've got in AH to come
8604 up with an appropriate set of condition codes. */
8606 intcmp_mode
= CCNOmode
;
8611 if (code
== GT
|| !TARGET_IEEE_FP
)
8613 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8618 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8619 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8620 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
8621 intcmp_mode
= CCmode
;
8627 if (code
== LT
&& TARGET_IEEE_FP
)
8629 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8630 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
8631 intcmp_mode
= CCmode
;
8636 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
8642 if (code
== GE
|| !TARGET_IEEE_FP
)
8644 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
8649 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8650 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8657 if (code
== LE
&& TARGET_IEEE_FP
)
8659 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8660 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8661 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8662 intcmp_mode
= CCmode
;
8667 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8673 if (code
== EQ
&& TARGET_IEEE_FP
)
8675 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8676 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8677 intcmp_mode
= CCmode
;
8682 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8689 if (code
== NE
&& TARGET_IEEE_FP
)
8691 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8692 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8698 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8704 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8708 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8717 /* Return the test that should be put into the flags user, i.e.
8718 the bcc, scc, or cmov instruction. */
8719 return gen_rtx_fmt_ee (code
, VOIDmode
,
8720 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8725 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
8728 op0
= ix86_compare_op0
;
8729 op1
= ix86_compare_op1
;
8732 *second_test
= NULL_RTX
;
8734 *bypass_test
= NULL_RTX
;
8736 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8737 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
8738 second_test
, bypass_test
);
8740 ret
= ix86_expand_int_compare (code
, op0
, op1
);
8745 /* Return true if the CODE will result in nontrivial jump sequence. */
8747 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
8749 enum rtx_code bypass_code
, first_code
, second_code
;
8752 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8753 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
8757 ix86_expand_branch (enum rtx_code code
, rtx label
)
8761 switch (GET_MODE (ix86_compare_op0
))
8767 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
8768 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8769 gen_rtx_LABEL_REF (VOIDmode
, label
),
8771 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
8780 enum rtx_code bypass_code
, first_code
, second_code
;
8782 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
8785 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8787 /* Check whether we will use the natural sequence with one jump. If
8788 so, we can expand jump early. Otherwise delay expansion by
8789 creating compound insn to not confuse optimizers. */
8790 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
8793 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
8794 gen_rtx_LABEL_REF (VOIDmode
, label
),
8795 pc_rtx
, NULL_RTX
, NULL_RTX
);
8799 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
8800 ix86_compare_op0
, ix86_compare_op1
);
8801 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8802 gen_rtx_LABEL_REF (VOIDmode
, label
),
8804 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
8806 use_fcomi
= ix86_use_fcomi_compare (code
);
8807 vec
= rtvec_alloc (3 + !use_fcomi
);
8808 RTVEC_ELT (vec
, 0) = tmp
;
8810 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
8812 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
8815 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
8817 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
8825 /* Expand DImode branch into multiple compare+branch. */
8827 rtx lo
[2], hi
[2], label2
;
8828 enum rtx_code code1
, code2
, code3
;
8830 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
8832 tmp
= ix86_compare_op0
;
8833 ix86_compare_op0
= ix86_compare_op1
;
8834 ix86_compare_op1
= tmp
;
8835 code
= swap_condition (code
);
8837 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
8838 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
8840 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8841 avoid two branches. This costs one extra insn, so disable when
8842 optimizing for size. */
8844 if ((code
== EQ
|| code
== NE
)
8846 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
8851 if (hi
[1] != const0_rtx
)
8852 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
8853 NULL_RTX
, 0, OPTAB_WIDEN
);
8856 if (lo
[1] != const0_rtx
)
8857 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
8858 NULL_RTX
, 0, OPTAB_WIDEN
);
8860 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
8861 NULL_RTX
, 0, OPTAB_WIDEN
);
8863 ix86_compare_op0
= tmp
;
8864 ix86_compare_op1
= const0_rtx
;
8865 ix86_expand_branch (code
, label
);
8869 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8870 op1 is a constant and the low word is zero, then we can just
8871 examine the high word. */
8873 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
8876 case LT
: case LTU
: case GE
: case GEU
:
8877 ix86_compare_op0
= hi
[0];
8878 ix86_compare_op1
= hi
[1];
8879 ix86_expand_branch (code
, label
);
8885 /* Otherwise, we need two or three jumps. */
8887 label2
= gen_label_rtx ();
8890 code2
= swap_condition (code
);
8891 code3
= unsigned_condition (code
);
8895 case LT
: case GT
: case LTU
: case GTU
:
8898 case LE
: code1
= LT
; code2
= GT
; break;
8899 case GE
: code1
= GT
; code2
= LT
; break;
8900 case LEU
: code1
= LTU
; code2
= GTU
; break;
8901 case GEU
: code1
= GTU
; code2
= LTU
; break;
8903 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
8904 case NE
: code2
= UNKNOWN
; break;
8912 * if (hi(a) < hi(b)) goto true;
8913 * if (hi(a) > hi(b)) goto false;
8914 * if (lo(a) < lo(b)) goto true;
8918 ix86_compare_op0
= hi
[0];
8919 ix86_compare_op1
= hi
[1];
8921 if (code1
!= UNKNOWN
)
8922 ix86_expand_branch (code1
, label
);
8923 if (code2
!= UNKNOWN
)
8924 ix86_expand_branch (code2
, label2
);
8926 ix86_compare_op0
= lo
[0];
8927 ix86_compare_op1
= lo
[1];
8928 ix86_expand_branch (code3
, label
);
8930 if (code2
!= UNKNOWN
)
8931 emit_label (label2
);
8940 /* Split branch based on floating point condition. */
8942 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
8943 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
8946 rtx label
= NULL_RTX
;
8948 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
8951 if (target2
!= pc_rtx
)
8954 code
= reverse_condition_maybe_unordered (code
);
8959 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
8960 tmp
, &second
, &bypass
);
8962 /* Remove pushed operand from stack. */
8964 ix86_free_from_memory (GET_MODE (pushed
));
8966 if (split_branch_probability
>= 0)
8968 /* Distribute the probabilities across the jumps.
8969 Assume the BYPASS and SECOND to be always test
8971 probability
= split_branch_probability
;
8973 /* Value of 1 is low enough to make no need for probability
8974 to be updated. Later we may run some experiments and see
8975 if unordered values are more frequent in practice. */
8977 bypass_probability
= 1;
8979 second_probability
= 1;
8981 if (bypass
!= NULL_RTX
)
8983 label
= gen_label_rtx ();
8984 i
= emit_jump_insn (gen_rtx_SET
8986 gen_rtx_IF_THEN_ELSE (VOIDmode
,
8988 gen_rtx_LABEL_REF (VOIDmode
,
8991 if (bypass_probability
>= 0)
8993 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8994 GEN_INT (bypass_probability
),
8997 i
= emit_jump_insn (gen_rtx_SET
8999 gen_rtx_IF_THEN_ELSE (VOIDmode
,
9000 condition
, target1
, target2
)));
9001 if (probability
>= 0)
9003 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9004 GEN_INT (probability
),
9006 if (second
!= NULL_RTX
)
9008 i
= emit_jump_insn (gen_rtx_SET
9010 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
9012 if (second_probability
>= 0)
9014 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9015 GEN_INT (second_probability
),
9018 if (label
!= NULL_RTX
)
9023 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
9025 rtx ret
, tmp
, tmpreg
, equiv
;
9026 rtx second_test
, bypass_test
;
9028 if (GET_MODE (ix86_compare_op0
) == DImode
9030 return 0; /* FAIL */
9032 if (GET_MODE (dest
) != QImode
)
9035 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9036 PUT_MODE (ret
, QImode
);
9041 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
9042 if (bypass_test
|| second_test
)
9044 rtx test
= second_test
;
9046 rtx tmp2
= gen_reg_rtx (QImode
);
9053 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
9055 PUT_MODE (test
, QImode
);
9056 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
9059 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
9061 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
9064 /* Attach a REG_EQUAL note describing the comparison result. */
9065 equiv
= simplify_gen_relational (code
, QImode
,
9066 GET_MODE (ix86_compare_op0
),
9067 ix86_compare_op0
, ix86_compare_op1
);
9068 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
9070 return 1; /* DONE */
9073 /* Expand comparison setting or clearing carry flag. Return true when
9074 successful and set pop for the operation. */
9076 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
9078 enum machine_mode mode
=
9079 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
9081 /* Do not handle DImode compares that go through a special path. Also we can't
9082 deal with FP compares yet. This is possible to add. */
9083 if ((mode
== DImode
&& !TARGET_64BIT
))
9085 if (FLOAT_MODE_P (mode
))
9087 rtx second_test
= NULL
, bypass_test
= NULL
;
9088 rtx compare_op
, compare_seq
;
9090 /* Shortcut: following common codes never translate into carry flag compares. */
9091 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
9092 || code
== ORDERED
|| code
== UNORDERED
)
9095 /* These comparisons require zero flag; swap operands so they won't. */
9096 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
9102 code
= swap_condition (code
);
9105 /* Try to expand the comparison and verify that we end up with carry flag
9106 based comparison. This fails to be true only when we decide to expand
9107 comparison using arithmetic, which is not too common a scenario. */
9109 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
9110 &second_test
, &bypass_test
);
9111 compare_seq
= get_insns ();
9114 if (second_test
|| bypass_test
)
9116 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
9117 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
9118 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
9120 code
= GET_CODE (compare_op
);
9121 if (code
!= LTU
&& code
!= GEU
)
9123 emit_insn (compare_seq
);
9127 if (!INTEGRAL_MODE_P (mode
))
9135 /* Convert a==0 into (unsigned)a<1. */
9138 if (op1
!= const0_rtx
)
9141 code
= (code
== EQ
? LTU
: GEU
);
9144 /* Convert a>b into b<a or a>=b-1. */
9147 if (GET_CODE (op1
) == CONST_INT
)
9149 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
9150 /* Bail out on overflow. We still can swap operands but that
9151 would force loading of the constant into register. */
9152 if (op1
== const0_rtx
9153 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
9155 code
= (code
== GTU
? GEU
: LTU
);
9162 code
= (code
== GTU
? LTU
: GEU
);
9166 /* Convert a>=0 into (unsigned)a<0x80000000. */
9169 if (mode
== DImode
|| op1
!= const0_rtx
)
9171 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
9172 code
= (code
== LT
? GEU
: LTU
);
9176 if (mode
== DImode
|| op1
!= constm1_rtx
)
9178 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
9179 code
= (code
== LE
? GEU
: LTU
);
9185 /* Swapping operands may cause constant to appear as first operand. */
9186 if (!nonimmediate_operand (op0
, VOIDmode
))
9190 op0
= force_reg (mode
, op0
);
9192 ix86_compare_op0
= op0
;
9193 ix86_compare_op1
= op1
;
9194 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
9195 if (GET_CODE (*pop
) != LTU
&& GET_CODE (*pop
) != GEU
)
9201 ix86_expand_int_movcc (rtx operands
[])
9203 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
9204 rtx compare_seq
, compare_op
;
9205 rtx second_test
, bypass_test
;
9206 enum machine_mode mode
= GET_MODE (operands
[0]);
9207 bool sign_bit_compare_p
= false;;
9210 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9211 compare_seq
= get_insns ();
9214 compare_code
= GET_CODE (compare_op
);
9216 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
9217 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
9218 sign_bit_compare_p
= true;
9220 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9221 HImode insns, we'd be swallowed in word prefix ops. */
9223 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
9224 && (mode
!= DImode
|| TARGET_64BIT
)
9225 && GET_CODE (operands
[2]) == CONST_INT
9226 && GET_CODE (operands
[3]) == CONST_INT
)
9228 rtx out
= operands
[0];
9229 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
9230 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
9234 /* Sign bit compares are better done using shifts than we do by using
9236 if (sign_bit_compare_p
9237 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
9238 ix86_compare_op1
, &compare_op
))
9240 /* Detect overlap between destination and compare sources. */
9243 if (!sign_bit_compare_p
)
9247 compare_code
= GET_CODE (compare_op
);
9249 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
9250 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
9253 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
9256 /* To simplify rest of code, restrict to the GEU case. */
9257 if (compare_code
== LTU
)
9259 HOST_WIDE_INT tmp
= ct
;
9262 compare_code
= reverse_condition (compare_code
);
9263 code
= reverse_condition (code
);
9268 PUT_CODE (compare_op
,
9269 reverse_condition_maybe_unordered
9270 (GET_CODE (compare_op
)));
9272 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
9276 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
9277 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
9278 tmp
= gen_reg_rtx (mode
);
9281 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
9283 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
9287 if (code
== GT
|| code
== GE
)
9288 code
= reverse_condition (code
);
9291 HOST_WIDE_INT tmp
= ct
;
9296 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
9297 ix86_compare_op1
, VOIDmode
, 0, -1);
9310 tmp
= expand_simple_binop (mode
, PLUS
,
9312 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9323 tmp
= expand_simple_binop (mode
, IOR
,
9325 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9327 else if (diff
== -1 && ct
)
9337 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
9339 tmp
= expand_simple_binop (mode
, PLUS
,
9340 copy_rtx (tmp
), GEN_INT (cf
),
9341 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9349 * andl cf - ct, dest
9359 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
9362 tmp
= expand_simple_binop (mode
, AND
,
9364 gen_int_mode (cf
- ct
, mode
),
9365 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9367 tmp
= expand_simple_binop (mode
, PLUS
,
9368 copy_rtx (tmp
), GEN_INT (ct
),
9369 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9372 if (!rtx_equal_p (tmp
, out
))
9373 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
9375 return 1; /* DONE */
9381 tmp
= ct
, ct
= cf
, cf
= tmp
;
9383 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
9385 /* We may be reversing unordered compare to normal compare, that
9386 is not valid in general (we may convert non-trapping condition
9387 to trapping one), however on i386 we currently emit all
9388 comparisons unordered. */
9389 compare_code
= reverse_condition_maybe_unordered (compare_code
);
9390 code
= reverse_condition_maybe_unordered (code
);
9394 compare_code
= reverse_condition (compare_code
);
9395 code
= reverse_condition (code
);
9399 compare_code
= UNKNOWN
;
9400 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
9401 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
9403 if (ix86_compare_op1
== const0_rtx
9404 && (code
== LT
|| code
== GE
))
9405 compare_code
= code
;
9406 else if (ix86_compare_op1
== constm1_rtx
)
9410 else if (code
== GT
)
9415 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9416 if (compare_code
!= UNKNOWN
9417 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
9418 && (cf
== -1 || ct
== -1))
9420 /* If lea code below could be used, only optimize
9421 if it results in a 2 insn sequence. */
9423 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
9424 || diff
== 3 || diff
== 5 || diff
== 9)
9425 || (compare_code
== LT
&& ct
== -1)
9426 || (compare_code
== GE
&& cf
== -1))
9429 * notl op1 (if necessary)
9437 code
= reverse_condition (code
);
9440 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9441 ix86_compare_op1
, VOIDmode
, 0, -1);
9443 out
= expand_simple_binop (mode
, IOR
,
9445 out
, 1, OPTAB_DIRECT
);
9446 if (out
!= operands
[0])
9447 emit_move_insn (operands
[0], out
);
9449 return 1; /* DONE */
9454 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
9455 || diff
== 3 || diff
== 5 || diff
== 9)
9456 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
9458 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
9464 * lea cf(dest*(ct-cf)),dest
9468 * This also catches the degenerate setcc-only case.
9474 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9475 ix86_compare_op1
, VOIDmode
, 0, 1);
9478 /* On x86_64 the lea instruction operates on Pmode, so we need
9479 to get arithmetics done in proper mode to match. */
9481 tmp
= copy_rtx (out
);
9485 out1
= copy_rtx (out
);
9486 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
9490 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
9496 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
9499 if (!rtx_equal_p (tmp
, out
))
9502 out
= force_operand (tmp
, copy_rtx (out
));
9504 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
9506 if (!rtx_equal_p (out
, operands
[0]))
9507 emit_move_insn (operands
[0], copy_rtx (out
));
9509 return 1; /* DONE */
9513 * General case: Jumpful:
9514 * xorl dest,dest cmpl op1, op2
9515 * cmpl op1, op2 movl ct, dest
9517 * decl dest movl cf, dest
9518 * andl (cf-ct),dest 1:
9523 * This is reasonably steep, but branch mispredict costs are
9524 * high on modern cpus, so consider failing only if optimizing
9528 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
9529 && BRANCH_COST
>= 2)
9535 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
9536 /* We may be reversing unordered compare to normal compare,
9537 that is not valid in general (we may convert non-trapping
9538 condition to trapping one), however on i386 we currently
9539 emit all comparisons unordered. */
9540 code
= reverse_condition_maybe_unordered (code
);
9543 code
= reverse_condition (code
);
9544 if (compare_code
!= UNKNOWN
)
9545 compare_code
= reverse_condition (compare_code
);
9549 if (compare_code
!= UNKNOWN
)
9551 /* notl op1 (if needed)
9556 For x < 0 (resp. x <= -1) there will be no notl,
9557 so if possible swap the constants to get rid of the
9559 True/false will be -1/0 while code below (store flag
9560 followed by decrement) is 0/-1, so the constants need
9561 to be exchanged once more. */
9563 if (compare_code
== GE
|| !cf
)
9565 code
= reverse_condition (code
);
9570 HOST_WIDE_INT tmp
= cf
;
9575 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9576 ix86_compare_op1
, VOIDmode
, 0, -1);
9580 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9581 ix86_compare_op1
, VOIDmode
, 0, 1);
9583 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
9584 copy_rtx (out
), 1, OPTAB_DIRECT
);
9587 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
9588 gen_int_mode (cf
- ct
, mode
),
9589 copy_rtx (out
), 1, OPTAB_DIRECT
);
9591 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
9592 copy_rtx (out
), 1, OPTAB_DIRECT
);
9593 if (!rtx_equal_p (out
, operands
[0]))
9594 emit_move_insn (operands
[0], copy_rtx (out
));
9596 return 1; /* DONE */
9600 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
9602 /* Try a few things more with specific constants and a variable. */
9605 rtx var
, orig_out
, out
, tmp
;
9607 if (BRANCH_COST
<= 2)
9608 return 0; /* FAIL */
9610 /* If one of the two operands is an interesting constant, load a
9611 constant with the above and mask it in with a logical operation. */
9613 if (GET_CODE (operands
[2]) == CONST_INT
)
9616 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
9617 operands
[3] = constm1_rtx
, op
= and_optab
;
9618 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
9619 operands
[3] = const0_rtx
, op
= ior_optab
;
9621 return 0; /* FAIL */
9623 else if (GET_CODE (operands
[3]) == CONST_INT
)
9626 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
9627 operands
[2] = constm1_rtx
, op
= and_optab
;
9628 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
9629 operands
[2] = const0_rtx
, op
= ior_optab
;
9631 return 0; /* FAIL */
9634 return 0; /* FAIL */
9636 orig_out
= operands
[0];
9637 tmp
= gen_reg_rtx (mode
);
9640 /* Recurse to get the constant loaded. */
9641 if (ix86_expand_int_movcc (operands
) == 0)
9642 return 0; /* FAIL */
9644 /* Mask in the interesting variable. */
9645 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
9647 if (!rtx_equal_p (out
, orig_out
))
9648 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
9650 return 1; /* DONE */
9654 * For comparison with above,
9664 if (! nonimmediate_operand (operands
[2], mode
))
9665 operands
[2] = force_reg (mode
, operands
[2]);
9666 if (! nonimmediate_operand (operands
[3], mode
))
9667 operands
[3] = force_reg (mode
, operands
[3]);
9669 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
9671 rtx tmp
= gen_reg_rtx (mode
);
9672 emit_move_insn (tmp
, operands
[3]);
9675 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
9677 rtx tmp
= gen_reg_rtx (mode
);
9678 emit_move_insn (tmp
, operands
[2]);
9682 if (! register_operand (operands
[2], VOIDmode
)
9684 || ! register_operand (operands
[3], VOIDmode
)))
9685 operands
[2] = force_reg (mode
, operands
[2]);
9688 && ! register_operand (operands
[3], VOIDmode
))
9689 operands
[3] = force_reg (mode
, operands
[3]);
9691 emit_insn (compare_seq
);
9692 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9693 gen_rtx_IF_THEN_ELSE (mode
,
9694 compare_op
, operands
[2],
9697 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
9698 gen_rtx_IF_THEN_ELSE (mode
,
9700 copy_rtx (operands
[3]),
9701 copy_rtx (operands
[0]))));
9703 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
9704 gen_rtx_IF_THEN_ELSE (mode
,
9706 copy_rtx (operands
[2]),
9707 copy_rtx (operands
[0]))));
9709 return 1; /* DONE */
9713 ix86_expand_fp_movcc (rtx operands
[])
9717 rtx compare_op
, second_test
, bypass_test
;
9719 /* For SF/DFmode conditional moves based on comparisons
9720 in same mode, we may want to use SSE min/max instructions. */
9721 if (((TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == SFmode
)
9722 || (TARGET_SSE2
&& TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == DFmode
))
9723 && GET_MODE (ix86_compare_op0
) == GET_MODE (operands
[0])
9724 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9726 || (GET_CODE (operands
[1]) != LTGT
&& GET_CODE (operands
[1]) != UNEQ
))
9727 /* We may be called from the post-reload splitter. */
9728 && (!REG_P (operands
[0])
9729 || SSE_REG_P (operands
[0])
9730 || REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
))
9732 rtx op0
= ix86_compare_op0
, op1
= ix86_compare_op1
;
9733 code
= GET_CODE (operands
[1]);
9735 /* See if we have (cross) match between comparison operands and
9736 conditional move operands. */
9737 if (rtx_equal_p (operands
[2], op1
))
9742 code
= reverse_condition_maybe_unordered (code
);
9744 if (rtx_equal_p (operands
[2], op0
) && rtx_equal_p (operands
[3], op1
))
9746 /* Check for min operation. */
9747 if (code
== LT
|| code
== UNLE
)
9755 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
9756 if (memory_operand (op0
, VOIDmode
))
9757 op0
= force_reg (GET_MODE (operands
[0]), op0
);
9758 if (GET_MODE (operands
[0]) == SFmode
)
9759 emit_insn (gen_minsf3 (operands
[0], op0
, op1
));
9761 emit_insn (gen_mindf3 (operands
[0], op0
, op1
));
9764 /* Check for max operation. */
9765 if (code
== GT
|| code
== UNGE
)
9773 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
9774 if (memory_operand (op0
, VOIDmode
))
9775 op0
= force_reg (GET_MODE (operands
[0]), op0
);
9776 if (GET_MODE (operands
[0]) == SFmode
)
9777 emit_insn (gen_maxsf3 (operands
[0], op0
, op1
));
9779 emit_insn (gen_maxdf3 (operands
[0], op0
, op1
));
9783 /* Manage condition to be sse_comparison_operator. In case we are
9784 in non-ieee mode, try to canonicalize the destination operand
9785 to be first in the comparison - this helps reload to avoid extra
9787 if (!sse_comparison_operator (operands
[1], VOIDmode
)
9788 || (rtx_equal_p (operands
[0], ix86_compare_op1
) && !TARGET_IEEE_FP
))
9790 rtx tmp
= ix86_compare_op0
;
9791 ix86_compare_op0
= ix86_compare_op1
;
9792 ix86_compare_op1
= tmp
;
9793 operands
[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands
[1])),
9794 VOIDmode
, ix86_compare_op0
,
9797 /* Similarly try to manage result to be first operand of conditional
9798 move. We also don't support the NE comparison on SSE, so try to
9800 if ((rtx_equal_p (operands
[0], operands
[3])
9801 && (!TARGET_IEEE_FP
|| GET_CODE (operands
[1]) != EQ
))
9802 || (GET_CODE (operands
[1]) == NE
&& TARGET_IEEE_FP
))
9804 rtx tmp
= operands
[2];
9805 operands
[2] = operands
[3];
9807 operands
[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9808 (GET_CODE (operands
[1])),
9809 VOIDmode
, ix86_compare_op0
,
9812 if (GET_MODE (operands
[0]) == SFmode
)
9813 emit_insn (gen_sse_movsfcc (operands
[0], operands
[1],
9814 operands
[2], operands
[3],
9815 ix86_compare_op0
, ix86_compare_op1
));
9817 emit_insn (gen_sse_movdfcc (operands
[0], operands
[1],
9818 operands
[2], operands
[3],
9819 ix86_compare_op0
, ix86_compare_op1
));
9823 /* The floating point conditional move instructions don't directly
9824 support conditions resulting from a signed integer comparison. */
9826 code
= GET_CODE (operands
[1]);
9827 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9829 /* The floating point conditional move instructions don't directly
9830 support signed integer comparisons. */
9832 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
9834 if (second_test
!= NULL
|| bypass_test
!= NULL
)
9836 tmp
= gen_reg_rtx (QImode
);
9837 ix86_expand_setcc (code
, tmp
);
9839 ix86_compare_op0
= tmp
;
9840 ix86_compare_op1
= const0_rtx
;
9841 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9843 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
9845 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
9846 emit_move_insn (tmp
, operands
[3]);
9849 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
9851 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
9852 emit_move_insn (tmp
, operands
[2]);
9856 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9857 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9862 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9863 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9868 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9869 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9877 /* Expand conditional increment or decrement using adc/sbb instructions.
9878 The default case using setcc followed by the conditional move can be
9879 done by generic code. */
9881 ix86_expand_int_addcc (rtx operands
[])
9883 enum rtx_code code
= GET_CODE (operands
[1]);
9885 rtx val
= const0_rtx
;
9887 enum machine_mode mode
= GET_MODE (operands
[0]);
9889 if (operands
[3] != const1_rtx
9890 && operands
[3] != constm1_rtx
)
9892 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
9893 ix86_compare_op1
, &compare_op
))
9895 code
= GET_CODE (compare_op
);
9897 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
9898 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
9901 code
= ix86_fp_compare_code_to_integer (code
);
9908 PUT_CODE (compare_op
,
9909 reverse_condition_maybe_unordered
9910 (GET_CODE (compare_op
)));
9912 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
9914 PUT_MODE (compare_op
, mode
);
9916 /* Construct either adc or sbb insn. */
9917 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
9919 switch (GET_MODE (operands
[0]))
9922 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
9925 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
9928 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
9931 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
9939 switch (GET_MODE (operands
[0]))
9942 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
9945 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
9948 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
9951 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
9957 return 1; /* DONE */
9961 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9962 works for floating point parameters and non-offsettable memories.
9963 For pushes, it returns just stack offsets; the values will be saved
9964 in the right order. Maximally three parts are generated. */
9967 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
9972 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
9974 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
9976 if (GET_CODE (operand
) == REG
&& MMX_REGNO_P (REGNO (operand
)))
9978 if (size
< 2 || size
> 3)
9981 /* Optimize constant pool reference to immediates. This is used by fp
9982 moves, that force all constants to memory to allow combining. */
9983 if (GET_CODE (operand
) == MEM
&& MEM_READONLY_P (operand
))
9985 rtx tmp
= maybe_get_pool_constant (operand
);
9990 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
9992 /* The only non-offsettable memories we handle are pushes. */
9993 if (! push_operand (operand
, VOIDmode
))
9996 operand
= copy_rtx (operand
);
9997 PUT_MODE (operand
, Pmode
);
9998 parts
[0] = parts
[1] = parts
[2] = operand
;
10000 else if (!TARGET_64BIT
)
10002 if (mode
== DImode
)
10003 split_di (&operand
, 1, &parts
[0], &parts
[1]);
10006 if (REG_P (operand
))
10008 if (!reload_completed
)
10010 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
10011 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
10013 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
10015 else if (offsettable_memref_p (operand
))
10017 operand
= adjust_address (operand
, SImode
, 0);
10018 parts
[0] = operand
;
10019 parts
[1] = adjust_address (operand
, SImode
, 4);
10021 parts
[2] = adjust_address (operand
, SImode
, 8);
10023 else if (GET_CODE (operand
) == CONST_DOUBLE
)
10028 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
10032 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
10033 parts
[2] = gen_int_mode (l
[2], SImode
);
10036 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
10041 parts
[1] = gen_int_mode (l
[1], SImode
);
10042 parts
[0] = gen_int_mode (l
[0], SImode
);
10050 if (mode
== TImode
)
10051 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
10052 if (mode
== XFmode
|| mode
== TFmode
)
10054 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
10055 if (REG_P (operand
))
10057 if (!reload_completed
)
10059 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
10060 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
10062 else if (offsettable_memref_p (operand
))
10064 operand
= adjust_address (operand
, DImode
, 0);
10065 parts
[0] = operand
;
10066 parts
[1] = adjust_address (operand
, upper_mode
, 8);
10068 else if (GET_CODE (operand
) == CONST_DOUBLE
)
10073 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
10074 real_to_target (l
, &r
, mode
);
10076 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10077 if (HOST_BITS_PER_WIDE_INT
>= 64)
10080 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
10081 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
10084 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
10086 if (upper_mode
== SImode
)
10087 parts
[1] = gen_int_mode (l
[2], SImode
);
10088 else if (HOST_BITS_PER_WIDE_INT
>= 64)
10091 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
10092 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
10095 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
10105 /* Emit insns to perform a move or push of DI, DF, and XF values.
10106 Return false when normal moves are needed; true when all required
10107 insns have been emitted. Operands 2-4 contain the input values
10108 in the correct order; operands 5-7 contain the output values. */
10111 ix86_split_long_move (rtx operands
[])
10116 int collisions
= 0;
10117 enum machine_mode mode
= GET_MODE (operands
[0]);
10119 /* The DFmode expanders may ask us to move double.
10120 For 64bit target this is single move. By hiding the fact
10121 here we simplify i386.md splitters. */
10122 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
10124 /* Optimize constant pool reference to immediates. This is used by
10125 fp moves, that force all constants to memory to allow combining. */
10127 if (GET_CODE (operands
[1]) == MEM
10128 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
10129 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
10130 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
10131 if (push_operand (operands
[0], VOIDmode
))
10133 operands
[0] = copy_rtx (operands
[0]);
10134 PUT_MODE (operands
[0], Pmode
);
10137 operands
[0] = gen_lowpart (DImode
, operands
[0]);
10138 operands
[1] = gen_lowpart (DImode
, operands
[1]);
10139 emit_move_insn (operands
[0], operands
[1]);
10143 /* The only non-offsettable memory we handle is push. */
10144 if (push_operand (operands
[0], VOIDmode
))
10146 else if (GET_CODE (operands
[0]) == MEM
10147 && ! offsettable_memref_p (operands
[0]))
10150 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
10151 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
10153 /* When emitting push, take care for source operands on the stack. */
10154 if (push
&& GET_CODE (operands
[1]) == MEM
10155 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
10158 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
10159 XEXP (part
[1][2], 0));
10160 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
10161 XEXP (part
[1][1], 0));
10164 /* We need to do copy in the right order in case an address register
10165 of the source overlaps the destination. */
10166 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
10168 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
10170 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
10173 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
10176 /* Collision in the middle part can be handled by reordering. */
10177 if (collisions
== 1 && nparts
== 3
10178 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
10181 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
10182 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
10185 /* If there are more collisions, we can't handle it by reordering.
10186 Do an lea to the last part and use only one colliding move. */
10187 else if (collisions
> 1)
10193 base
= part
[0][nparts
- 1];
10195 /* Handle the case when the last part isn't valid for lea.
10196 Happens in 64-bit mode storing the 12-byte XFmode. */
10197 if (GET_MODE (base
) != Pmode
)
10198 base
= gen_rtx_REG (Pmode
, REGNO (base
));
10200 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
10201 part
[1][0] = replace_equiv_address (part
[1][0], base
);
10202 part
[1][1] = replace_equiv_address (part
[1][1],
10203 plus_constant (base
, UNITS_PER_WORD
));
10205 part
[1][2] = replace_equiv_address (part
[1][2],
10206 plus_constant (base
, 8));
10216 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
10217 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
10218 emit_move_insn (part
[0][2], part
[1][2]);
10223 /* In 64bit mode we don't have 32bit push available. In case this is
10224 register, it is OK - we will just use larger counterpart. We also
10225 retype memory - this comes from an attempt to avoid REX prefix on
10226 moving of second half of TFmode value. */
10227 if (GET_MODE (part
[1][1]) == SImode
)
10229 if (GET_CODE (part
[1][1]) == MEM
)
10230 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
10231 else if (REG_P (part
[1][1]))
10232 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
10235 if (GET_MODE (part
[1][0]) == SImode
)
10236 part
[1][0] = part
[1][1];
10239 emit_move_insn (part
[0][1], part
[1][1]);
10240 emit_move_insn (part
[0][0], part
[1][0]);
10244 /* Choose correct order to not overwrite the source before it is copied. */
10245 if ((REG_P (part
[0][0])
10246 && REG_P (part
[1][1])
10247 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
10249 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
10251 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
10255 operands
[2] = part
[0][2];
10256 operands
[3] = part
[0][1];
10257 operands
[4] = part
[0][0];
10258 operands
[5] = part
[1][2];
10259 operands
[6] = part
[1][1];
10260 operands
[7] = part
[1][0];
10264 operands
[2] = part
[0][1];
10265 operands
[3] = part
[0][0];
10266 operands
[5] = part
[1][1];
10267 operands
[6] = part
[1][0];
10274 operands
[2] = part
[0][0];
10275 operands
[3] = part
[0][1];
10276 operands
[4] = part
[0][2];
10277 operands
[5] = part
[1][0];
10278 operands
[6] = part
[1][1];
10279 operands
[7] = part
[1][2];
10283 operands
[2] = part
[0][0];
10284 operands
[3] = part
[0][1];
10285 operands
[5] = part
[1][0];
10286 operands
[6] = part
[1][1];
10290 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
10293 if (GET_CODE (operands
[5]) == CONST_INT
10294 && operands
[5] != const0_rtx
10295 && REG_P (operands
[2]))
10297 if (GET_CODE (operands
[6]) == CONST_INT
10298 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
10299 operands
[6] = operands
[2];
10302 && GET_CODE (operands
[7]) == CONST_INT
10303 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
10304 operands
[7] = operands
[2];
10308 && GET_CODE (operands
[6]) == CONST_INT
10309 && operands
[6] != const0_rtx
10310 && REG_P (operands
[3])
10311 && GET_CODE (operands
[7]) == CONST_INT
10312 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
10313 operands
[7] = operands
[3];
10316 emit_move_insn (operands
[2], operands
[5]);
10317 emit_move_insn (operands
[3], operands
[6]);
10319 emit_move_insn (operands
[4], operands
[7]);
10324 /* Helper function of ix86_split_ashldi used to generate an SImode
10325 left shift by a constant, either using a single shift or
10326 a sequence of add instructions. */
10329 ix86_expand_ashlsi3_const (rtx operand
, int count
)
10332 emit_insn (gen_addsi3 (operand
, operand
, operand
));
10333 else if (!optimize_size
10334 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
10337 for (i
=0; i
<count
; i
++)
10338 emit_insn (gen_addsi3 (operand
, operand
, operand
));
10341 emit_insn (gen_ashlsi3 (operand
, operand
, GEN_INT (count
)));
10345 ix86_split_ashldi (rtx
*operands
, rtx scratch
)
10347 rtx low
[2], high
[2];
10350 if (GET_CODE (operands
[2]) == CONST_INT
)
10352 split_di (operands
, 2, low
, high
);
10353 count
= INTVAL (operands
[2]) & 63;
10357 emit_move_insn (high
[0], low
[1]);
10358 emit_move_insn (low
[0], const0_rtx
);
10361 ix86_expand_ashlsi3_const (high
[0], count
- 32);
10365 if (!rtx_equal_p (operands
[0], operands
[1]))
10366 emit_move_insn (operands
[0], operands
[1]);
10367 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
10368 ix86_expand_ashlsi3_const (low
[0], count
);
10373 split_di (operands
, 1, low
, high
);
10375 if (operands
[1] == const1_rtx
)
10377 /* Assuming we've chosen QImode capable registers, then 1LL << N
10378 can be done with two 32-bit shifts, no branches, no cmoves. */
10379 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
10381 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
10383 ix86_expand_clear (low
[0]);
10384 ix86_expand_clear (high
[0]);
10385 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (32)));
10387 d
= gen_lowpart (QImode
, low
[0]);
10388 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
10389 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
10390 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
10392 d
= gen_lowpart (QImode
, high
[0]);
10393 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
10394 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
10395 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
10398 /* Otherwise, we can get the same results by manually performing
10399 a bit extract operation on bit 5, and then performing the two
10400 shifts. The two methods of getting 0/1 into low/high are exactly
10401 the same size. Avoiding the shift in the bit extract case helps
10402 pentium4 a bit; no one else seems to care much either way. */
10407 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
10408 x
= gen_rtx_ZERO_EXTEND (SImode
, operands
[2]);
10410 x
= gen_lowpart (SImode
, operands
[2]);
10411 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
10413 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (5)));
10414 emit_insn (gen_andsi3 (high
[0], high
[0], GEN_INT (1)));
10415 emit_move_insn (low
[0], high
[0]);
10416 emit_insn (gen_xorsi3 (low
[0], low
[0], GEN_INT (1)));
10419 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
10420 emit_insn (gen_ashlsi3 (high
[0], high
[0], operands
[2]));
10424 if (operands
[1] == constm1_rtx
)
10426 /* For -1LL << N, we can avoid the shld instruction, because we
10427 know that we're shifting 0...31 ones into a -1. */
10428 emit_move_insn (low
[0], constm1_rtx
);
10430 emit_move_insn (high
[0], low
[0]);
10432 emit_move_insn (high
[0], constm1_rtx
);
10436 if (!rtx_equal_p (operands
[0], operands
[1]))
10437 emit_move_insn (operands
[0], operands
[1]);
10439 split_di (operands
, 1, low
, high
);
10440 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
10443 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
10445 if (TARGET_CMOVE
&& scratch
)
10447 ix86_expand_clear (scratch
);
10448 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2], scratch
));
10451 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
10455 ix86_split_ashrdi (rtx
*operands
, rtx scratch
)
10457 rtx low
[2], high
[2];
10460 if (GET_CODE (operands
[2]) == CONST_INT
)
10462 split_di (operands
, 2, low
, high
);
10463 count
= INTVAL (operands
[2]) & 63;
10467 emit_move_insn (high
[0], high
[1]);
10468 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
10469 emit_move_insn (low
[0], high
[0]);
10472 else if (count
>= 32)
10474 emit_move_insn (low
[0], high
[1]);
10475 emit_move_insn (high
[0], low
[0]);
10476 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
10478 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
10482 if (!rtx_equal_p (operands
[0], operands
[1]))
10483 emit_move_insn (operands
[0], operands
[1]);
10484 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
10485 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
10490 if (!rtx_equal_p (operands
[0], operands
[1]))
10491 emit_move_insn (operands
[0], operands
[1]);
10493 split_di (operands
, 1, low
, high
);
10495 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
10496 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
10498 if (TARGET_CMOVE
&& scratch
)
10500 emit_move_insn (scratch
, high
[0]);
10501 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
10502 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
10506 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
10511 ix86_split_lshrdi (rtx
*operands
, rtx scratch
)
10513 rtx low
[2], high
[2];
10516 if (GET_CODE (operands
[2]) == CONST_INT
)
10518 split_di (operands
, 2, low
, high
);
10519 count
= INTVAL (operands
[2]) & 63;
10523 emit_move_insn (low
[0], high
[1]);
10524 ix86_expand_clear (high
[0]);
10527 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
10531 if (!rtx_equal_p (operands
[0], operands
[1]))
10532 emit_move_insn (operands
[0], operands
[1]);
10533 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
10534 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
10539 if (!rtx_equal_p (operands
[0], operands
[1]))
10540 emit_move_insn (operands
[0], operands
[1]);
10542 split_di (operands
, 1, low
, high
);
10544 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
10545 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
10547 /* Heh. By reversing the arguments, we can reuse this pattern. */
10548 if (TARGET_CMOVE
&& scratch
)
10550 ix86_expand_clear (scratch
);
10551 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
10555 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
10559 /* Helper function for the string operations below. Test VARIABLE whether
10560 it is aligned to VALUE bytes. If true, jump to the label. */
10562 ix86_expand_aligntest (rtx variable
, int value
)
10564 rtx label
= gen_label_rtx ();
10565 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
10566 if (GET_MODE (variable
) == DImode
)
10567 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
10569 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
10570 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
10575 /* Adjust COUNTER by the VALUE. */
10577 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
10579 if (GET_MODE (countreg
) == DImode
)
10580 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
10582 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
10585 /* Zero extend possibly SImode EXP to Pmode register. */
10587 ix86_zero_extend_to_Pmode (rtx exp
)
10590 if (GET_MODE (exp
) == VOIDmode
)
10591 return force_reg (Pmode
, exp
);
10592 if (GET_MODE (exp
) == Pmode
)
10593 return copy_to_mode_reg (Pmode
, exp
);
10594 r
= gen_reg_rtx (Pmode
);
10595 emit_insn (gen_zero_extendsidi2 (r
, exp
));
10599 /* Expand string move (memcpy) operation. Use i386 string operations when
10600 profitable. expand_clrmem contains similar code. */
10602 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
)
10604 rtx srcreg
, destreg
, countreg
, srcexp
, destexp
;
10605 enum machine_mode counter_mode
;
10606 HOST_WIDE_INT align
= 0;
10607 unsigned HOST_WIDE_INT count
= 0;
10609 if (GET_CODE (align_exp
) == CONST_INT
)
10610 align
= INTVAL (align_exp
);
10612 /* Can't use any of this if the user has appropriated esi or edi. */
10613 if (global_regs
[4] || global_regs
[5])
10616 /* This simple hack avoids all inlining code and simplifies code below. */
10617 if (!TARGET_ALIGN_STRINGOPS
)
10620 if (GET_CODE (count_exp
) == CONST_INT
)
10622 count
= INTVAL (count_exp
);
10623 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
10627 /* Figure out proper mode for counter. For 32bits it is always SImode,
10628 for 64bits use SImode when possible, otherwise DImode.
10629 Set count to number of bytes copied when known at compile time. */
10631 || GET_MODE (count_exp
) == SImode
10632 || x86_64_zext_immediate_operand (count_exp
, VOIDmode
))
10633 counter_mode
= SImode
;
10635 counter_mode
= DImode
;
10637 if (counter_mode
!= SImode
&& counter_mode
!= DImode
)
10640 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
10641 if (destreg
!= XEXP (dst
, 0))
10642 dst
= replace_equiv_address_nv (dst
, destreg
);
10643 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
10644 if (srcreg
!= XEXP (src
, 0))
10645 src
= replace_equiv_address_nv (src
, srcreg
);
10647 /* When optimizing for size emit simple rep ; movsb instruction for
10648 counts not divisible by 4. */
10650 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
10652 emit_insn (gen_cld ());
10653 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
10654 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
10655 srcexp
= gen_rtx_PLUS (Pmode
, srcreg
, countreg
);
10656 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
, countreg
,
10660 /* For constant aligned (or small unaligned) copies use rep movsl
10661 followed by code copying the rest. For PentiumPro ensure 8 byte
10662 alignment to allow rep movsl acceleration. */
10664 else if (count
!= 0
10666 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
10667 || optimize_size
|| count
< (unsigned int) 64))
10669 unsigned HOST_WIDE_INT offset
= 0;
10670 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
10671 rtx srcmem
, dstmem
;
10673 emit_insn (gen_cld ());
10674 if (count
& ~(size
- 1))
10676 countreg
= copy_to_mode_reg (counter_mode
,
10677 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
10678 & (TARGET_64BIT
? -1 : 0x3fffffff)));
10679 countreg
= ix86_zero_extend_to_Pmode (countreg
);
10681 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
10682 GEN_INT (size
== 4 ? 2 : 3));
10683 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
10684 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
10686 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
10687 countreg
, destexp
, srcexp
));
10688 offset
= count
& ~(size
- 1);
10690 if (size
== 8 && (count
& 0x04))
10692 srcmem
= adjust_automodify_address_nv (src
, SImode
, srcreg
,
10694 dstmem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
10696 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10701 srcmem
= adjust_automodify_address_nv (src
, HImode
, srcreg
,
10703 dstmem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
10705 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10710 srcmem
= adjust_automodify_address_nv (src
, QImode
, srcreg
,
10712 dstmem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
10714 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10717 /* The generic code based on the glibc implementation:
10718 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10719 allowing accelerated copying there)
10720 - copy the data using rep movsl
10721 - copy the rest. */
10726 rtx srcmem
, dstmem
;
10727 int desired_alignment
= (TARGET_PENTIUMPRO
10728 && (count
== 0 || count
>= (unsigned int) 260)
10729 ? 8 : UNITS_PER_WORD
);
10730 /* Get rid of MEM_OFFSETs, they won't be accurate. */
10731 dst
= change_address (dst
, BLKmode
, destreg
);
10732 src
= change_address (src
, BLKmode
, srcreg
);
10734 /* In case we don't know anything about the alignment, default to
10735 library version, since it is usually equally fast and result in
10738 Also emit call when we know that the count is large and call overhead
10739 will not be important. */
10740 if (!TARGET_INLINE_ALL_STRINGOPS
10741 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
10744 if (TARGET_SINGLE_STRINGOP
)
10745 emit_insn (gen_cld ());
10747 countreg2
= gen_reg_rtx (Pmode
);
10748 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
10750 /* We don't use loops to align destination and to copy parts smaller
10751 than 4 bytes, because gcc is able to optimize such code better (in
10752 the case the destination or the count really is aligned, gcc is often
10753 able to predict the branches) and also it is friendlier to the
10754 hardware branch prediction.
10756 Using loops is beneficial for generic case, because we can
10757 handle small counts using the loops. Many CPUs (such as Athlon)
10758 have large REP prefix setup costs.
10760 This is quite costly. Maybe we can revisit this decision later or
10761 add some customizability to this code. */
10763 if (count
== 0 && align
< desired_alignment
)
10765 label
= gen_label_rtx ();
10766 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
10767 LEU
, 0, counter_mode
, 1, label
);
10771 rtx label
= ix86_expand_aligntest (destreg
, 1);
10772 srcmem
= change_address (src
, QImode
, srcreg
);
10773 dstmem
= change_address (dst
, QImode
, destreg
);
10774 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10775 ix86_adjust_counter (countreg
, 1);
10776 emit_label (label
);
10777 LABEL_NUSES (label
) = 1;
10781 rtx label
= ix86_expand_aligntest (destreg
, 2);
10782 srcmem
= change_address (src
, HImode
, srcreg
);
10783 dstmem
= change_address (dst
, HImode
, destreg
);
10784 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10785 ix86_adjust_counter (countreg
, 2);
10786 emit_label (label
);
10787 LABEL_NUSES (label
) = 1;
10789 if (align
<= 4 && desired_alignment
> 4)
10791 rtx label
= ix86_expand_aligntest (destreg
, 4);
10792 srcmem
= change_address (src
, SImode
, srcreg
);
10793 dstmem
= change_address (dst
, SImode
, destreg
);
10794 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10795 ix86_adjust_counter (countreg
, 4);
10796 emit_label (label
);
10797 LABEL_NUSES (label
) = 1;
10800 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
10802 emit_label (label
);
10803 LABEL_NUSES (label
) = 1;
10806 if (!TARGET_SINGLE_STRINGOP
)
10807 emit_insn (gen_cld ());
10810 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
10812 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
10816 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
10817 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
10819 srcexp
= gen_rtx_PLUS (Pmode
, destexp
, srcreg
);
10820 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
10821 emit_insn (gen_rep_mov (destreg
, dst
, srcreg
, src
,
10822 countreg2
, destexp
, srcexp
));
10826 emit_label (label
);
10827 LABEL_NUSES (label
) = 1;
10829 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
10831 srcmem
= change_address (src
, SImode
, srcreg
);
10832 dstmem
= change_address (dst
, SImode
, destreg
);
10833 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10835 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
10837 rtx label
= ix86_expand_aligntest (countreg
, 4);
10838 srcmem
= change_address (src
, SImode
, srcreg
);
10839 dstmem
= change_address (dst
, SImode
, destreg
);
10840 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10841 emit_label (label
);
10842 LABEL_NUSES (label
) = 1;
10844 if (align
> 2 && count
!= 0 && (count
& 2))
10846 srcmem
= change_address (src
, HImode
, srcreg
);
10847 dstmem
= change_address (dst
, HImode
, destreg
);
10848 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10850 if (align
<= 2 || count
== 0)
10852 rtx label
= ix86_expand_aligntest (countreg
, 2);
10853 srcmem
= change_address (src
, HImode
, srcreg
);
10854 dstmem
= change_address (dst
, HImode
, destreg
);
10855 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10856 emit_label (label
);
10857 LABEL_NUSES (label
) = 1;
10859 if (align
> 1 && count
!= 0 && (count
& 1))
10861 srcmem
= change_address (src
, QImode
, srcreg
);
10862 dstmem
= change_address (dst
, QImode
, destreg
);
10863 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10865 if (align
<= 1 || count
== 0)
10867 rtx label
= ix86_expand_aligntest (countreg
, 1);
10868 srcmem
= change_address (src
, QImode
, srcreg
);
10869 dstmem
= change_address (dst
, QImode
, destreg
);
10870 emit_insn (gen_strmov (destreg
, dstmem
, srcreg
, srcmem
));
10871 emit_label (label
);
10872 LABEL_NUSES (label
) = 1;
10879 /* Expand string clear operation (bzero). Use i386 string operations when
10880 profitable. expand_movmem contains similar code. */
10882 ix86_expand_clrmem (rtx dst
, rtx count_exp
, rtx align_exp
)
10884 rtx destreg
, zeroreg
, countreg
, destexp
;
10885 enum machine_mode counter_mode
;
10886 HOST_WIDE_INT align
= 0;
10887 unsigned HOST_WIDE_INT count
= 0;
10889 if (GET_CODE (align_exp
) == CONST_INT
)
10890 align
= INTVAL (align_exp
);
10892 /* Can't use any of this if the user has appropriated esi. */
10893 if (global_regs
[4])
10896 /* This simple hack avoids all inlining code and simplifies code below. */
10897 if (!TARGET_ALIGN_STRINGOPS
)
10900 if (GET_CODE (count_exp
) == CONST_INT
)
10902 count
= INTVAL (count_exp
);
10903 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
10906 /* Figure out proper mode for counter. For 32bits it is always SImode,
10907 for 64bits use SImode when possible, otherwise DImode.
10908 Set count to number of bytes copied when known at compile time. */
10910 || GET_MODE (count_exp
) == SImode
10911 || x86_64_zext_immediate_operand (count_exp
, VOIDmode
))
10912 counter_mode
= SImode
;
10914 counter_mode
= DImode
;
10916 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
10917 if (destreg
!= XEXP (dst
, 0))
10918 dst
= replace_equiv_address_nv (dst
, destreg
);
10921 /* When optimizing for size emit simple rep ; stosb instruction for
10922 counts not divisible by 4. The movl $N, %ecx; rep; stosb
10923 sequence is 7 bytes long, so if optimizing for size and count is
10924 small enough that some stosl, stosw and stosb instructions without
10925 rep are shorter, fall back into the next if. */
10927 if ((!optimize
|| optimize_size
)
10930 && (!optimize_size
|| (count
& 0x03) + (count
>> 2) > 7))))
10932 emit_insn (gen_cld ());
10934 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
10935 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
10936 destexp
= gen_rtx_PLUS (Pmode
, destreg
, countreg
);
10937 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
, destexp
));
10939 else if (count
!= 0
10941 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
10942 || optimize_size
|| count
< (unsigned int) 64))
10944 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
10945 unsigned HOST_WIDE_INT offset
= 0;
10947 emit_insn (gen_cld ());
10949 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
10950 if (count
& ~(size
- 1))
10952 unsigned HOST_WIDE_INT repcount
;
10953 unsigned int max_nonrep
;
10955 repcount
= count
>> (size
== 4 ? 2 : 3);
10957 repcount
&= 0x3fffffff;
10959 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
10960 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
10961 bytes. In both cases the latter seems to be faster for small
10962 counts. */
10963 max_nonrep
= size
== 4 ? 7 : 4;
10964 if (!optimize_size
)
10967 case PROCESSOR_PENTIUM4
:
10968 case PROCESSOR_NOCONA
:
10975 if (repcount
<= max_nonrep
)
10976 while (repcount
-- > 0)
10978 rtx mem
= adjust_automodify_address_nv (dst
,
10979 GET_MODE (zeroreg
),
10981 emit_insn (gen_strset (destreg
, mem
, zeroreg
));
10986 countreg
= copy_to_mode_reg (counter_mode
, GEN_INT (repcount
));
10987 countreg
= ix86_zero_extend_to_Pmode (countreg
);
10988 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
10989 GEN_INT (size
== 4 ? 2 : 3));
10990 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
10991 emit_insn (gen_rep_stos (destreg
, countreg
, dst
, zeroreg
,
10993 offset
= count
& ~(size
- 1);
10996 if (size
== 8 && (count
& 0x04))
10998 rtx mem
= adjust_automodify_address_nv (dst
, SImode
, destreg
,
11000 emit_insn (gen_strset (destreg
, mem
,
11001 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
11006 rtx mem
= adjust_automodify_address_nv (dst
, HImode
, destreg
,
11008 emit_insn (gen_strset (destreg
, mem
,
11009 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11014 rtx mem
= adjust_automodify_address_nv (dst
, QImode
, destreg
,
11016 emit_insn (gen_strset (destreg
, mem
,
11017 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11024 /* Compute desired alignment of the string operation. */
11025 int desired_alignment
= (TARGET_PENTIUMPRO
11026 && (count
== 0 || count
>= (unsigned int) 260)
11027 ? 8 : UNITS_PER_WORD
);
11029 /* In case we don't know anything about the alignment, default to
11030 the library version, since it is usually equally fast and results in
11031 shorter code.
11033 Also emit call when we know that the count is large and call overhead
11034 will not be important. */
11035 if (!TARGET_INLINE_ALL_STRINGOPS
11036 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
11039 if (TARGET_SINGLE_STRINGOP
)
11040 emit_insn (gen_cld ());
11042 countreg2
= gen_reg_rtx (Pmode
);
11043 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
11044 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
11045 /* Get rid of MEM_OFFSET, it won't be accurate. */
11046 dst
= change_address (dst
, BLKmode
, destreg
);
11048 if (count
== 0 && align
< desired_alignment
)
11050 label
= gen_label_rtx ();
11051 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
11052 LEU
, 0, counter_mode
, 1, label
);
11056 rtx label
= ix86_expand_aligntest (destreg
, 1);
11057 emit_insn (gen_strset (destreg
, dst
,
11058 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11059 ix86_adjust_counter (countreg
, 1);
11060 emit_label (label
);
11061 LABEL_NUSES (label
) = 1;
11065 rtx label
= ix86_expand_aligntest (destreg
, 2);
11066 emit_insn (gen_strset (destreg
, dst
,
11067 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11068 ix86_adjust_counter (countreg
, 2);
11069 emit_label (label
);
11070 LABEL_NUSES (label
) = 1;
11072 if (align
<= 4 && desired_alignment
> 4)
11074 rtx label
= ix86_expand_aligntest (destreg
, 4);
11075 emit_insn (gen_strset (destreg
, dst
,
11077 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
11079 ix86_adjust_counter (countreg
, 4);
11080 emit_label (label
);
11081 LABEL_NUSES (label
) = 1;
11084 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
11086 emit_label (label
);
11087 LABEL_NUSES (label
) = 1;
11091 if (!TARGET_SINGLE_STRINGOP
)
11092 emit_insn (gen_cld ());
11095 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
11097 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, GEN_INT (3));
11101 emit_insn (gen_lshrsi3 (countreg2
, countreg
, const2_rtx
));
11102 destexp
= gen_rtx_ASHIFT (Pmode
, countreg2
, const2_rtx
);
11104 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destreg
);
11105 emit_insn (gen_rep_stos (destreg
, countreg2
, dst
, zeroreg
, destexp
));
11109 emit_label (label
);
11110 LABEL_NUSES (label
) = 1;
11113 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
11114 emit_insn (gen_strset (destreg
, dst
,
11115 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
11116 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
11118 rtx label
= ix86_expand_aligntest (countreg
, 4);
11119 emit_insn (gen_strset (destreg
, dst
,
11120 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
11121 emit_label (label
);
11122 LABEL_NUSES (label
) = 1;
11124 if (align
> 2 && count
!= 0 && (count
& 2))
11125 emit_insn (gen_strset (destreg
, dst
,
11126 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11127 if (align
<= 2 || count
== 0)
11129 rtx label
= ix86_expand_aligntest (countreg
, 2);
11130 emit_insn (gen_strset (destreg
, dst
,
11131 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11132 emit_label (label
);
11133 LABEL_NUSES (label
) = 1;
11135 if (align
> 1 && count
!= 0 && (count
& 1))
11136 emit_insn (gen_strset (destreg
, dst
,
11137 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11138 if (align
<= 1 || count
== 0)
11140 rtx label
= ix86_expand_aligntest (countreg
, 1);
11141 emit_insn (gen_strset (destreg
, dst
,
11142 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11143 emit_label (label
);
11144 LABEL_NUSES (label
) = 1;
11150 /* Expand strlen. */
11152 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
11154 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
11156 /* The generic case of strlen expander is long. Avoid its
11157 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
11159 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
11160 && !TARGET_INLINE_ALL_STRINGOPS
11162 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
11165 addr
= force_reg (Pmode
, XEXP (src
, 0));
11166 scratch1
= gen_reg_rtx (Pmode
);
11168 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
11171 /* Well it seems that some optimizer does not combine a call like
11172 foo(strlen(bar), strlen(bar));
11173 when the move and the subtraction is done here. It does calculate
11174 the length just once when these instructions are done inside of
11175 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11176 often used and I use one fewer register for the lifetime of
11177 output_strlen_unroll() this is better. */
11179 emit_move_insn (out
, addr
);
11181 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
11183 /* strlensi_unroll_1 returns the address of the zero at the end of
11184 the string, like memchr(), so compute the length by subtracting
11185 the start address. */
11187 emit_insn (gen_subdi3 (out
, out
, addr
));
11189 emit_insn (gen_subsi3 (out
, out
, addr
));
11194 scratch2
= gen_reg_rtx (Pmode
);
11195 scratch3
= gen_reg_rtx (Pmode
);
11196 scratch4
= force_reg (Pmode
, constm1_rtx
);
11198 emit_move_insn (scratch3
, addr
);
11199 eoschar
= force_reg (QImode
, eoschar
);
11201 emit_insn (gen_cld ());
11202 src
= replace_equiv_address_nv (src
, scratch3
);
11204 /* If .md starts supporting :P, this can be done in .md. */
11205 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
11206 scratch4
), UNSPEC_SCAS
);
11207 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
11210 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
11211 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
11215 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
11216 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
11222 /* Expand the appropriate insns for doing strlen if not just doing
11223 repnz; scasb
11225 out = result, initialized with the start address
11226 align_rtx = alignment of the address.
11227 scratch = scratch register, initialized with the start address when
11228 not aligned, otherwise undefined
11230 This is just the body. It needs the initializations mentioned above and
11231 some address computing at the end. These things are done in i386.md. */
11234 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
11238 rtx align_2_label
= NULL_RTX
;
11239 rtx align_3_label
= NULL_RTX
;
11240 rtx align_4_label
= gen_label_rtx ();
11241 rtx end_0_label
= gen_label_rtx ();
11243 rtx tmpreg
= gen_reg_rtx (SImode
);
11244 rtx scratch
= gen_reg_rtx (SImode
);
11248 if (GET_CODE (align_rtx
) == CONST_INT
)
11249 align
= INTVAL (align_rtx
);
11251 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11253 /* Is there a known alignment and is it less than 4? */
11256 rtx scratch1
= gen_reg_rtx (Pmode
);
11257 emit_move_insn (scratch1
, out
);
11258 /* Is there a known alignment and is it not 2? */
11261 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
11262 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
11264 /* Leave just the 2 lower bits (mask with 3). */
11265 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
11266 NULL_RTX
, 0, OPTAB_WIDEN
);
11268 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
11269 Pmode
, 1, align_4_label
);
11270 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
11271 Pmode
, 1, align_2_label
);
11272 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
11273 Pmode
, 1, align_3_label
);
11277 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11278 check if it is aligned to 4 bytes. */
11280 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
11281 NULL_RTX
, 0, OPTAB_WIDEN
);
11283 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
11284 Pmode
, 1, align_4_label
);
11287 mem
= change_address (src
, QImode
, out
);
11289 /* Now compare the bytes. */
11291 /* Compare the first n unaligned byte on a byte per byte basis. */
11292 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
11293 QImode
, 1, end_0_label
);
11295 /* Increment the address. */
11297 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11299 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11301 /* Not needed with an alignment of 2 */
11304 emit_label (align_2_label
);
11306 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
11310 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11312 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11314 emit_label (align_3_label
);
11317 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
11321 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11323 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11326 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11327 align this loop. It gives only huge programs, but does not help to
11328 speed up. */
11329 emit_label (align_4_label
);
11331 mem
= change_address (src
, SImode
, out
);
11332 emit_move_insn (scratch
, mem
);
11334 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
11336 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
11338 /* This formula yields a nonzero result iff one of the bytes is zero.
11339 This saves three branches inside loop and many cycles. */
11341 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
11342 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
11343 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
11344 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
11345 gen_int_mode (0x80808080, SImode
)));
11346 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
11351 rtx reg
= gen_reg_rtx (SImode
);
11352 rtx reg2
= gen_reg_rtx (Pmode
);
11353 emit_move_insn (reg
, tmpreg
);
11354 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
11356 /* If zero is not in the first two bytes, move two bytes forward. */
11357 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
11358 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
11359 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
11360 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
11361 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
11364 /* Emit lea manually to avoid clobbering of flags. */
11365 emit_insn (gen_rtx_SET (SImode
, reg2
,
11366 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
11368 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
11369 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
11370 emit_insn (gen_rtx_SET (VOIDmode
, out
,
11371 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
11378 rtx end_2_label
= gen_label_rtx ();
11379 /* Is zero in the first two bytes? */
11381 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
11382 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
11383 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
11384 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11385 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
11387 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11388 JUMP_LABEL (tmp
) = end_2_label
;
11390 /* Not in the first two. Move two bytes forward. */
11391 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
11393 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
11395 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
11397 emit_label (end_2_label
);
11401 /* Avoid branch in fixing the byte. */
11402 tmpreg
= gen_lowpart (QImode
, tmpreg
);
11403 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
11404 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
11406 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
11408 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
11410 emit_label (end_0_label
);
11414 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
11415 rtx callarg2 ATTRIBUTE_UNUSED
,
11416 rtx pop
, int sibcall
)
11418 rtx use
= NULL
, call
;
11420 if (pop
== const0_rtx
)
11422 if (TARGET_64BIT
&& pop
)
11426 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
11427 fnaddr
= machopic_indirect_call_target (fnaddr
);
11429 /* Static functions and indirect calls don't need the pic register. */
11430 if (! TARGET_64BIT
&& flag_pic
11431 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
11432 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
11433 use_reg (&use
, pic_offset_table_rtx
);
11435 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
11437 rtx al
= gen_rtx_REG (QImode
, 0);
11438 emit_move_insn (al
, callarg2
);
11439 use_reg (&use
, al
);
11441 #endif /* TARGET_MACHO */
11443 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
11445 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
11446 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
11448 if (sibcall
&& TARGET_64BIT
11449 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
11452 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
11453 fnaddr
= gen_rtx_REG (Pmode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
11454 emit_move_insn (fnaddr
, addr
);
11455 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
11458 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
11460 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
11463 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
11464 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
11465 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
11468 call
= emit_call_insn (call
);
11470 CALL_INSN_FUNCTION_USAGE (call
) = use
;
11474 /* Clear stack slot assignments remembered from previous functions.
11475 This is called from INIT_EXPANDERS once before RTL is emitted for each
11478 static struct machine_function
*
11479 ix86_init_machine_status (void)
11481 struct machine_function
*f
;
11483 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
11484 f
->use_fast_prologue_epilogue_nregs
= -1;
11489 /* Return a MEM corresponding to a stack slot with mode MODE.
11490 Allocate a new slot if necessary.
11492 The RTL for a function can have several slots available: N is
11493 which slot to use. */
11496 assign_386_stack_local (enum machine_mode mode
, int n
)
11498 struct stack_local_entry
*s
;
11500 if (n
< 0 || n
>= MAX_386_STACK_LOCALS
)
11503 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
11504 if (s
->mode
== mode
&& s
->n
== n
)
11507 s
= (struct stack_local_entry
*)
11508 ggc_alloc (sizeof (struct stack_local_entry
));
11511 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
11513 s
->next
= ix86_stack_locals
;
11514 ix86_stack_locals
= s
;
11518 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11520 static GTY(()) rtx ix86_tls_symbol
;
11522 ix86_tls_get_addr (void)
11525 if (!ix86_tls_symbol
)
11527 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
11528 (TARGET_GNU_TLS
&& !TARGET_64BIT
)
11529 ? "___tls_get_addr"
11530 : "__tls_get_addr");
11533 return ix86_tls_symbol
;
11536 /* Calculate the length of the memory address in the instruction
11537 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11540 memory_address_length (rtx addr
)
11542 struct ix86_address parts
;
11543 rtx base
, index
, disp
;
11546 if (GET_CODE (addr
) == PRE_DEC
11547 || GET_CODE (addr
) == POST_INC
11548 || GET_CODE (addr
) == PRE_MODIFY
11549 || GET_CODE (addr
) == POST_MODIFY
)
11552 if (! ix86_decompose_address (addr
, &parts
))
11556 index
= parts
.index
;
11561 - esp as the base always wants an index,
11562 - ebp as the base always wants a displacement. */
11564 /* Register Indirect. */
11565 if (base
&& !index
&& !disp
)
11567 /* esp (for its index) and ebp (for its displacement) need
11568 the two-byte modrm form. */
11569 if (addr
== stack_pointer_rtx
11570 || addr
== arg_pointer_rtx
11571 || addr
== frame_pointer_rtx
11572 || addr
== hard_frame_pointer_rtx
)
11576 /* Direct Addressing. */
11577 else if (disp
&& !base
&& !index
)
11582 /* Find the length of the displacement constant. */
11585 if (GET_CODE (disp
) == CONST_INT
11586 && CONST_OK_FOR_LETTER_P (INTVAL (disp
), 'K')
11592 /* ebp always wants a displacement. */
11593 else if (base
== hard_frame_pointer_rtx
)
11596 /* An index requires the two-byte modrm form.... */
11598 /* ...like esp, which always wants an index. */
11599 || base
== stack_pointer_rtx
11600 || base
== arg_pointer_rtx
11601 || base
== frame_pointer_rtx
)
11608 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11609 is set, expect that the insn has an 8-bit immediate alternative.
   NOTE(review): several short lines of this function (local declarations,
   the individual case arms of the switch and the final return) are absent
   from this listing; the comments below describe only what is visible. */
11611 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
/* Extract INSN (presumably filling recog_data -- confirm), then walk its
   operands from the last one down to the first. */
11615 extract_insn_cached (insn
);
11616 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
/* Only constant operands are of interest. */
11617 if (CONSTANT_P (recog_data
.operand
[i
]))
/* Tail of a condition whose first clause is not visible here: the
   operand must be a CONST_INT satisfying constraint letter 'K'. */
11622 && GET_CODE (recog_data
.operand
[i
]) == CONST_INT
11623 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data
.operand
[i
]), 'K'))
/* Dispatch on the insn's "mode" attribute; the arms are not visible. */
11627 switch (get_attr_mode (insn
))
11638 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
/* Fatal error for an insn mode the switch does not handle (presumably the
   default arm -- confirm against the full source). */
11643 fatal_insn ("unknown insn mode", insn
);
11649 /* Compute default value for "length_address" attribute. */
11651 ix86_attr_length_address_default (rtx insn
)
11655 if (get_attr_type (insn
) == TYPE_LEA
)
11657 rtx set
= PATTERN (insn
);
11658 if (GET_CODE (set
) == SET
)
11660 else if (GET_CODE (set
) == PARALLEL
11661 && GET_CODE (XVECEXP (set
, 0, 0)) == SET
)
11662 set
= XVECEXP (set
, 0, 0);
11665 #ifdef ENABLE_CHECKING
11671 return memory_address_length (SET_SRC (set
));
11674 extract_insn_cached (insn
);
11675 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11676 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
11678 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
11684 /* Return the maximum number of instructions a cpu can issue. */
/* NOTE(review): this listing omits the switch header, every return
   statement and the default arm, so the concrete rate returned for the
   processors below cannot be read off here; other case labels may also
   be missing.  Confirm against the full source before relying on it. */
11687 ix86_issue_rate (void)
/* Processor case labels visible in this listing. */
11691 case PROCESSOR_PENTIUM
:
11695 case PROCESSOR_PENTIUMPRO
:
11696 case PROCESSOR_PENTIUM4
:
11697 case PROCESSOR_ATHLON
:
11699 case PROCESSOR_NOCONA
:
11707 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11708 by DEP_INSN and nothing set by DEP_INSN. */
11711 ix86_flags_dependant (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
11715 /* Simplify the test for uninteresting insns. */
11716 if (insn_type
!= TYPE_SETCC
11717 && insn_type
!= TYPE_ICMOV
11718 && insn_type
!= TYPE_FCMOV
11719 && insn_type
!= TYPE_IBR
)
11722 if ((set
= single_set (dep_insn
)) != 0)
11724 set
= SET_DEST (set
);
11727 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
11728 && XVECLEN (PATTERN (dep_insn
), 0) == 2
11729 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
11730 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
11732 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
11733 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
11738 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
11741 /* This test is true if the dependent insn reads the flags but
11742 not any other potentially set register. */
11743 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
11746 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
11752 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11753 address with operands set by DEP_INSN.
   NOTE(review): short lines (local declarations, the rest of the first
   condition, the fall-back paths when no address is found) are absent
   from this listing; the comments below cover only what is visible. */
11756 ix86_agi_dependant (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
/* lea case: the address is the source of the insn's SET.  The remainder of
   this condition is not visible here. */
11760 if (insn_type
== TYPE_LEA
11763 addr
= PATTERN (insn
);
/* A plain SET is used as is; for a PARALLEL whose first element is a SET,
   that first element is used. */
11764 if (GET_CODE (addr
) == SET
)
11766 else if (GET_CODE (addr
) == PARALLEL
11767 && GET_CODE (XVECEXP (addr
, 0, 0)) == SET
)
11768 addr
= XVECEXP (addr
, 0, 0);
11771 addr
= SET_SRC (addr
);
/* Otherwise scan the operands from last to first and take the address of
   a MEM operand. */
11776 extract_insn_cached (insn
);
11777 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11778 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
11780 addr
= XEXP (recog_data
.operand
[i
], 0);
/* The result is whether DEP_INSN modifies anything in that address. */
11787 return modified_in_p (addr
, dep_insn
);
11791 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
11793 enum attr_type insn_type
, dep_insn_type
;
11794 enum attr_memory memory
;
11796 int dep_insn_code_number
;
11798 /* Anti and output dependencies have zero cost on all CPUs. */
11799 if (REG_NOTE_KIND (link
) != 0)
11802 dep_insn_code_number
= recog_memoized (dep_insn
);
11804 /* If we can't recognize the insns, we can't really do anything. */
11805 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
11808 insn_type
= get_attr_type (insn
);
11809 dep_insn_type
= get_attr_type (dep_insn
);
11813 case PROCESSOR_PENTIUM
:
11814 /* Address Generation Interlock adds a cycle of latency. */
11815 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11818 /* ??? Compares pair with jump/setcc. */
11819 if (ix86_flags_dependant (insn
, dep_insn
, insn_type
))
11822 /* Floating point stores require value to be ready one cycle earlier. */
11823 if (insn_type
== TYPE_FMOV
11824 && get_attr_memory (insn
) == MEMORY_STORE
11825 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11829 case PROCESSOR_PENTIUMPRO
:
11830 memory
= get_attr_memory (insn
);
11832 /* INT->FP conversion is expensive. */
11833 if (get_attr_fp_int_src (dep_insn
))
11836 /* There is one cycle extra latency between an FP op and a store. */
11837 if (insn_type
== TYPE_FMOV
11838 && (set
= single_set (dep_insn
)) != NULL_RTX
11839 && (set2
= single_set (insn
)) != NULL_RTX
11840 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
11841 && GET_CODE (SET_DEST (set2
)) == MEM
)
11844 /* Show ability of reorder buffer to hide latency of load by executing
11845 in parallel with previous instruction in case
11846 previous instruction is not needed to compute the address. */
11847 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11848 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11850 /* Claim moves to take one cycle, as core can issue one load
11851 at time and the next load can start cycle later. */
11852 if (dep_insn_type
== TYPE_IMOV
11853 || dep_insn_type
== TYPE_FMOV
)
11861 memory
= get_attr_memory (insn
);
11863 /* The esp dependency is resolved before the instruction is really
11864 finished. */
11865 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
11866 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
11869 /* INT->FP conversion is expensive. */
11870 if (get_attr_fp_int_src (dep_insn
))
11873 /* Show ability of reorder buffer to hide latency of load by executing
11874 in parallel with previous instruction in case
11875 previous instruction is not needed to compute the address. */
11876 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11877 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11879 /* Claim moves to take one cycle, as core can issue one load
11880 at time and the next load can start cycle later. */
11881 if (dep_insn_type
== TYPE_IMOV
11882 || dep_insn_type
== TYPE_FMOV
)
11891 case PROCESSOR_ATHLON
:
11893 memory
= get_attr_memory (insn
);
11895 /* Show ability of reorder buffer to hide latency of load by executing
11896 in parallel with previous instruction in case
11897 previous instruction is not needed to compute the address. */
11898 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11899 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11901 enum attr_unit unit
= get_attr_unit (insn
);
11904 /* Because of the difference between the length of integer and
11905 floating unit pipeline preparation stages, the memory operands
11906 for floating point are cheaper.
11908 ??? For Athlon the difference is most probably 2. */
11909 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
11912 loadcost
= TARGET_ATHLON
? 2 : 0;
11914 if (cost
>= loadcost
)
11927 /* How many alternative schedules to try. This should be as wide as the
11928 scheduling freedom in the DFA, but no wider. Making this value too
11929 large results in extra work for the scheduler.
   NOTE(review): the return statements are absent from this listing, so the
   lookahead chosen for each tuning target cannot be read off here. */
11932 ia32_multipass_dfa_lookahead (void)
/* Tuning for Pentium is tested on its own. */
11934 if (ix86_tune
== PROCESSOR_PENTIUM
)
/* Tuning for PentiumPro and K6 is tested together. */
11937 if (ix86_tune
== PROCESSOR_PENTIUMPRO
11938 || ix86_tune
== PROCESSOR_K6
)
11946 /* Compute the alignment given to a constant that is being placed in memory.
11947 EXP is the constant and ALIGN is the alignment that the object would
11949 The value of this function is used instead of that alignment to align
11953 ix86_constant_alignment (tree exp
, int align
)
11955 if (TREE_CODE (exp
) == REAL_CST
)
11957 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
11959 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
11962 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
11963 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
11964 return BITS_PER_WORD
;
11969 /* Compute the alignment for a static variable.
11970 TYPE is the data type, and ALIGN is the alignment that
11971 the object would ordinarily have. The value of this function is used
11972 instead of that alignment to align the object. */
11975 ix86_data_alignment (tree type
, int align
)
11977 if (AGGREGATE_TYPE_P (type
)
11978 && TYPE_SIZE (type
)
11979 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11980 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 256
11981 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 256)
11984 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11985 to 16byte boundary. */
11988 if (AGGREGATE_TYPE_P (type
)
11989 && TYPE_SIZE (type
)
11990 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11991 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
11992 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
11996 if (TREE_CODE (type
) == ARRAY_TYPE
)
11998 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
12000 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
12003 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
12006 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
12008 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
12011 else if ((TREE_CODE (type
) == RECORD_TYPE
12012 || TREE_CODE (type
) == UNION_TYPE
12013 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
12014 && TYPE_FIELDS (type
))
12016 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
12018 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
12021 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
12022 || TREE_CODE (type
) == INTEGER_TYPE
)
12024 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
12026 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
12033 /* Compute the alignment for a local variable.
12034 TYPE is the data type, and ALIGN is the alignment that
12035 the object would ordinarily have. The value produced by this function
12036 is used instead of that alignment to align the object. */
12039 ix86_local_alignment (tree type
, int align
)
/* NOTE(review): this copy of the function has lost lines (braces, the
   statement guarded by each test, and possibly enclosing conditions).
   The notes below describe only the tests that are still visible; each
   test pairs a property of TYPE with an upper bound on ALIGN (64 or
   128).  */
12041 /* x86-64 ABI requires arrays of at least 16 bytes to be aligned
12042 to a 16-byte boundary. */
/* Sized aggregates with a constant TYPE_SIZE.
   NOTE(review): TYPE_SIZE is expressed in bits in GCC's tree
   representation, so `>= 16' looks like it tests 16 bits rather than
   the 16 bytes the comment above talks about -- confirm.  */
12045 if (AGGREGATE_TYPE_P (type
)
12046 && TYPE_SIZE (type
)
12047 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
12048 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
12049 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
/* Arrays: the tests look at the mode of the element type.  */
12052 if (TREE_CODE (type
) == ARRAY_TYPE
)
12054 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
12056 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
/* Complex types: DCmode is tested against 64, XCmode against 128.  */
12059 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
12061 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
12063 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
/* Records and unions that have fields: the tests look at the mode of
   the first entry of TYPE_FIELDS only.  */
12066 else if ((TREE_CODE (type
) == RECORD_TYPE
12067 || TREE_CODE (type
) == UNION_TYPE
12068 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
12069 && TYPE_FIELDS (type
))
12071 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
12073 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
/* Real, vector and integer types: the tests look at the type's own
   mode.  */
12076 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
12077 || TREE_CODE (type
) == INTEGER_TYPE
)
12080 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
12082 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
12088 /* Emit RTL insns to initialize the variable parts of a trampoline.
12089 TRAMP is an RTX used as the base address of the bytes stored below.
12089 FNADDR is an RTX for the address of the function's pure code.
12090 CXT is an RTX for the static chain value for the function. */
12092 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
/* NOTE(review): this copy of the function has lost lines: the
   conditions that choose between the two code sequences below, the
   declaration and updates of `offset', the second argument of two
   emit_move_insn calls, and the statement guarded by the final size
   check are not visible.  The notes describe the visible stores only.  */
/* First sequence, 10 bytes at fixed offsets:
     tramp+0  0xb9        opcode byte (x86 `movl $imm32, %ecx')
     tramp+1  CXT         32-bit immediate
     tramp+5  0xe9        opcode byte (x86 `jmp rel32')
     tramp+6  DISP        32-bit displacement
   DISP is FNADDR - (TRAMP + 10), i.e. relative to the first byte after
   the sequence.  */
12096 /* Compute offset from the end of the jmp to the target function. */
12097 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
12098 plus_constant (tramp
, 10),
12099 NULL_RTX
, 1, OPTAB_DIRECT
);
12100 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
12101 gen_int_mode (0xb9, QImode
));
12102 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
12103 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
12104 gen_int_mode (0xe9, QImode
));
12105 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
/* Second sequence, written at TRAMP + offset.  The two-byte opcodes
   are stored as HImode constants, so on this little-endian target the
   low byte lands first in memory (0xbb41 is stored as 41 bb).  */
12110 /* Try to load address using shorter movl instead of movabs.
12111 We may want to support movq for kernel mode, but kernel does not use
12112 trampolines at the moment. */
12113 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
12115 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
/* Short form: bytes 41 bb followed by the low 32 bits of FNADDR.  */
12116 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12117 gen_int_mode (0xbb41, HImode
));
12118 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
12119 gen_lowpart (SImode
, fnaddr
));
/* Long form: bytes 49 bb followed by a 64-bit (DImode) immediate; the
   value stored is on a line missing from this extract -- presumably
   FNADDR, confirm.  */
12124 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12125 gen_int_mode (0xbb49, HImode
));
12126 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
/* Bytes 49 ba followed by a 64-bit immediate; the value stored is on a
   line missing from this extract -- presumably CXT, confirm.  */
12130 /* Load static chain using movabs to r10. */
12131 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12132 gen_int_mode (0xba49, HImode
));
12133 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
/* Three bytes 49 ff e3: an indirect jump through r11, the register
   loaded with the function address above.  */
12136 /* Jump to r11. */
12137 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
12138 gen_int_mode (0xff49, HImode
));
12139 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
12140 gen_int_mode (0xe3, QImode
));
/* Size check of the bytes written against TRAMPOLINE_SIZE; the action
   taken on overflow is not visible here.  */
12142 if (offset
> TRAMPOLINE_SIZE
)
/* When ENABLE_EXECUTE_STACK is defined, also emit a call to the
   library routine __enable_execute_stack, passing TRAMP.  */
12146 #ifdef ENABLE_EXECUTE_STACK
12147 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
12148 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
/* Register the machine-specific builtin NAME, with type TYPE and
   function code CODE, through lang_hooks.builtin_function.  The
   registration happens only when MASK & target_flags is nonzero and,
   if MASK includes MASK_64BIT, TARGET_64BIT also holds.
   NOTE(review): the first test is a plain bitwise AND, so a MASK with
   several bits set passes as soon as any one of them is set in
   target_flags.
   NOTE(review): the lines that wrap the `if' statement are missing
   from this copy of the macro.  */
12152 #define def_builtin(MASK, NAME, TYPE, CODE) \
12154 if ((MASK) & target_flags \
12155 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12156 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12157 NULL, NULL_TREE); \
12160 /* Bits for builtin_description.flag. */
12162 /* Set when we don't support the comparison natively, and should
12163 swap the operands in order to support it. The tables below use it
12163 to express, e.g., a "greater than" builtin as LT with this flag. */
12164 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row of the builtin tables (bdesc_comi, bdesc_2arg, bdesc_1arg).
   NOTE(review): the braces of this definition are missing from this
   copy.  */
12166 struct builtin_description
/* Target feature bits (MASK_MMX, MASK_SSE, ...) the builtin depends on.  */
12168 const unsigned int mask
;
/* Insn pattern used for the builtin (a CODE_FOR_* value).  */
12169 const enum insn_code icode
;
/* Builtin name; several table rows store 0 here.  How a 0 name is
   treated is decided by code outside this definition.  */
12170 const char *const name
;
/* Builtin enumerator (an IX86_BUILTIN_* value).  */
12171 const enum ix86_builtins code
;
/* Comparison code for comparison builtins; the tables store 0 in rows
   that are not comparisons.  */
12172 const enum rtx_code comparison
;
/* BUILTIN_DESC_* flag bits, 0 when none apply.  */
12173 const unsigned int flag
;
12176 static const struct builtin_description bdesc_comi
[] =
12178 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
12179 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
12180 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
12181 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
12182 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
12183 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
12184 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
12185 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
12186 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
12187 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
12188 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
12189 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
12190 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
12191 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
12192 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
12193 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
12194 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
12195 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
12196 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
12197 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
12198 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
12199 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
12200 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
12201 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
12204 static const struct builtin_description bdesc_2arg
[] =
12207 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
12208 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
12209 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
12210 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
12211 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
12212 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
12213 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
12214 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
12216 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
12217 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
12218 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
12219 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
12220 BUILTIN_DESC_SWAP_OPERANDS
},
12221 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
12222 BUILTIN_DESC_SWAP_OPERANDS
},
12223 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
12224 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
12225 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
12226 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
12227 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
12228 BUILTIN_DESC_SWAP_OPERANDS
},
12229 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
12230 BUILTIN_DESC_SWAP_OPERANDS
},
12231 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
12232 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
12233 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
12234 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
12235 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
12236 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
12237 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
12238 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
12239 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
12240 BUILTIN_DESC_SWAP_OPERANDS
},
12241 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
12242 BUILTIN_DESC_SWAP_OPERANDS
},
12243 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
12245 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
12246 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
12247 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
12248 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
12250 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
12251 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
12252 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
12253 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
12255 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
12256 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
12257 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
12258 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
12259 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
12262 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
12263 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
12264 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
12265 { MASK_MMX
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
12266 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
12267 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
12268 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
12269 { MASK_MMX
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
12271 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
12272 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
12273 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
12274 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
12275 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
12276 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
12277 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
12278 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
12280 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
12281 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
12282 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
12284 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
12285 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
12286 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
12287 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
12289 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
12290 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
12292 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
12293 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
12294 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
12295 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
12296 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
12297 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
12299 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
12300 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
12301 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
12302 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
12304 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
12305 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
12306 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
12307 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
12308 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
12309 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
12312 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
12313 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
12314 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
12316 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
12317 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
12318 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
12320 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
12321 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
12322 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
12323 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
12324 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
12325 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
12327 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
12328 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
12329 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
12330 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
12331 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
12332 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
12334 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
12335 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
12336 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
12337 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
12339 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
12340 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
12343 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
12344 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
12345 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
12346 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
12347 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
12348 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
12349 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
12350 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
12352 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
12353 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
12354 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
12355 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
12356 BUILTIN_DESC_SWAP_OPERANDS
},
12357 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
12358 BUILTIN_DESC_SWAP_OPERANDS
},
12359 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
12360 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
12361 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
12362 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
12363 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
12364 BUILTIN_DESC_SWAP_OPERANDS
},
12365 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
12366 BUILTIN_DESC_SWAP_OPERANDS
},
12367 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
12368 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
12369 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
12370 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
12371 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
12372 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
12373 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
12374 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
12375 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
12377 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
12378 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
12379 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
12380 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
12382 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
12383 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
12384 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
12385 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
12387 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
12388 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
12389 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
12392 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
12393 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
12394 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
12395 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
12396 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
12397 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
12398 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
12399 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
12401 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
12402 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
12403 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
12404 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
12405 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
12406 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
12407 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
12408 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
12410 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
12411 { MASK_SSE2
, CODE_FOR_sse2_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
12413 { MASK_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
12414 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
12415 { MASK_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
12416 { MASK_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
12418 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
12419 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
12421 { MASK_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
12422 { MASK_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
12423 { MASK_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
12424 { MASK_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
12425 { MASK_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
12426 { MASK_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
12428 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
12429 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
12430 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
12431 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
12433 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
12434 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
12435 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
12436 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
12437 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
12438 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
12439 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
12440 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
12442 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
12443 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
12444 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
12446 { MASK_SSE2
, CODE_FOR_sse2_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
12447 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
12449 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
12450 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
12452 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
12453 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
12454 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
12456 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
12457 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
12458 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
12460 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
12461 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
12463 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
12465 { MASK_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
12466 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
12467 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
12468 { MASK_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
12471 { MASK_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
12472 { MASK_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
12473 { MASK_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
12474 { MASK_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
12475 { MASK_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
12476 { MASK_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 }
12479 static const struct builtin_description bdesc_1arg
[] =
12481 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
12482 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
12484 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
12485 { MASK_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
12486 { MASK_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
12488 { MASK_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
12489 { MASK_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
12490 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
12491 { MASK_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
12492 { MASK_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
12493 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
12495 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
12496 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
12498 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
12500 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
12501 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
12503 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
12504 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
12505 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
12506 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
12507 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
12509 { MASK_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
12511 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
12512 { MASK_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
12513 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
12514 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
12516 { MASK_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
12517 { MASK_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
12518 { MASK_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
12521 { MASK_SSE3
, CODE_FOR_sse3_movshdup
, 0, IX86_BUILTIN_MOVSHDUP
, 0, 0 },
12522 { MASK_SSE3
, CODE_FOR_sse3_movsldup
, 0, IX86_BUILTIN_MOVSLDUP
, 0, 0 },
12523 { MASK_SSE3
, CODE_FOR_sse3_movddup
, 0, IX86_BUILTIN_MOVDDUP
, 0, 0 }
/* Set up the target builtins by calling ix86_init_mmx_sse_builtins.
   NOTE(review): the line between the signature and the call is missing
   from this copy.  The comment on ix86_init_mmx_sse_builtins says that
   function is not called when TARGET_MMX is zero, so a guard
   presumably belongs there -- confirm.  */
12527 ix86_init_builtins (void)
12530 ix86_init_mmx_sse_builtins ();
12533 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12534 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12535 builtins. */
/* Build the tree type nodes needed by the MMX, SSE, SSE2, SSE3 and 3DNow!
   builtins and register each builtin through def_builtin.  Each def_builtin
   call passes a target mask (MASK_MMX, MASK_SSE, ...), the builtin's name,
   its function type and its IX86_BUILTIN_* code.  */
12537 ix86_init_mmx_sse_builtins (void)
12539 const struct builtin_description
* d
;
/* Vector type nodes, one per vector machine mode used below. */
12542 tree V16QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V16QImode
);
12543 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
12544 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
12545 tree V2DI_type_node
12546 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
12547 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
12548 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
12549 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
12550 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
12551 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
12552 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
/* Pointer types used by the load/store builtin signatures below. */
12554 tree pchar_type_node
= build_pointer_type (char_type_node
);
12555 tree pcchar_type_node
= build_pointer_type (
12556 build_type_variant (char_type_node
, 1, 0));
12557 tree pfloat_type_node
= build_pointer_type (float_type_node
);
12558 tree pcfloat_type_node
= build_pointer_type (
12559 build_type_variant (float_type_node
, 1, 0));
12560 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
12561 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
12562 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
/* Function type nodes.  Each is named <result>_ftype_<arguments>; the
   first type passed to the builder is the result type and the remaining
   ones are the argument types. */
12565 tree int_ftype_v4sf_v4sf
12566 = build_function_type_list (integer_type_node
,
12567 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12568 tree v4si_ftype_v4sf_v4sf
12569 = build_function_type_list (V4SI_type_node
,
12570 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12571 /* MMX/SSE/integer conversions. */
12572 tree int_ftype_v4sf
12573 = build_function_type_list (integer_type_node
,
12574 V4SF_type_node
, NULL_TREE
);
12575 tree int64_ftype_v4sf
12576 = build_function_type_list (long_long_integer_type_node
,
12577 V4SF_type_node
, NULL_TREE
);
12578 tree int_ftype_v8qi
12579 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
12580 tree v4sf_ftype_v4sf_int
12581 = build_function_type_list (V4SF_type_node
,
12582 V4SF_type_node
, integer_type_node
, NULL_TREE
);
12583 tree v4sf_ftype_v4sf_int64
12584 = build_function_type_list (V4SF_type_node
,
12585 V4SF_type_node
, long_long_integer_type_node
,
12587 tree v4sf_ftype_v4sf_v2si
12588 = build_function_type_list (V4SF_type_node
,
12589 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
12590 tree int_ftype_v4hi_int
12591 = build_function_type_list (integer_type_node
,
12592 V4HI_type_node
, integer_type_node
, NULL_TREE
);
12593 tree v4hi_ftype_v4hi_int_int
12594 = build_function_type_list (V4HI_type_node
, V4HI_type_node
,
12595 integer_type_node
, integer_type_node
,
12597 /* Miscellaneous. */
12598 tree v8qi_ftype_v4hi_v4hi
12599 = build_function_type_list (V8QI_type_node
,
12600 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12601 tree v4hi_ftype_v2si_v2si
12602 = build_function_type_list (V4HI_type_node
,
12603 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12604 tree v4sf_ftype_v4sf_v4sf_int
12605 = build_function_type_list (V4SF_type_node
,
12606 V4SF_type_node
, V4SF_type_node
,
12607 integer_type_node
, NULL_TREE
);
12608 tree v2si_ftype_v4hi_v4hi
12609 = build_function_type_list (V2SI_type_node
,
12610 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12611 tree v4hi_ftype_v4hi_int
12612 = build_function_type_list (V4HI_type_node
,
12613 V4HI_type_node
, integer_type_node
, NULL_TREE
);
12614 tree v4hi_ftype_v4hi_di
12615 = build_function_type_list (V4HI_type_node
,
12616 V4HI_type_node
, long_long_unsigned_type_node
,
12618 tree v2si_ftype_v2si_di
12619 = build_function_type_list (V2SI_type_node
,
12620 V2SI_type_node
, long_long_unsigned_type_node
,
12622 tree void_ftype_void
12623 = build_function_type (void_type_node
, void_list_node
);
12624 tree void_ftype_unsigned
12625 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
12626 tree void_ftype_unsigned_unsigned
12627 = build_function_type_list (void_type_node
, unsigned_type_node
,
12628 unsigned_type_node
, NULL_TREE
);
12629 tree void_ftype_pcvoid_unsigned_unsigned
12630 = build_function_type_list (void_type_node
, const_ptr_type_node
,
12631 unsigned_type_node
, unsigned_type_node
,
12633 tree unsigned_ftype_void
12634 = build_function_type (unsigned_type_node
, void_list_node
);
12636 = build_function_type (long_long_unsigned_type_node
, void_list_node
);
12637 tree v4sf_ftype_void
12638 = build_function_type (V4SF_type_node
, void_list_node
);
12639 tree v2si_ftype_v4sf
12640 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
12641 /* Loads/stores. */
12642 tree void_ftype_v8qi_v8qi_pchar
12643 = build_function_type_list (void_type_node
,
12644 V8QI_type_node
, V8QI_type_node
,
12645 pchar_type_node
, NULL_TREE
);
12646 tree v4sf_ftype_pcfloat
12647 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
12648 /* @@@ the type is bogus */
12649 tree v4sf_ftype_v4sf_pv2si
12650 = build_function_type_list (V4SF_type_node
,
12651 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
12652 tree void_ftype_pv2si_v4sf
12653 = build_function_type_list (void_type_node
,
12654 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
12655 tree void_ftype_pfloat_v4sf
12656 = build_function_type_list (void_type_node
,
12657 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
12658 tree void_ftype_pdi_di
12659 = build_function_type_list (void_type_node
,
12660 pdi_type_node
, long_long_unsigned_type_node
,
12662 tree void_ftype_pv2di_v2di
12663 = build_function_type_list (void_type_node
,
12664 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
12665 /* Normal vector unops. */
12666 tree v4sf_ftype_v4sf
12667 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12669 /* Normal vector binops. */
12670 tree v4sf_ftype_v4sf_v4sf
12671 = build_function_type_list (V4SF_type_node
,
12672 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12673 tree v8qi_ftype_v8qi_v8qi
12674 = build_function_type_list (V8QI_type_node
,
12675 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
12676 tree v4hi_ftype_v4hi_v4hi
12677 = build_function_type_list (V4HI_type_node
,
12678 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12679 tree v2si_ftype_v2si_v2si
12680 = build_function_type_list (V2SI_type_node
,
12681 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12682 tree di_ftype_di_di
12683 = build_function_type_list (long_long_unsigned_type_node
,
12684 long_long_unsigned_type_node
,
12685 long_long_unsigned_type_node
, NULL_TREE
);
12687 tree v2si_ftype_v2sf
12688 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
12689 tree v2sf_ftype_v2si
12690 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
12691 tree v2si_ftype_v2si
12692 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12693 tree v2sf_ftype_v2sf
12694 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12695 tree v2sf_ftype_v2sf_v2sf
12696 = build_function_type_list (V2SF_type_node
,
12697 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12698 tree v2si_ftype_v2sf_v2sf
12699 = build_function_type_list (V2SI_type_node
,
12700 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12701 tree pint_type_node
= build_pointer_type (integer_type_node
);
12702 tree pcint_type_node
= build_pointer_type (
12703 build_type_variant (integer_type_node
, 1, 0));
12704 tree pdouble_type_node
= build_pointer_type (double_type_node
);
12705 tree pcdouble_type_node
= build_pointer_type (
12706 build_type_variant (double_type_node
, 1, 0));
12707 tree int_ftype_v2df_v2df
12708 = build_function_type_list (integer_type_node
,
12709 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12712 = build_function_type (intTI_type_node
, void_list_node
);
12713 tree v2di_ftype_void
12714 = build_function_type (V2DI_type_node
, void_list_node
);
12715 tree ti_ftype_ti_ti
12716 = build_function_type_list (intTI_type_node
,
12717 intTI_type_node
, intTI_type_node
, NULL_TREE
);
12718 tree void_ftype_pcvoid
12719 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
12721 = build_function_type_list (V2DI_type_node
,
12722 long_long_unsigned_type_node
, NULL_TREE
);
12724 = build_function_type_list (long_long_unsigned_type_node
,
12725 V2DI_type_node
, NULL_TREE
);
12726 tree v4sf_ftype_v4si
12727 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
12728 tree v4si_ftype_v4sf
12729 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
12730 tree v2df_ftype_v4si
12731 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
12732 tree v4si_ftype_v2df
12733 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
12734 tree v2si_ftype_v2df
12735 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
12736 tree v4sf_ftype_v2df
12737 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
12738 tree v2df_ftype_v2si
12739 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
12740 tree v2df_ftype_v4sf
12741 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
12742 tree int_ftype_v2df
12743 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
12744 tree int64_ftype_v2df
12745 = build_function_type_list (long_long_integer_type_node
,
12746 V2DF_type_node
, NULL_TREE
);
12747 tree v2df_ftype_v2df_int
12748 = build_function_type_list (V2DF_type_node
,
12749 V2DF_type_node
, integer_type_node
, NULL_TREE
);
12750 tree v2df_ftype_v2df_int64
12751 = build_function_type_list (V2DF_type_node
,
12752 V2DF_type_node
, long_long_integer_type_node
,
12754 tree v4sf_ftype_v4sf_v2df
12755 = build_function_type_list (V4SF_type_node
,
12756 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
12757 tree v2df_ftype_v2df_v4sf
12758 = build_function_type_list (V2DF_type_node
,
12759 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
12760 tree v2df_ftype_v2df_v2df_int
12761 = build_function_type_list (V2DF_type_node
,
12762 V2DF_type_node
, V2DF_type_node
,
12765 tree v2df_ftype_v2df_pcdouble
12766 = build_function_type_list (V2DF_type_node
,
12767 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
12768 tree void_ftype_pdouble_v2df
12769 = build_function_type_list (void_type_node
,
12770 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
12771 tree void_ftype_pint_int
12772 = build_function_type_list (void_type_node
,
12773 pint_type_node
, integer_type_node
, NULL_TREE
);
12774 tree void_ftype_v16qi_v16qi_pchar
12775 = build_function_type_list (void_type_node
,
12776 V16QI_type_node
, V16QI_type_node
,
12777 pchar_type_node
, NULL_TREE
);
12778 tree v2df_ftype_pcdouble
12779 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
12780 tree v2df_ftype_v2df_v2df
12781 = build_function_type_list (V2DF_type_node
,
12782 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12783 tree v16qi_ftype_v16qi_v16qi
12784 = build_function_type_list (V16QI_type_node
,
12785 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
12786 tree v8hi_ftype_v8hi_v8hi
12787 = build_function_type_list (V8HI_type_node
,
12788 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
12789 tree v4si_ftype_v4si_v4si
12790 = build_function_type_list (V4SI_type_node
,
12791 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
12792 tree v2di_ftype_v2di_v2di
12793 = build_function_type_list (V2DI_type_node
,
12794 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
12795 tree v2di_ftype_v2df_v2df
12796 = build_function_type_list (V2DI_type_node
,
12797 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12798 tree v2df_ftype_v2df
12799 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12800 tree v2df_ftype_double
12801 = build_function_type_list (V2DF_type_node
, double_type_node
, NULL_TREE
);
12802 tree v2df_ftype_double_double
12803 = build_function_type_list (V2DF_type_node
,
12804 double_type_node
, double_type_node
, NULL_TREE
);
12805 tree int_ftype_v8hi_int
12806 = build_function_type_list (integer_type_node
,
12807 V8HI_type_node
, integer_type_node
, NULL_TREE
);
12808 tree v8hi_ftype_v8hi_int_int
12809 = build_function_type_list (V8HI_type_node
,
12810 V8HI_type_node
, integer_type_node
,
12811 integer_type_node
, NULL_TREE
);
12812 tree v2di_ftype_v2di_int
12813 = build_function_type_list (V2DI_type_node
,
12814 V2DI_type_node
, integer_type_node
, NULL_TREE
);
12815 tree v4si_ftype_v4si_int
12816 = build_function_type_list (V4SI_type_node
,
12817 V4SI_type_node
, integer_type_node
, NULL_TREE
);
12818 tree v8hi_ftype_v8hi_int
12819 = build_function_type_list (V8HI_type_node
,
12820 V8HI_type_node
, integer_type_node
, NULL_TREE
);
12821 tree v8hi_ftype_v8hi_v2di
12822 = build_function_type_list (V8HI_type_node
,
12823 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
12824 tree v4si_ftype_v4si_v2di
12825 = build_function_type_list (V4SI_type_node
,
12826 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
12827 tree v4si_ftype_v8hi_v8hi
12828 = build_function_type_list (V4SI_type_node
,
12829 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
12830 tree di_ftype_v8qi_v8qi
12831 = build_function_type_list (long_long_unsigned_type_node
,
12832 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
12833 tree di_ftype_v2si_v2si
12834 = build_function_type_list (long_long_unsigned_type_node
,
12835 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12836 tree v2di_ftype_v16qi_v16qi
12837 = build_function_type_list (V2DI_type_node
,
12838 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
12839 tree v2di_ftype_v4si_v4si
12840 = build_function_type_list (V2DI_type_node
,
12841 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
12842 tree int_ftype_v16qi
12843 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
12844 tree v16qi_ftype_pcchar
12845 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
12846 tree void_ftype_pchar_v16qi
12847 = build_function_type_list (void_type_node
,
12848 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
12849 tree v4si_ftype_pcint
12850 = build_function_type_list (V4SI_type_node
, pcint_type_node
, NULL_TREE
);
12851 tree void_ftype_pcint_v4si
12852 = build_function_type_list (void_type_node
,
12853 pcint_type_node
, V4SI_type_node
, NULL_TREE
);
12854 tree v2di_ftype_v2di
12855 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
12858 tree float128_type
;
12860 /* The __float80 type. */
12861 if (TYPE_MODE (long_double_type_node
) == XFmode
)
12862 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
12866 /* The __float80 type. */
12867 float80_type
= make_node (REAL_TYPE
);
12868 TYPE_PRECISION (float80_type
) = 80;
12869 layout_type (float80_type
);
12870 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
/* The __float128 type: a fresh REAL_TYPE node with 128-bit precision. */
12873 float128_type
= make_node (REAL_TYPE
);
12874 TYPE_PRECISION (float128_type
) = 128;
12875 layout_type (float128_type
);
12876 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
12878 /* Add all builtins that are more or less simple operations on two
12880 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
12882 /* Use one of the operands; the target can have a different mode for
12883 mask-generating compares. */
12884 enum machine_mode mode
;
12889 mode
= insn_data
[d
->icode
].operand
[1].mode
;
12894 type
= v16qi_ftype_v16qi_v16qi
;
12897 type
= v8hi_ftype_v8hi_v8hi
;
12900 type
= v4si_ftype_v4si_v4si
;
12903 type
= v2di_ftype_v2di_v2di
;
12906 type
= v2df_ftype_v2df_v2df
;
12909 type
= ti_ftype_ti_ti
;
12912 type
= v4sf_ftype_v4sf_v4sf
;
12915 type
= v8qi_ftype_v8qi_v8qi
;
12918 type
= v4hi_ftype_v4hi_v4hi
;
12921 type
= v2si_ftype_v2si_v2si
;
12924 type
= di_ftype_di_di
;
12931 /* Override for comparisons. */
12932 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
12933 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
12934 type
= v4si_ftype_v4sf_v4sf
;
12936 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
12937 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
12938 type
= v2di_ftype_v2df_v2df
;
12940 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
12943 /* Add the remaining MMX insns with somewhat more complicated types. */
12944 def_builtin (MASK_MMX
, "__builtin_ia32_mmx_zero", di_ftype_void
, IX86_BUILTIN_MMX_ZERO
);
12945 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
12946 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
12947 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
12948 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
12950 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
12951 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
12952 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
12954 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
12955 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
12957 def_builtin (MASK_MMX
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
12958 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
12960 /* comi/ucomi insns. */
12961 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
/* Entries whose mask is exactly MASK_SSE2 are given the V2DF comparison
   signature; the other registration below uses the V4SF one. */
12962 if (d
->mask
== MASK_SSE2
)
12963 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
12965 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
12967 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
12968 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
12969 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
12971 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
12972 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
12973 def_builtin (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
12974 def_builtin (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
12975 def_builtin (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
12976 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
12977 def_builtin (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
12978 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
12979 def_builtin (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
12980 def_builtin (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
12981 def_builtin (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
12983 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pextrw", int_ftype_v4hi_int
, IX86_BUILTIN_PEXTRW
);
12984 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int
, IX86_BUILTIN_PINSRW
);
12986 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
12988 def_builtin (MASK_SSE
, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADAPS
);
12989 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
12990 def_builtin (MASK_SSE
, "__builtin_ia32_loadss", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADSS
);
12991 def_builtin (MASK_SSE
, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREAPS
);
12992 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
12993 def_builtin (MASK_SSE
, "__builtin_ia32_storess", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORESS
);
12995 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
12996 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
12997 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
12998 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
13000 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
13001 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
13002 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
13003 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
13005 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
13007 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
13009 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
13010 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
13011 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
13012 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
13013 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
13014 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
13016 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
13018 /* Original 3DNow! */
13019 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
13020 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
13021 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
13022 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
13023 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
13024 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
13025 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
13026 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
13027 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
13028 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
13029 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
13030 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
13031 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
13032 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
13033 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
13034 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
13035 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
13036 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
13037 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
13038 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
13040 /* 3DNow! extension as used in the Athlon CPU. */
13041 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
13042 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
13043 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
13044 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
13045 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
13046 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
13048 def_builtin (MASK_SSE
, "__builtin_ia32_setzerops", v4sf_ftype_void
, IX86_BUILTIN_SSE_ZERO
);
/* SSE2 builtins (almost all registered under MASK_SSE2). */
13051 def_builtin (MASK_SSE2
, "__builtin_ia32_pextrw128", int_ftype_v8hi_int
, IX86_BUILTIN_PEXTRW128
);
13052 def_builtin (MASK_SSE2
, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int
, IX86_BUILTIN_PINSRW128
);
13054 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
13055 def_builtin (MASK_SSE2
, "__builtin_ia32_movq2dq", v2di_ftype_di
, IX86_BUILTIN_MOVQ2DQ
);
13056 def_builtin (MASK_SSE2
, "__builtin_ia32_movdq2q", di_ftype_v2di
, IX86_BUILTIN_MOVDQ2Q
);
13058 def_builtin (MASK_SSE2
, "__builtin_ia32_loadapd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADAPD
);
13059 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
13060 def_builtin (MASK_SSE2
, "__builtin_ia32_loadsd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADSD
);
13061 def_builtin (MASK_SSE2
, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREAPD
);
13062 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
13063 def_builtin (MASK_SSE2
, "__builtin_ia32_storesd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORESD
);
13065 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
13066 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
13067 def_builtin (MASK_SSE2
, "__builtin_ia32_storehpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREHPD
);
13068 def_builtin (MASK_SSE2
, "__builtin_ia32_storelpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORELPD
);
13070 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
13071 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
13072 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
13073 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
13074 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
13076 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
13077 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
13078 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
13079 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
13081 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
13082 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
13084 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
13086 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
13087 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
13089 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
13090 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
13091 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
13092 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
13093 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
13095 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
13097 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
13098 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
13099 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
13100 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
13102 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
13103 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
13104 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
13106 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
13107 def_builtin (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
13108 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
13109 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
13111 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd1", v2df_ftype_double
, IX86_BUILTIN_SETPD1
);
13112 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd", v2df_ftype_double_double
, IX86_BUILTIN_SETPD
);
13113 def_builtin (MASK_SSE2
, "__builtin_ia32_setzeropd", ti_ftype_void
, IX86_BUILTIN_CLRPD
);
13114 def_builtin (MASK_SSE2
, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADPD1
);
13115 def_builtin (MASK_SSE2
, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADRPD
);
13116 def_builtin (MASK_SSE2
, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREPD1
);
13117 def_builtin (MASK_SSE2
, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORERPD
);
13119 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
13120 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
13121 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
13123 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQA
);
13124 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
13125 def_builtin (MASK_SSE2
, "__builtin_ia32_loadd", v4si_ftype_pcint
, IX86_BUILTIN_LOADD
);
13126 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQA
);
13127 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
13128 def_builtin (MASK_SSE2
, "__builtin_ia32_stored", void_ftype_pcint_v4si
, IX86_BUILTIN_STORED
);
13129 def_builtin (MASK_SSE2
, "__builtin_ia32_movq", v2di_ftype_v2di
, IX86_BUILTIN_MOVQ
);
13131 def_builtin (MASK_SSE
, "__builtin_ia32_setzero128", v2di_ftype_void
, IX86_BUILTIN_CLRTI
);
13133 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
13134 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
13136 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
13137 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
13138 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
13140 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
13141 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
13142 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
13144 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
13145 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
13147 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
13148 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
13149 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
13150 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
13152 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
13153 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
13154 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
13155 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
13157 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
13158 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
13160 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
13162 /* Prescott New Instructions. */
13163 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
13164 void_ftype_pcvoid_unsigned_unsigned
,
13165 IX86_BUILTIN_MONITOR
);
13166 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
13167 void_ftype_unsigned_unsigned
,
13168 IX86_BUILTIN_MWAIT
);
13169 def_builtin (MASK_SSE3
, "__builtin_ia32_movshdup",
13171 IX86_BUILTIN_MOVSHDUP
);
13172 def_builtin (MASK_SSE3
, "__builtin_ia32_movsldup",
13174 IX86_BUILTIN_MOVSLDUP
);
13175 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
13176 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
13177 def_builtin (MASK_SSE3
, "__builtin_ia32_loadddup",
13178 v2df_ftype_pcdouble
, IX86_BUILTIN_LOADDDUP
);
13179 def_builtin (MASK_SSE3
, "__builtin_ia32_movddup",
13180 v2df_ftype_v2df
, IX86_BUILTIN_MOVDDUP
);
13183 /* Errors in the source file can cause expand_expr to return const0_rtx
13184 where we expect a vector. To avoid crashing, use one of the vector
13185 clear instructions. */
13187 safe_vector_operand (rtx x
, enum machine_mode mode
)
13189 if (x
== const0_rtx
)
13190 x
= CONST0_RTX (mode
);
13194 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13197 ix86_expand_binop_builtin (enum insn_code icode
, tree arglist
, rtx target
)
13200 tree arg0
= TREE_VALUE (arglist
);
13201 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13202 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13203 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13204 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
13205 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
13206 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
13208 if (VECTOR_MODE_P (mode0
))
13209 op0
= safe_vector_operand (op0
, mode0
);
13210 if (VECTOR_MODE_P (mode1
))
13211 op1
= safe_vector_operand (op1
, mode1
);
13213 if (optimize
|| !target
13214 || GET_MODE (target
) != tmode
13215 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13216 target
= gen_reg_rtx (tmode
);
13218 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
13220 rtx x
= gen_reg_rtx (V4SImode
);
13221 emit_insn (gen_sse2_loadd (x
, op1
));
13222 op1
= gen_lowpart (TImode
, x
);
13225 /* In case the insn wants input operands in modes different from
13226 the result, abort. */
13227 if ((GET_MODE (op0
) != mode0
&& GET_MODE (op0
) != VOIDmode
)
13228 || (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
))
13231 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13232 op0
= copy_to_mode_reg (mode0
, op0
);
13233 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13234 op1
= copy_to_mode_reg (mode1
, op1
);
13239 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
);
13241 pat
= GEN_FCN (icode
) (target
, xops
[1], xops
[2]);
13248 /* Subroutine of ix86_expand_builtin to take care of stores. */
13251 ix86_expand_store_builtin (enum insn_code icode
, tree arglist
)
13254 tree arg0
= TREE_VALUE (arglist
);
13255 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13256 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13257 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13258 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
13259 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
13261 if (VECTOR_MODE_P (mode1
))
13262 op1
= safe_vector_operand (op1
, mode1
);
13264 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13265 op1
= copy_to_mode_reg (mode1
, op1
);
13267 pat
= GEN_FCN (icode
) (op0
, op1
);
13273 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13276 ix86_expand_unop_builtin (enum insn_code icode
, tree arglist
,
13277 rtx target
, int do_load
)
13280 tree arg0
= TREE_VALUE (arglist
);
13281 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13282 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
13283 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
13285 if (optimize
|| !target
13286 || GET_MODE (target
) != tmode
13287 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13288 target
= gen_reg_rtx (tmode
);
13290 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13293 if (VECTOR_MODE_P (mode0
))
13294 op0
= safe_vector_operand (op0
, mode0
);
13296 if ((optimize
&& !register_operand (op0
, mode0
))
13297 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13298 op0
= copy_to_mode_reg (mode0
, op0
);
13301 pat
= GEN_FCN (icode
) (target
, op0
);
13308 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13309 sqrtss, rsqrtss, rcpss. */
13312 ix86_expand_unop1_builtin (enum insn_code icode
, tree arglist
, rtx target
)
13315 tree arg0
= TREE_VALUE (arglist
);
13316 rtx op1
, op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13317 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
13318 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
13320 if (optimize
|| !target
13321 || GET_MODE (target
) != tmode
13322 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13323 target
= gen_reg_rtx (tmode
);
13325 if (VECTOR_MODE_P (mode0
))
13326 op0
= safe_vector_operand (op0
, mode0
);
13328 if ((optimize
&& !register_operand (op0
, mode0
))
13329 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13330 op0
= copy_to_mode_reg (mode0
, op0
);
13333 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
13334 op1
= copy_to_mode_reg (mode0
, op1
);
13336 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13343 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13346 ix86_expand_sse_compare (const struct builtin_description
*d
, tree arglist
,
13350 tree arg0
= TREE_VALUE (arglist
);
13351 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13352 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13353 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13355 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
13356 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
13357 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
13358 enum rtx_code comparison
= d
->comparison
;
13360 if (VECTOR_MODE_P (mode0
))
13361 op0
= safe_vector_operand (op0
, mode0
);
13362 if (VECTOR_MODE_P (mode1
))
13363 op1
= safe_vector_operand (op1
, mode1
);
13365 /* Swap operands if we have a comparison that isn't available in
13367 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
13369 rtx tmp
= gen_reg_rtx (mode1
);
13370 emit_move_insn (tmp
, op1
);
13375 if (optimize
|| !target
13376 || GET_MODE (target
) != tmode
13377 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
13378 target
= gen_reg_rtx (tmode
);
13380 if ((optimize
&& !register_operand (op0
, mode0
))
13381 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
13382 op0
= copy_to_mode_reg (mode0
, op0
);
13383 if ((optimize
&& !register_operand (op1
, mode1
))
13384 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
13385 op1
= copy_to_mode_reg (mode1
, op1
);
13387 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
13388 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
13395 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13398 ix86_expand_sse_comi (const struct builtin_description
*d
, tree arglist
,
13402 tree arg0
= TREE_VALUE (arglist
);
13403 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13404 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13405 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13407 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
13408 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
13409 enum rtx_code comparison
= d
->comparison
;
13411 if (VECTOR_MODE_P (mode0
))
13412 op0
= safe_vector_operand (op0
, mode0
);
13413 if (VECTOR_MODE_P (mode1
))
13414 op1
= safe_vector_operand (op1
, mode1
);
13416 /* Swap operands if we have a comparison that isn't available in
13418 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
13425 target
= gen_reg_rtx (SImode
);
13426 emit_move_insn (target
, const0_rtx
);
13427 target
= gen_rtx_SUBREG (QImode
, target
, 0);
13429 if ((optimize
&& !register_operand (op0
, mode0
))
13430 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
13431 op0
= copy_to_mode_reg (mode0
, op0
);
13432 if ((optimize
&& !register_operand (op1
, mode1
))
13433 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
13434 op1
= copy_to_mode_reg (mode1
, op1
);
13436 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
13437 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
13441 emit_insn (gen_rtx_SET (VOIDmode
,
13442 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
13443 gen_rtx_fmt_ee (comparison
, QImode
,
13447 return SUBREG_REG (target
);
13450 /* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.

   SUBTARGET, MODE and IGNORE are all marked ATTRIBUTE_UNUSED here.  The
   function declaration is taken from operand 0 of operand 0 of EXP, the
   argument list from operand 1 of EXP, and the builtin is identified by
   DECL_FUNCTION_CODE of that declaration (FCODE).  Builtins that need
   special operand handling have their own `case' below; the remaining
   ones are looked up by FCODE in the bdesc_2arg, bdesc_1arg and
   bdesc_comi tables at the end.

   NOTE(review): this listing carries its own line numbers at the start
   of lines and those numbers skip values (e.g. 13474 is followed by
   13477), so lines appear to be missing -- presumably the return type,
   braces, the `switch' header and per-case `return'/`break' statements.
   Confirm against the upstream file before relying on control flow.  */
13457 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
13458 enum machine_mode mode ATTRIBUTE_UNUSED
,
13459 int ignore ATTRIBUTE_UNUSED
)
13461 const struct builtin_description
*d
;
13463 enum insn_code icode
;
13464 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
13465 tree arglist
= TREE_OPERAND (exp
, 1);
13466 tree arg0
, arg1
, arg2
;
13467 rtx op0
, op1
, op2
, pat
;
13468 enum machine_mode tmode
, mode0
, mode1
, mode2
;
13469 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
/* NOTE(review): the statement these case labels belong to is not present
   in this listing -- presumably `switch (fcode)'; confirm.  */
13473 case IX86_BUILTIN_EMMS
:
13474 emit_insn (gen_mmx_emms ());
13477 case IX86_BUILTIN_SFENCE
:
13478 emit_insn (gen_sse_sfence ());
/* PEXTRW: the selector (second argument) must satisfy the pattern's
   operand-2 predicate; otherwise an error naming the allowed range is
   reported (0..3 for IX86_BUILTIN_PEXTRW, 0..7 for the 128-bit variant)
   and a fresh register of the result mode is returned.  */
13481 case IX86_BUILTIN_PEXTRW
:
13482 case IX86_BUILTIN_PEXTRW128
:
13483 icode
= (fcode
== IX86_BUILTIN_PEXTRW
13484 ? CODE_FOR_mmx_pextrw
13485 : CODE_FOR_sse2_pextrw
);
13486 arg0
= TREE_VALUE (arglist
);
13487 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13488 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13489 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13490 tmode
= insn_data
[icode
].operand
[0].mode
;
13491 mode0
= insn_data
[icode
].operand
[1].mode
;
13492 mode1
= insn_data
[icode
].operand
[2].mode
;
13494 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13495 op0
= copy_to_mode_reg (mode0
, op0
);
13496 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13498 error ("selector must be an integer constant in the range 0..%i",
13499 fcode
== IX86_BUILTIN_PEXTRW
? 3:7);
13500 return gen_reg_rtx (tmode
);
13503 || GET_MODE (target
) != tmode
13504 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13505 target
= gen_reg_rtx (tmode
);
13506 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
/* PINSRW: three arguments; the third is the selector and is checked
   against the operand-3 predicate with the same range message as above.  */
13512 case IX86_BUILTIN_PINSRW
:
13513 case IX86_BUILTIN_PINSRW128
:
13514 icode
= (fcode
== IX86_BUILTIN_PINSRW
13515 ? CODE_FOR_mmx_pinsrw
13516 : CODE_FOR_sse2_pinsrw
);
13517 arg0
= TREE_VALUE (arglist
);
13518 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13519 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13520 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13521 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13522 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13523 tmode
= insn_data
[icode
].operand
[0].mode
;
13524 mode0
= insn_data
[icode
].operand
[1].mode
;
13525 mode1
= insn_data
[icode
].operand
[2].mode
;
13526 mode2
= insn_data
[icode
].operand
[3].mode
;
13528 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13529 op0
= copy_to_mode_reg (mode0
, op0
);
13530 if ((optimize
&& !register_operand (op1
, mode1
))
13531 || ! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13532 op1
= copy_to_mode_reg (mode1
, op1
);
13533 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
13535 error ("selector must be an integer constant in the range 0..%i",
13536 fcode
== IX86_BUILTIN_PINSRW
? 3:7);
13540 || GET_MODE (target
) != tmode
13541 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13542 target
= gen_reg_rtx (tmode
);
13543 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
/* MASKMOVQ / MASKMOVDQU: the third builtin argument (arg0 below) is the
   address; it is forced into a Pmode register and used as a MEM for
   insn operand 0.  */
13549 case IX86_BUILTIN_MASKMOVQ
:
13550 case IX86_BUILTIN_MASKMOVDQU
:
13551 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
13552 ? CODE_FOR_mmx_maskmovq
13553 : CODE_FOR_sse2_maskmovdqu
);
13554 /* Note the arg order is different from the operand order. */
13555 arg1
= TREE_VALUE (arglist
);
13556 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
13557 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13558 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13559 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13560 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13561 mode0
= insn_data
[icode
].operand
[0].mode
;
13562 mode1
= insn_data
[icode
].operand
[1].mode
;
13563 mode2
= insn_data
[icode
].operand
[2].mode
;
13565 op0
= force_reg (Pmode
, op0
);
13566 op0
= gen_rtx_MEM (mode1
, op0
);
13568 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
13569 op0
= copy_to_mode_reg (mode0
, op0
);
13570 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
13571 op1
= copy_to_mode_reg (mode1
, op1
);
13572 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
13573 op2
= copy_to_mode_reg (mode2
, op2
);
13574 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
/* The cases below are delegated to the helpers: ix86_expand_unop1_builtin
   for sqrtss/rsqrtss/rcpss, ix86_expand_unop_builtin with DO_LOAD = 1 for
   loads, and ix86_expand_store_builtin for stores.  */
13580 case IX86_BUILTIN_SQRTSS
:
13581 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, arglist
, target
);
13582 case IX86_BUILTIN_RSQRTSS
:
13583 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, arglist
, target
);
13584 case IX86_BUILTIN_RCPSS
:
13585 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, arglist
, target
);
13587 case IX86_BUILTIN_LOADAPS
:
13588 return ix86_expand_unop_builtin (CODE_FOR_movv4sf
, arglist
, target
, 1);
13590 case IX86_BUILTIN_LOADUPS
:
13591 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
13593 case IX86_BUILTIN_STOREAPS
:
13594 return ix86_expand_store_builtin (CODE_FOR_movv4sf
, arglist
);
13596 case IX86_BUILTIN_STOREUPS
:
13597 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
13599 case IX86_BUILTIN_LOADSS
:
13600 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss
, arglist
, target
, 1);
13602 case IX86_BUILTIN_STORESS
:
13603 return ix86_expand_store_builtin (CODE_FOR_sse_storess
, arglist
);
/* Half-vector loads: first argument is forced into a register of the
   operand-1 mode, second argument is an address turned into a MEM of
   the operand-2 mode.  */
13605 case IX86_BUILTIN_LOADHPS
:
13606 case IX86_BUILTIN_LOADLPS
:
13607 case IX86_BUILTIN_LOADHPD
:
13608 case IX86_BUILTIN_LOADLPD
:
13609 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
13610 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
13611 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
13612 : CODE_FOR_sse2_loadlpd
);
13613 arg0
= TREE_VALUE (arglist
);
13614 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13615 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13616 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13617 tmode
= insn_data
[icode
].operand
[0].mode
;
13618 mode0
= insn_data
[icode
].operand
[1].mode
;
13619 mode1
= insn_data
[icode
].operand
[2].mode
;
13621 op0
= force_reg (mode0
, op0
);
13622 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
13623 if (optimize
|| target
== 0
13624 || GET_MODE (target
) != tmode
13625 || !register_operand (target
, tmode
))
13626 target
= gen_reg_rtx (tmode
);
13627 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
/* Half-vector stores: first argument is the address (MEM of the
   operand-0 mode), second is the value forced into a register.  */
13633 case IX86_BUILTIN_STOREHPS
:
13634 case IX86_BUILTIN_STORELPS
:
13635 case IX86_BUILTIN_STOREHPD
:
13636 case IX86_BUILTIN_STORELPD
:
13637 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
13638 : fcode
== IX86_BUILTIN_STORELPS
? CODE_FOR_sse_storelps
13639 : fcode
== IX86_BUILTIN_STOREHPD
? CODE_FOR_sse2_storehpd
13640 : CODE_FOR_sse2_storelpd
);
13641 arg0
= TREE_VALUE (arglist
);
13642 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13643 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13644 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13645 mode0
= insn_data
[icode
].operand
[0].mode
;
13646 mode1
= insn_data
[icode
].operand
[1].mode
;
13648 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13649 op1
= force_reg (mode1
, op1
);
13651 pat
= GEN_FCN (icode
) (op0
, op1
);
13657 case IX86_BUILTIN_MOVNTPS
:
13658 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
13659 case IX86_BUILTIN_MOVNTQ
:
13660 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
/* LDMXCSR and STMXCSR both go through an SImode stack slot obtained
   from assign_386_stack_local.  */
13662 case IX86_BUILTIN_LDMXCSR
:
13663 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
13664 target
= assign_386_stack_local (SImode
, 0);
13665 emit_move_insn (target
, op0
);
13666 emit_insn (gen_sse_ldmxcsr (target
));
13669 case IX86_BUILTIN_STMXCSR
:
13670 target
= assign_386_stack_local (SImode
, 0);
13671 emit_insn (gen_sse_stmxcsr (target
));
13672 return copy_to_mode_reg (SImode
, target
);
/* SHUFPS / SHUFPD: the third argument is the mask and must satisfy the
   operand-3 predicate, otherwise "mask must be an immediate" is reported.  */
13674 case IX86_BUILTIN_SHUFPS
:
13675 case IX86_BUILTIN_SHUFPD
:
13676 icode
= (fcode
== IX86_BUILTIN_SHUFPS
13677 ? CODE_FOR_sse_shufps
13678 : CODE_FOR_sse2_shufpd
);
13679 arg0
= TREE_VALUE (arglist
);
13680 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13681 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13682 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13683 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13684 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13685 tmode
= insn_data
[icode
].operand
[0].mode
;
13686 mode0
= insn_data
[icode
].operand
[1].mode
;
13687 mode1
= insn_data
[icode
].operand
[2].mode
;
13688 mode2
= insn_data
[icode
].operand
[3].mode
;
13690 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13691 op0
= copy_to_mode_reg (mode0
, op0
);
13692 if ((optimize
&& !register_operand (op1
, mode1
))
13693 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13694 op1
= copy_to_mode_reg (mode1
, op1
);
13695 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
13697 /* @@@ better error message */
13698 error ("mask must be an immediate");
13699 return gen_reg_rtx (tmode
);
13701 if (optimize
|| target
== 0
13702 || GET_MODE (target
) != tmode
13703 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13704 target
= gen_reg_rtx (tmode
);
13705 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
/* PSHUF*: two arguments, the second being the mask.  Note that in this
   case the locals mode1/mode2 hold the modes of insn operands 1 and 2.  */
13711 case IX86_BUILTIN_PSHUFW
:
13712 case IX86_BUILTIN_PSHUFD
:
13713 case IX86_BUILTIN_PSHUFHW
:
13714 case IX86_BUILTIN_PSHUFLW
:
13715 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
13716 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
13717 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
13718 : CODE_FOR_mmx_pshufw
);
13719 arg0
= TREE_VALUE (arglist
);
13720 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13721 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13722 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13723 tmode
= insn_data
[icode
].operand
[0].mode
;
13724 mode1
= insn_data
[icode
].operand
[1].mode
;
13725 mode2
= insn_data
[icode
].operand
[2].mode
;
13727 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
13728 op0
= copy_to_mode_reg (mode1
, op0
);
13729 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
13731 /* @@@ better error message */
13732 error ("mask must be an immediate");
13736 || GET_MODE (target
) != tmode
13737 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13738 target
= gen_reg_rtx (tmode
);
13739 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
/* Whole-register shifts: the result is produced in a V2DImode register
   that is handed to the pattern as a subreg of the pattern's own result
   mode; the shift count must satisfy the operand-2 predicate.  */
13745 case IX86_BUILTIN_PSLLDQI128
:
13746 case IX86_BUILTIN_PSRLDQI128
:
13747 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
13748 : CODE_FOR_sse2_lshrti3
);
13749 arg0
= TREE_VALUE (arglist
);
13750 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13751 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13752 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13753 tmode
= insn_data
[icode
].operand
[0].mode
;
13754 mode1
= insn_data
[icode
].operand
[1].mode
;
13755 mode2
= insn_data
[icode
].operand
[2].mode
;
13757 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
13759 op0
= copy_to_reg (op0
);
13760 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
13762 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
13764 error ("shift must be an immediate");
13767 target
= gen_reg_rtx (V2DImode
);
13768 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
13774 case IX86_BUILTIN_FEMMS
:
13775 emit_insn (gen_mmx_femms ());
/* The following cases map one builtin to one insn code and delegate to
   the binop helper or to the unop helper with DO_LOAD = 0.  */
13778 case IX86_BUILTIN_PAVGUSB
:
13779 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, arglist
, target
);
13781 case IX86_BUILTIN_PF2ID
:
13782 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, arglist
, target
, 0);
13784 case IX86_BUILTIN_PFACC
:
13785 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, arglist
, target
);
13787 case IX86_BUILTIN_PFADD
:
13788 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, arglist
, target
);
13790 case IX86_BUILTIN_PFCMPEQ
:
13791 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, arglist
, target
);
13793 case IX86_BUILTIN_PFCMPGE
:
13794 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, arglist
, target
);
13796 case IX86_BUILTIN_PFCMPGT
:
13797 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, arglist
, target
);
13799 case IX86_BUILTIN_PFMAX
:
13800 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, arglist
, target
);
13802 case IX86_BUILTIN_PFMIN
:
13803 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, arglist
, target
);
13805 case IX86_BUILTIN_PFMUL
:
13806 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, arglist
, target
);
13808 case IX86_BUILTIN_PFRCP
:
13809 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, arglist
, target
, 0);
13811 case IX86_BUILTIN_PFRCPIT1
:
13812 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, arglist
, target
);
13814 case IX86_BUILTIN_PFRCPIT2
:
13815 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, arglist
, target
);
13817 case IX86_BUILTIN_PFRSQIT1
:
13818 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, arglist
, target
);
13820 case IX86_BUILTIN_PFRSQRT
:
13821 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, arglist
, target
, 0);
13823 case IX86_BUILTIN_PFSUB
:
13824 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, arglist
, target
);
13826 case IX86_BUILTIN_PFSUBR
:
13827 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, arglist
, target
);
13829 case IX86_BUILTIN_PI2FD
:
13830 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, arglist
, target
, 0);
13832 case IX86_BUILTIN_PMULHRW
:
13833 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, arglist
, target
);
13835 case IX86_BUILTIN_PF2IW
:
13836 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, arglist
, target
, 0);
13838 case IX86_BUILTIN_PFNACC
:
13839 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, arglist
, target
);
13841 case IX86_BUILTIN_PFPNACC
:
13842 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, arglist
, target
);
13844 case IX86_BUILTIN_PI2FW
:
13845 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, arglist
, target
, 0);
13847 case IX86_BUILTIN_PSWAPDSI
:
13848 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, arglist
, target
, 0);
13850 case IX86_BUILTIN_PSWAPDSF
:
13851 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, arglist
, target
, 0);
13853 case IX86_BUILTIN_SSE_ZERO
:
13854 return CONST0_RTX (V4SFmode
);
/* NOTE(review): the bodies of the next two cases are not present in this
   listing; as shown they would fall into the SQRTSD case.  Confirm
   against the upstream file.  */
13856 case IX86_BUILTIN_MMX_ZERO
:
13859 case IX86_BUILTIN_CLRTI
:
13862 case IX86_BUILTIN_SQRTSD
:
13863 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, arglist
, target
);
13864 case IX86_BUILTIN_LOADAPD
:
13865 return ix86_expand_unop_builtin (CODE_FOR_movv2df
, arglist
, target
, 1);
13866 case IX86_BUILTIN_LOADUPD
:
13867 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
13869 case IX86_BUILTIN_STOREAPD
:
13870 return ix86_expand_store_builtin (CODE_FOR_movv2df
, arglist
);
13871 case IX86_BUILTIN_STOREUPD
:
13872 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
13874 case IX86_BUILTIN_LOADSD
:
13875 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
, target
, 1);
13877 case IX86_BUILTIN_STORESD
:
13878 return ix86_expand_store_builtin (CODE_FOR_sse2_storelpd
, arglist
);
/* SETPD1: the scalar is written to a stack slot, loaded into a V2DFmode
   register with loadsd, then shufpd with selector 0 is applied to that
   register with itself as both sources.  */
13880 case IX86_BUILTIN_SETPD1
:
13881 target
= assign_386_stack_local (DFmode
, 0);
13882 arg0
= TREE_VALUE (arglist
);
13883 emit_move_insn (adjust_address (target
, DFmode
, 0),
13884 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
13885 op0
= gen_reg_rtx (V2DFmode
);
13886 emit_insn (gen_sse2_loadsd (op0
, adjust_address (target
, V2DFmode
, 0)));
13887 emit_insn (gen_sse2_shufpd (op0
, op0
, op0
, const0_rtx
));
/* SETPD: the two scalars are written at byte offsets 0 and 8 of a
   V2DFmode stack slot, which is then moved into a register.  */
13890 case IX86_BUILTIN_SETPD
:
13891 target
= assign_386_stack_local (V2DFmode
, 0);
13892 arg0
= TREE_VALUE (arglist
);
13893 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13894 emit_move_insn (adjust_address (target
, DFmode
, 0),
13895 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
13896 emit_move_insn (adjust_address (target
, DFmode
, 8),
13897 expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0));
13898 op0
= gen_reg_rtx (V2DFmode
);
13899 emit_move_insn (op0
, target
);
13902 case IX86_BUILTIN_LOADRPD
:
13903 target
= ix86_expand_unop_builtin (CODE_FOR_movv2df
, arglist
,
13904 gen_reg_rtx (V2DFmode
), 1);
13905 emit_insn (gen_sse2_shufpd (target
, target
, target
, const1_rtx
));
13908 case IX86_BUILTIN_LOADPD1
:
13909 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
,
13910 gen_reg_rtx (V2DFmode
), 1);
13911 emit_insn (gen_sse2_shufpd (target
, target
, target
, const0_rtx
));
13914 case IX86_BUILTIN_STOREPD1
:
13915 return ix86_expand_store_builtin (CODE_FOR_movv2df
, arglist
);
13916 case IX86_BUILTIN_STORERPD
:
13917 return ix86_expand_store_builtin (CODE_FOR_movv2df
, arglist
);
13919 case IX86_BUILTIN_CLRPD
:
13920 return CONST0_RTX (V2DFmode
);
13922 case IX86_BUILTIN_MFENCE
:
13923 emit_insn (gen_sse2_mfence ());
13925 case IX86_BUILTIN_LFENCE
:
13926 emit_insn (gen_sse2_lfence ());
13929 case IX86_BUILTIN_CLFLUSH
:
13930 arg0
= TREE_VALUE (arglist
);
13931 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13932 icode
= CODE_FOR_sse2_clflush
;
13933 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
13934 op0
= copy_to_mode_reg (Pmode
, op0
);
13936 emit_insn (gen_sse2_clflush (op0
));
13939 case IX86_BUILTIN_MOVNTPD
:
13940 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
13941 case IX86_BUILTIN_MOVNTDQ
:
13942 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
13943 case IX86_BUILTIN_MOVNTI
:
13944 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
13946 case IX86_BUILTIN_LOADDQA
:
13947 return ix86_expand_unop_builtin (CODE_FOR_movv2di
, arglist
, target
, 1);
13948 case IX86_BUILTIN_LOADDQU
:
13949 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, arglist
, target
, 1);
13950 case IX86_BUILTIN_LOADD
:
13951 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd
, arglist
, target
, 1);
13953 case IX86_BUILTIN_STOREDQA
:
13954 return ix86_expand_store_builtin (CODE_FOR_movv2di
, arglist
);
13955 case IX86_BUILTIN_STOREDQU
:
13956 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, arglist
);
13957 case IX86_BUILTIN_STORED
:
13958 return ix86_expand_store_builtin (CODE_FOR_sse2_stored
, arglist
);
/* MONITOR and MWAIT: every argument is copied into an SImode register
   before the insn is emitted.  NOTE(review): the listing numbers skip
   before each copy (13967, 13969, 13971, 13981, 13983), so each copy is
   presumably guarded by a condition that is not visible here; confirm.  */
13960 case IX86_BUILTIN_MONITOR
:
13961 arg0
= TREE_VALUE (arglist
);
13962 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13963 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13964 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13965 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13966 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13968 op0
= copy_to_mode_reg (SImode
, op0
);
13970 op1
= copy_to_mode_reg (SImode
, op1
);
13972 op2
= copy_to_mode_reg (SImode
, op2
);
13973 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
13976 case IX86_BUILTIN_MWAIT
:
13977 arg0
= TREE_VALUE (arglist
);
13978 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13979 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13980 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13982 op0
= copy_to_mode_reg (SImode
, op0
);
13984 op1
= copy_to_mode_reg (SImode
, op1
);
13985 emit_insn (gen_sse3_mwait (op0
, op1
));
13988 case IX86_BUILTIN_LOADDDUP
:
13989 return ix86_expand_unop_builtin (CODE_FOR_sse3_loadddup
, arglist
, target
, 1);
13991 case IX86_BUILTIN_LDDQU
:
13992 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, arglist
, target
,
/* Table-driven part: search the descriptor tables for an entry whose
   code equals FCODE.  Two-operand entries whose insn code is one of the
   four maskcmp patterns go to ix86_expand_sse_compare, other two-operand
   entries to the binop helper, one-operand entries to the unop helper
   (DO_LOAD = 0) and comi entries to ix86_expand_sse_comi.
   NOTE(review): the end of the preceding call and of the case list is
   not visible in this listing.  */
13999 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
14000 if (d
->code
== fcode
)
14002 /* Compares are treated specially. */
14003 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
14004 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
14005 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
14006 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
14007 return ix86_expand_sse_compare (d
, arglist
, target
);
14009 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
14012 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
14013 if (d
->code
== fcode
)
14014 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
14016 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
14017 if (d
->code
== fcode
)
14018 return ix86_expand_sse_comi (d
, arglist
, target
);
14020 /* @@@ Should really do something sensible here. */
14024 /* Store OPERAND to the memory after reload is completed. This means
14025 that we can't easily use assign_stack_local. */
14027 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
14030 if (!reload_completed
)
14032 if (TARGET_RED_ZONE
)
14034 result
= gen_rtx_MEM (mode
,
14035 gen_rtx_PLUS (Pmode
,
14037 GEN_INT (-RED_ZONE_SIZE
)));
14038 emit_move_insn (result
, operand
);
14040 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
14046 operand
= gen_lowpart (DImode
, operand
);
14050 gen_rtx_SET (VOIDmode
,
14051 gen_rtx_MEM (DImode
,
14052 gen_rtx_PRE_DEC (DImode
,
14053 stack_pointer_rtx
)),
14059 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
14068 split_di (&operand
, 1, operands
, operands
+ 1);
14070 gen_rtx_SET (VOIDmode
,
14071 gen_rtx_MEM (SImode
,
14072 gen_rtx_PRE_DEC (Pmode
,
14073 stack_pointer_rtx
)),
14076 gen_rtx_SET (VOIDmode
,
14077 gen_rtx_MEM (SImode
,
14078 gen_rtx_PRE_DEC (Pmode
,
14079 stack_pointer_rtx
)),
14084 /* It is better to store HImodes as SImodes. */
14085 if (!TARGET_PARTIAL_REG_STALL
)
14086 operand
= gen_lowpart (SImode
, operand
);
14090 gen_rtx_SET (VOIDmode
,
14091 gen_rtx_MEM (GET_MODE (operand
),
14092 gen_rtx_PRE_DEC (SImode
,
14093 stack_pointer_rtx
)),
14099 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
14104 /* Free operand from the memory. */
14106 ix86_free_from_memory (enum machine_mode mode
)
14108 if (!TARGET_RED_ZONE
)
14112 if (mode
== DImode
|| TARGET_64BIT
)
14114 else if (mode
== HImode
&& TARGET_PARTIAL_REG_STALL
)
14118 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14119 to pop or add instruction if registers are available. */
14120 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
14121 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
14126 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14127 QImode must go into class Q_REGS.
14128 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14129 movdf to do mem-to-mem moves through integer regs. */
14131 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
14133 if (GET_CODE (x
) == CONST_VECTOR
&& x
!= CONST0_RTX (GET_MODE (x
)))
14135 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
14137 /* SSE can't load any constant directly yet. */
14138 if (SSE_CLASS_P (class))
14140 /* Floats can load 0 and 1. */
14141 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x
))
14143 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14144 if (MAYBE_SSE_CLASS_P (class))
14145 return (reg_class_subset_p (class, GENERAL_REGS
)
14146 ? GENERAL_REGS
: FLOAT_REGS
);
14150 /* General regs can load everything. */
14151 if (reg_class_subset_p (class, GENERAL_REGS
))
14152 return GENERAL_REGS
;
14153 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14154 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14157 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x
))
14159 if (GET_MODE (x
) == QImode
&& ! reg_class_subset_p (class, Q_REGS
))
14164 /* If we are copying between general and FP registers, we need a memory
14165 location. The same is true for SSE and MMX registers.
14167 The macro can't work reliably when one of the CLASSES is class containing
14168 registers from multiple units (SSE, MMX, integer). We avoid this by never
14169 combining those units in single alternative in the machine description.
14170 Ensure that this constraint holds to avoid unexpected surprises.
14172 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14173 enforce these sanity checks. */
14175 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
14176 enum machine_mode mode
, int strict
)
14178 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
14179 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
14180 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
14181 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
14182 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
14183 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
14190 return (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
)
14191 || ((SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
)
14192 || MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
14193 && ((mode
!= SImode
&& (mode
!= DImode
|| !TARGET_64BIT
))
14194 || (!TARGET_INTER_UNIT_MOVES
&& !optimize_size
))));
14196 /* Return the cost of moving data from a register in class CLASS1 to
14197 one in class CLASS2.
14199 It is not required that the cost always equal 2 when FROM is the same as TO;
14200 on some machines it is expensive to move between registers if they are not
14201 general registers. */
14203 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
14204 enum reg_class class2
)
14206 /* In case we require secondary memory, compute cost of the store followed
14207 by load. In order to avoid bad register allocation choices, we need
14208 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14210 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
14214 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
14215 MEMORY_MOVE_COST (mode
, class1
, 1));
14216 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
14217 MEMORY_MOVE_COST (mode
, class2
, 1));
14219 /* In case of copying from general_purpose_register we may emit multiple
14220 stores followed by single load causing memory size mismatch stall.
14221 Count this as arbitrarily high cost of 20. */
14222 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
14225 /* In the case of FP/MMX moves, the registers actually overlap, and we
14226 have to switch modes in order to treat them differently. */
14227 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
14228 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
14234 /* Moves between SSE/MMX and integer unit are expensive. */
14235 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
14236 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
14237 return ix86_cost
->mmxsse_to_integer
;
14238 if (MAYBE_FLOAT_CLASS_P (class1
))
14239 return ix86_cost
->fp_move
;
14240 if (MAYBE_SSE_CLASS_P (class1
))
14241 return ix86_cost
->sse_move
;
14242 if (MAYBE_MMX_CLASS_P (class1
))
14243 return ix86_cost
->mmx_move
;
14247 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14249 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
14251 /* Flags and only flags can only hold CCmode values. */
14252 if (CC_REGNO_P (regno
))
14253 return GET_MODE_CLASS (mode
) == MODE_CC
;
14254 if (GET_MODE_CLASS (mode
) == MODE_CC
14255 || GET_MODE_CLASS (mode
) == MODE_RANDOM
14256 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
14258 if (FP_REGNO_P (regno
))
14259 return VALID_FP_MODE_P (mode
);
14260 if (SSE_REGNO_P (regno
))
14262 /* We implement the move patterns for all vector modes into and
14263 out of SSE registers, even when no operation instructions
14265 return (VALID_SSE_REG_MODE (mode
)
14266 || VALID_SSE2_REG_MODE (mode
)
14267 || VALID_MMX_REG_MODE (mode
)
14268 || VALID_MMX_REG_MODE_3DNOW (mode
));
14270 if (MMX_REGNO_P (regno
))
14272 /* We implement the move patterns for 3DNOW modes even in MMX mode,
14273 so if the register is available at all, then we can move data of
14274 the given mode into or out of it. */
14275 return (VALID_MMX_REG_MODE (mode
)
14276 || VALID_MMX_REG_MODE_3DNOW (mode
));
14278 /* We handle both integer and floats in the general purpose registers.
14279 In future we should be able to handle vector modes as well. */
14280 if (!VALID_INT_MODE_P (mode
) && !VALID_FP_MODE_P (mode
))
14282 /* Take care for QImode values - they can be in non-QI regs, but then
14283 they do cause partial register stalls. */
14284 if (regno
< 4 || mode
!= QImode
|| TARGET_64BIT
)
14286 return reload_in_progress
|| reload_completed
|| !TARGET_PARTIAL_REG_STALL
;
14289 /* Return the cost of moving data of mode M between a
14290 register and memory. A value of 2 is the default; this cost is
14291 relative to those in `REGISTER_MOVE_COST'.
14293 If moving between registers and memory is more expensive than
14294 between two registers, you should define this macro to express the
14297 Model also increased moving costs of QImode registers in non
14301 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
14303 if (FLOAT_CLASS_P (class))
14320 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
14322 if (SSE_CLASS_P (class))
14325 switch (GET_MODE_SIZE (mode
))
14339 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
14341 if (MMX_CLASS_P (class))
14344 switch (GET_MODE_SIZE (mode
))
14355 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
14357 switch (GET_MODE_SIZE (mode
))
14361 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
14362 : ix86_cost
->movzbl_load
);
14364 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
14365 : ix86_cost
->int_store
[0] + 4);
14368 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
14370 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14371 if (mode
== TFmode
)
14373 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
14374 * (((int) GET_MODE_SIZE (mode
)
14375 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
14379 /* Compute a (partial) cost for rtx X. Return true if the complete
14380 cost has been computed, and false if subexpressions should be
14381 scanned. In either case, *TOTAL contains the cost result. */
14384 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
14386 enum machine_mode mode
= GET_MODE (x
);
14394 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
14396 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
14398 else if (flag_pic
&& SYMBOLIC_CONST (x
)
14400 || (!GET_CODE (x
) != LABEL_REF
14401 && (GET_CODE (x
) != SYMBOL_REF
14402 || !SYMBOL_REF_LOCAL_P (x
)))))
14409 if (mode
== VOIDmode
)
14412 switch (standard_80387_constant_p (x
))
14417 default: /* Other constants */
14422 /* Start with (MEM (SYMBOL_REF)), since that's where
14423 it'll probably end up. Add a penalty for size. */
14424 *total
= (COSTS_N_INSNS (1)
14425 + (flag_pic
!= 0 && !TARGET_64BIT
)
14426 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
14432 /* The zero extensions is often completely free on x86_64, so make
14433 it as cheap as possible. */
14434 if (TARGET_64BIT
&& mode
== DImode
14435 && GET_MODE (XEXP (x
, 0)) == SImode
)
14437 else if (TARGET_ZERO_EXTEND_WITH_AND
)
14438 *total
= COSTS_N_INSNS (ix86_cost
->add
);
14440 *total
= COSTS_N_INSNS (ix86_cost
->movzx
);
14444 *total
= COSTS_N_INSNS (ix86_cost
->movsx
);
14448 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
14449 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
14451 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
14454 *total
= COSTS_N_INSNS (ix86_cost
->add
);
14457 if ((value
== 2 || value
== 3)
14458 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
14460 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
14470 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
14472 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
14474 if (INTVAL (XEXP (x
, 1)) > 32)
14475 *total
= COSTS_N_INSNS(ix86_cost
->shift_const
+ 2);
14477 *total
= COSTS_N_INSNS(ix86_cost
->shift_const
* 2);
14481 if (GET_CODE (XEXP (x
, 1)) == AND
)
14482 *total
= COSTS_N_INSNS(ix86_cost
->shift_var
* 2);
14484 *total
= COSTS_N_INSNS(ix86_cost
->shift_var
* 6 + 2);
14489 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
14490 *total
= COSTS_N_INSNS (ix86_cost
->shift_const
);
14492 *total
= COSTS_N_INSNS (ix86_cost
->shift_var
);
14497 if (FLOAT_MODE_P (mode
))
14499 *total
= COSTS_N_INSNS (ix86_cost
->fmul
);
14504 rtx op0
= XEXP (x
, 0);
14505 rtx op1
= XEXP (x
, 1);
14507 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
14509 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
14510 for (nbits
= 0; value
!= 0; value
&= value
- 1)
14514 /* This is arbitrary. */
14517 /* Compute costs correctly for widening multiplication. */
14518 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
14519 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
14520 == GET_MODE_SIZE (mode
))
14522 int is_mulwiden
= 0;
14523 enum machine_mode inner_mode
= GET_MODE (op0
);
14525 if (GET_CODE (op0
) == GET_CODE (op1
))
14526 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
14527 else if (GET_CODE (op1
) == CONST_INT
)
14529 if (GET_CODE (op0
) == SIGN_EXTEND
)
14530 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
14533 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
14537 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
14540 *total
= COSTS_N_INSNS (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
14541 + nbits
* ix86_cost
->mult_bit
)
14542 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
);
14551 if (FLOAT_MODE_P (mode
))
14552 *total
= COSTS_N_INSNS (ix86_cost
->fdiv
);
14554 *total
= COSTS_N_INSNS (ix86_cost
->divide
[MODE_INDEX (mode
)]);
14558 if (FLOAT_MODE_P (mode
))
14559 *total
= COSTS_N_INSNS (ix86_cost
->fadd
);
14560 else if (GET_MODE_CLASS (mode
) == MODE_INT
14561 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
14563 if (GET_CODE (XEXP (x
, 0)) == PLUS
14564 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
14565 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == CONST_INT
14566 && CONSTANT_P (XEXP (x
, 1)))
14568 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
14569 if (val
== 2 || val
== 4 || val
== 8)
14571 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
14572 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
14573 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
14575 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
14579 else if (GET_CODE (XEXP (x
, 0)) == MULT
14580 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)
14582 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
14583 if (val
== 2 || val
== 4 || val
== 8)
14585 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
14586 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
14587 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
14591 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
14593 *total
= COSTS_N_INSNS (ix86_cost
->lea
);
14594 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
14595 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
14596 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
14603 if (FLOAT_MODE_P (mode
))
14605 *total
= COSTS_N_INSNS (ix86_cost
->fadd
);
14613 if (!TARGET_64BIT
&& mode
== DImode
)
14615 *total
= (COSTS_N_INSNS (ix86_cost
->add
) * 2
14616 + (rtx_cost (XEXP (x
, 0), outer_code
)
14617 << (GET_MODE (XEXP (x
, 0)) != DImode
))
14618 + (rtx_cost (XEXP (x
, 1), outer_code
)
14619 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
14625 if (FLOAT_MODE_P (mode
))
14627 *total
= COSTS_N_INSNS (ix86_cost
->fchs
);
14633 if (!TARGET_64BIT
&& mode
== DImode
)
14634 *total
= COSTS_N_INSNS (ix86_cost
->add
* 2);
14636 *total
= COSTS_N_INSNS (ix86_cost
->add
);
14640 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
14641 && XEXP (XEXP (x
, 0), 1) == const1_rtx
14642 && GET_CODE (XEXP (XEXP (x
, 0), 2)) == CONST_INT
14643 && XEXP (x
, 1) == const0_rtx
)
14645 /* This kind of construct is implemented using test[bwl].
14646 Treat it as if we had an AND. */
14647 *total
= (COSTS_N_INSNS (ix86_cost
->add
)
14648 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
14649 + rtx_cost (const1_rtx
, outer_code
));
14655 if (!TARGET_SSE_MATH
14657 || (mode
== DFmode
&& !TARGET_SSE2
))
14662 if (FLOAT_MODE_P (mode
))
14663 *total
= COSTS_N_INSNS (ix86_cost
->fabs
);
14667 if (FLOAT_MODE_P (mode
))
14668 *total
= COSTS_N_INSNS (ix86_cost
->fsqrt
);
14672 if (XINT (x
, 1) == UNSPEC_TP
)
#if TARGET_MACHO

/* Counter used to give every stub its own LPC$n / Ln$lz labels.  */
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.

   FILE is the assembler output stream, SYMB the name of the symbol the
   stub stands for and STUB the name of the stub itself.  Three pieces
   are written: the stub, its binder entry (which jumps to
   dyld_stub_binding_helper) and the lazy pointer, which initially
   holds the binder's address.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      /* Load the PC into %eax and jump through the lazy pointer
         addressed relative to it.  */
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */

14744 /* Order the registers for register allocator. */
14747 x86_order_regs_for_local_alloc (void)
14752 /* First allocate the local general purpose registers. */
14753 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
14754 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
14755 reg_alloc_order
[pos
++] = i
;
14757 /* Global general purpose registers. */
14758 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
14759 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
14760 reg_alloc_order
[pos
++] = i
;
14762 /* x87 registers come first in case we are doing FP math
14764 if (!TARGET_SSE_MATH
)
14765 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
14766 reg_alloc_order
[pos
++] = i
;
14768 /* SSE registers. */
14769 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
14770 reg_alloc_order
[pos
++] = i
;
14771 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
14772 reg_alloc_order
[pos
++] = i
;
14774 /* x87 registers. */
14775 if (TARGET_SSE_MATH
)
14776 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
14777 reg_alloc_order
[pos
++] = i
;
14779 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
14780 reg_alloc_order
[pos
++] = i
;
14782 /* Initialize the rest of array as we do not allocate some registers
14784 while (pos
< FIRST_PSEUDO_REGISTER
)
14785 reg_alloc_order
[pos
++] = 0;
/* MS bitfield layout is off unless the target configuration says
   otherwise.  */
#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
#define TARGET_USE_MS_BITFIELD_LAYOUT 0
#endif

14792 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14793 struct attribute_spec.handler. */
14795 ix86_handle_struct_attribute (tree
*node
, tree name
,
14796 tree args ATTRIBUTE_UNUSED
,
14797 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
14800 if (DECL_P (*node
))
14802 if (TREE_CODE (*node
) == TYPE_DECL
)
14803 type
= &TREE_TYPE (*node
);
14808 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
14809 || TREE_CODE (*type
) == UNION_TYPE
)))
14811 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name
));
14812 *no_add_attrs
= true;
14815 else if ((is_attribute_p ("ms_struct", name
)
14816 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
14817 || ((is_attribute_p ("gcc_struct", name
)
14818 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
14820 warning ("%qs incompatible attribute ignored",
14821 IDENTIFIER_POINTER (name
));
14822 *no_add_attrs
= true;
14829 ix86_ms_bitfield_layout_p (tree record_type
)
14831 return (TARGET_USE_MS_BITFIELD_LAYOUT
&&
14832 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
14833 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
14836 /* Returns an expression indicating where the this parameter is
14837 located on entry to the FUNCTION. */
14840 x86_this_parameter (tree function
)
14842 tree type
= TREE_TYPE (function
);
14846 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
14847 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
14850 if (ix86_function_regparm (type
, function
) > 0)
14854 parm
= TYPE_ARG_TYPES (type
);
14855 /* Figure out whether or not the function has a variable number of
14857 for (; parm
; parm
= TREE_CHAIN (parm
))
14858 if (TREE_VALUE (parm
) == void_type_node
)
14860 /* If not, the this parameter is in the first argument. */
14864 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
14866 return gen_rtx_REG (SImode
, regno
);
14870 if (aggregate_value_p (TREE_TYPE (type
), type
))
14871 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
14873 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
14876 /* Determine whether x86_output_mi_thunk can succeed. */
14879 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
14880 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
14881 HOST_WIDE_INT vcall_offset
, tree function
)
14883 /* 64-bit can handle anything. */
14887 /* For 32-bit, everything's fine if we have one free register. */
14888 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
14891 /* Need a free register for vcall_offset. */
14895 /* Need a free register for GOT references. */
14896 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
14899 /* Otherwise ok. */
14903 /* Output the assembler code for a thunk function. THUNK_DECL is the
14904 declaration for the thunk function itself, FUNCTION is the decl for
14905 the target function. DELTA is an immediate constant offset to be
14906 added to THIS. If VCALL_OFFSET is nonzero, the word at
14907 *(*this + vcall_offset) should be added to THIS. */
14910 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
14911 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
14912 HOST_WIDE_INT vcall_offset
, tree function
)
14915 rtx
this = x86_this_parameter (function
);
14918 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14919 pull it in now and let DELTA benefit. */
14922 else if (vcall_offset
)
14924 /* Put the this parameter into %eax. */
14926 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
14927 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
14930 this_reg
= NULL_RTX
;
14932 /* Adjust the this parameter by a fixed constant. */
14935 xops
[0] = GEN_INT (delta
);
14936 xops
[1] = this_reg
? this_reg
: this;
14939 if (!x86_64_general_operand (xops
[0], DImode
))
14941 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
14943 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
14947 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
14950 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
14953 /* Adjust the this parameter by a value stored in the vtable. */
14957 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
14960 int tmp_regno
= 2 /* ECX */;
14961 if (lookup_attribute ("fastcall",
14962 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
14963 tmp_regno
= 0 /* EAX */;
14964 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
14967 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
14970 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
14972 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
14974 /* Adjust the this parameter. */
14975 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
14976 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
14978 rtx tmp2
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
14979 xops
[0] = GEN_INT (vcall_offset
);
14981 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
14982 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
14984 xops
[1] = this_reg
;
14986 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
14988 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
14991 /* If necessary, drop THIS back to its stack slot. */
14992 if (this_reg
&& this_reg
!= this)
14994 xops
[0] = this_reg
;
14996 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
14999 xops
[0] = XEXP (DECL_RTL (function
), 0);
15002 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
15003 output_asm_insn ("jmp\t%P0", xops
);
15006 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
15007 tmp
= gen_rtx_CONST (Pmode
, tmp
);
15008 tmp
= gen_rtx_MEM (QImode
, tmp
);
15010 output_asm_insn ("jmp\t%A0", xops
);
15015 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
15016 output_asm_insn ("jmp\t%P0", xops
);
15021 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
15022 tmp
= (gen_rtx_SYMBOL_REF
15024 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
15025 tmp
= gen_rtx_MEM (QImode
, tmp
);
15027 output_asm_insn ("jmp\t%0", xops
);
15030 #endif /* TARGET_MACHO */
15032 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
15033 output_set_got (tmp
);
15036 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
15037 output_asm_insn ("jmp\t{*}%1", xops
);
15043 x86_file_start (void)
15045 default_file_start ();
15046 if (X86_FILE_START_VERSION_DIRECTIVE
)
15047 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
15048 if (X86_FILE_START_FLTUSED
)
15049 fputs ("\t.global\t__fltused\n", asm_out_file
);
15050 if (ix86_asm_dialect
== ASM_INTEL
)
15051 fputs ("\t.intel_syntax\n", asm_out_file
);
15055 x86_field_alignment (tree field
, int computed
)
15057 enum machine_mode mode
;
15058 tree type
= TREE_TYPE (field
);
15060 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
15062 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
15063 ? get_inner_array_type (type
) : type
);
15064 if (mode
== DFmode
|| mode
== DCmode
15065 || GET_MODE_CLASS (mode
) == MODE_INT
15066 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
15067 return MIN (32, computed
);
15071 /* Output assembler code to FILE to increment profiler label # LABELNO
15072 for profiling a function entry. */
15074 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
15079 #ifndef NO_PROFILE_COUNTERS
15080 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
15082 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
15086 #ifndef NO_PROFILE_COUNTERS
15087 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
15089 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
15093 #ifndef NO_PROFILE_COUNTERS
15094 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15095 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
15097 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
15101 #ifndef NO_PROFILE_COUNTERS
15102 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
15103 PROFILE_COUNT_REGISTER
);
15105 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
15109 /* We don't have exact information about the insn sizes, but we may assume
15110 quite safely that we are informed about all 1 byte insns and memory
15111 address sizes. This is enough to eliminate unnecessary padding in
15115 min_insn_size (rtx insn
)
15119 if (!INSN_P (insn
) || !active_insn_p (insn
))
15122 /* Discard alignments we've emit and jump instructions. */
15123 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
15124 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
15126 if (GET_CODE (insn
) == JUMP_INSN
15127 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
15128 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
15131 /* Important case - calls are always 5 bytes.
15132 It is common to have many calls in the row. */
15133 if (GET_CODE (insn
) == CALL_INSN
15134 && symbolic_reference_mentioned_p (PATTERN (insn
))
15135 && !SIBLING_CALL_P (insn
))
15137 if (get_attr_length (insn
) <= 1)
15140 /* For normal instructions we may rely on the sizes of addresses
15141 and the presence of symbol to require 4 bytes of encoding.
15142 This is not the case for jumps where references are PC relative. */
15143 if (GET_CODE (insn
) != JUMP_INSN
)
15145 l
= get_attr_length_address (insn
);
15146 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
15155 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15159 ix86_avoid_jump_misspredicts (void)
15161 rtx insn
, start
= get_insns ();
15162 int nbytes
= 0, njumps
= 0;
15165 /* Look for all minimal intervals of instructions containing 4 jumps.
15166 The intervals are bounded by START and INSN. NBYTES is the total
15167 size of instructions in the interval including INSN and not including
15168 START. When the NBYTES is smaller than 16 bytes, it is possible
15169 that the end of START and INSN ends up in the same 16byte page.
15171 The smallest offset in the page INSN can start is the case where START
15172 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15173 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15175 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
15178 nbytes
+= min_insn_size (insn
);
15180 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
15181 INSN_UID (insn
), min_insn_size (insn
));
15182 if ((GET_CODE (insn
) == JUMP_INSN
15183 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
15184 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
15185 || GET_CODE (insn
) == CALL_INSN
)
15192 start
= NEXT_INSN (start
);
15193 if ((GET_CODE (start
) == JUMP_INSN
15194 && GET_CODE (PATTERN (start
)) != ADDR_VEC
15195 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
15196 || GET_CODE (start
) == CALL_INSN
)
15197 njumps
--, isjump
= 1;
15200 nbytes
-= min_insn_size (start
);
15205 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
15206 INSN_UID (start
), INSN_UID (insn
), nbytes
);
15208 if (njumps
== 3 && isjump
&& nbytes
< 16)
15210 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
15213 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
15214 INSN_UID (insn
), padsize
);
15215 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
15220 /* AMD Athlon works faster
15221 when RET is not destination of conditional jump or directly preceded
15222 by other jump instruction. We avoid the penalty by inserting NOP just
15223 before the RET instructions in such cases. */
15225 ix86_pad_returns (void)
15230 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
15232 basic_block bb
= e
->src
;
15233 rtx ret
= BB_END (bb
);
15235 bool replace
= false;
15237 if (GET_CODE (ret
) != JUMP_INSN
|| GET_CODE (PATTERN (ret
)) != RETURN
15238 || !maybe_hot_bb_p (bb
))
15240 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
15241 if (active_insn_p (prev
) || GET_CODE (prev
) == CODE_LABEL
)
15243 if (prev
&& GET_CODE (prev
) == CODE_LABEL
)
15248 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
15249 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
15250 && !(e
->flags
& EDGE_FALLTHRU
))
15255 prev
= prev_active_insn (ret
);
15257 && ((GET_CODE (prev
) == JUMP_INSN
&& any_condjump_p (prev
))
15258 || GET_CODE (prev
) == CALL_INSN
))
15260 /* Empty functions get branch mispredict even when the jump destination
15261 is not visible to us. */
15262 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
15267 emit_insn_before (gen_return_internal_long (), ret
);
15273 /* Implement machine specific optimizations. We implement padding of returns
15274 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
15278 if (TARGET_ATHLON_K8
&& optimize
&& !optimize_size
)
15279 ix86_pad_returns ();
15280 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
15281 ix86_avoid_jump_misspredicts ();
15284 /* Return nonzero when QImode register that must be represented via REX prefix
15287 x86_extended_QIreg_mentioned_p (rtx insn
)
15290 extract_insn_cached (insn
);
15291 for (i
= 0; i
< recog_data
.n_operands
; i
++)
15292 if (REG_P (recog_data
.operand
[i
])
15293 && REGNO (recog_data
.operand
[i
]) >= 4)
15298 /* Return nonzero when P points to register encoded via REX prefix.
15299 Called via for_each_rtx. */
15301 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
15303 unsigned int regno
;
15306 regno
= REGNO (*p
);
15307 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
15310 /* Return true when INSN mentions register that must be encoded using REX
15313 x86_extended_reg_mentioned_p (rtx insn
)
15315 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
15318 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15319 optabs would emit if we didn't have TFmode patterns. */
15322 x86_emit_floatuns (rtx operands
[2])
15324 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
15325 enum machine_mode mode
, inmode
;
15327 inmode
= GET_MODE (operands
[1]);
15328 if (inmode
!= SImode
15329 && inmode
!= DImode
)
15333 in
= force_reg (inmode
, operands
[1]);
15334 mode
= GET_MODE (out
);
15335 neglab
= gen_label_rtx ();
15336 donelab
= gen_label_rtx ();
15337 i1
= gen_reg_rtx (Pmode
);
15338 f0
= gen_reg_rtx (mode
);
15340 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, Pmode
, 0, neglab
);
15342 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_FLOAT (mode
, in
)));
15343 emit_jump_insn (gen_jump (donelab
));
15346 emit_label (neglab
);
15348 i0
= expand_simple_binop (Pmode
, LSHIFTRT
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
15349 i1
= expand_simple_binop (Pmode
, AND
, in
, const1_rtx
, NULL
, 1, OPTAB_DIRECT
);
15350 i0
= expand_simple_binop (Pmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
15351 expand_float (f0
, i0
, 0);
15352 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
15354 emit_label (donelab
);
15357 /* Initialize vector TARGET via VALS. */
15359 ix86_expand_vector_init (rtx target
, rtx vals
)
15361 enum machine_mode mode
= GET_MODE (target
);
15362 int elt_size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
15363 int n_elts
= (GET_MODE_SIZE (mode
) / elt_size
);
15366 for (i
= n_elts
- 1; i
>= 0; i
--)
15367 if (GET_CODE (XVECEXP (vals
, 0, i
)) != CONST_INT
15368 && GET_CODE (XVECEXP (vals
, 0, i
)) != CONST_DOUBLE
)
15371 /* Few special cases first...
15372 ... constants are best loaded from constant pool. */
15375 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
15379 /* ... values where only first field is non-constant are best loaded
15380 from the pool and overwritten via move later. */
15383 XVECEXP (vals
, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode
));
15384 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
15386 switch (GET_MODE (target
))
15389 emit_insn (gen_sse2_loadlpd (target
, target
, XVECEXP (vals
, 0, 0)));
15394 /* ??? We can represent this better. */
15395 rtx op
= simplify_gen_subreg (mode
, XVECEXP (vals
, 0, 0),
15396 GET_MODE_INNER (mode
), 0);
15397 op
= force_reg (mode
, op
);
15398 emit_insn (gen_sse_movss (target
, target
, op
));
15408 /* And the busy sequence doing rotations. */
15409 switch (GET_MODE (target
))
15414 simplify_gen_subreg (V2DFmode
, XVECEXP (vals
, 0, 0), DFmode
, 0);
15416 simplify_gen_subreg (V2DFmode
, XVECEXP (vals
, 0, 1), DFmode
, 0);
15418 vecop0
= force_reg (V2DFmode
, vecop0
);
15419 vecop1
= force_reg (V2DFmode
, vecop1
);
15420 emit_insn (gen_sse2_unpcklpd (target
, vecop0
, vecop1
));
15426 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 0), SFmode
, 0);
15428 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 1), SFmode
, 0);
15430 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 2), SFmode
, 0);
15432 simplify_gen_subreg (V4SFmode
, XVECEXP (vals
, 0, 3), SFmode
, 0);
15433 rtx tmp1
= gen_reg_rtx (V4SFmode
);
15434 rtx tmp2
= gen_reg_rtx (V4SFmode
);
15436 vecop0
= force_reg (V4SFmode
, vecop0
);
15437 vecop1
= force_reg (V4SFmode
, vecop1
);
15438 vecop2
= force_reg (V4SFmode
, vecop2
);
15439 vecop3
= force_reg (V4SFmode
, vecop3
);
15440 emit_insn (gen_sse_unpcklps (tmp1
, vecop1
, vecop3
));
15441 emit_insn (gen_sse_unpcklps (tmp2
, vecop0
, vecop2
));
15442 emit_insn (gen_sse_unpcklps (target
, tmp2
, tmp1
));
15450 /* Implements target hook vector_mode_supported_p. */
15452 ix86_vector_mode_supported_p (enum machine_mode mode
)
15454 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
15456 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
15458 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
15460 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
15465 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15467 We do this in the new i386 backend to maintain source compatibility
15468 with the old cc0-based compiler. */
15471 ix86_md_asm_clobbers (tree clobbers
)
15473 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
15475 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
15477 clobbers
= tree_cons (NULL_TREE
, build_string (7, "dirflag"),
15482 /* Worker function for REVERSE_CONDITION. */
15485 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
15487 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
15488 ? reverse_condition (code
)
15489 : reverse_condition_maybe_unordered (code
));
15492 /* Output code to perform an x87 FP register move, from OPERANDS[1]
15496 output_387_reg_move (rtx insn
, rtx
*operands
)
15498 if (REG_P (operands
[1])
15499 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
15501 if (REGNO (operands
[0]) == FIRST_STACK_REG
15502 && TARGET_USE_FFREEP
)
15503 return "ffreep\t%y0";
15504 return "fstp\t%y0";
15506 if (STACK_TOP_P (operands
[0]))
15507 return "fld%z1\t%y1";
15511 /* Output code to perform a conditional jump to LABEL, if C2 flag in
15512 FP status register is set. */
15515 ix86_emit_fp_unordered_jump (rtx label
)
15517 rtx reg
= gen_reg_rtx (HImode
);
15520 emit_insn (gen_x86_fnstsw_1 (reg
));
15522 if (TARGET_USE_SAHF
)
15524 emit_insn (gen_x86_sahf_1 (reg
));
15526 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
15527 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
15531 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
15533 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15534 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
15537 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
15538 gen_rtx_LABEL_REF (VOIDmode
, label
),
15540 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
15541 emit_jump_insn (temp
);
15544 /* Output code to perform a log1p XFmode calculation. */
15546 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
15548 rtx label1
= gen_label_rtx ();
15549 rtx label2
= gen_label_rtx ();
15551 rtx tmp
= gen_reg_rtx (XFmode
);
15552 rtx tmp2
= gen_reg_rtx (XFmode
);
15554 emit_insn (gen_absxf2 (tmp
, op1
));
15555 emit_insn (gen_cmpxf (tmp
,
15556 CONST_DOUBLE_FROM_REAL_VALUE (
15557 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
15559 emit_jump_insn (gen_bge (label1
));
15561 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
15562 emit_insn (gen_fyl2xp1_xf3 (op0
, tmp2
, op1
));
15563 emit_jump (label2
);
15565 emit_label (label1
);
15566 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
15567 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
15568 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
15569 emit_insn (gen_fyl2x_xf3 (op0
, tmp2
, tmp
));
15571 emit_label (label2
);
15574 /* Solaris named-section hook. Parameters are as for
15575 named_section_real. */
15578 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
15581 /* With Binutils 2.15, the "@unwind" marker must be specified on
15582 every occurrence of the ".eh_frame" section, not just the first
15585 && strcmp (name
, ".eh_frame") == 0)
15587 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
15588 flags
& SECTION_WRITE
? "aw" : "a");
15591 default_elf_asm_named_section (name
, flags
, decl
);
15594 #include "gt-i386.h"