1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
51 /* Processor costs (relative to an add) */
53 struct processor_costs size_cost
= { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
88 /* Processor costs (relative to an add) */
90 struct processor_costs i386_cost
= { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {8, 8, 8}, /* cost of storing fp registers */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
127 struct processor_costs i486_cost
= { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {8, 8, 8}, /* cost of storing fp registers */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
164 struct processor_costs pentium_cost
= {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184 {4, 4, 6}, /* cost of storing fp registers */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
201 struct processor_costs pentiumpro_cost
= {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of storing fp registers */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
238 struct processor_costs k6_cost
= {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {4, 4, 4}, /* cost of storing fp registers */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
275 struct processor_costs athlon_cost
= {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {4, 4, 16}, /* cost of storing fp registers */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
312 struct processor_costs pentium4_cost
= {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of storing fp registers */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
348 const struct processor_costs
*ix86_cost
= &pentium_cost
;
350 /* Processor feature/optimization bitmasks. */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
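/* Each x86_* tuning flag below is a bitmask over the processor bits defined
   above; a feature applies to the current CPU when its bit is set.  For
   example, the test (x86_accumulate_outgoing_args & CPUMASK) later in this
   file checks the flag against the mask of the CPU being scheduled for
   (CPUMASK is assumed here to be (1 << ix86_cpu), defined in i386.h, which
   is also expected to wrap these flags in TARGET_* convenience macros).  */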
359 const int x86_use_leave
= m_386
| m_K6
| m_ATHLON
;
360 const int x86_push_memory
= m_386
| m_K6
| m_ATHLON
| m_PENT4
;
361 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
362 const int x86_movx
= m_ATHLON
| m_PPRO
| m_PENT4
/* m_386 | m_K6 */;
363 const int x86_double_with_add
= ~m_386
;
364 const int x86_use_bit_test
= m_386
;
365 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON
| m_K6
;
366 const int x86_cmove
= m_PPRO
| m_ATHLON
| m_PENT4
;
367 const int x86_3dnow_a
= m_ATHLON
;
368 const int x86_deep_branch
= m_PPRO
| m_K6
| m_ATHLON
| m_PENT4
;
369 const int x86_branch_hints
= m_PENT4
;
370 const int x86_use_sahf
= m_PPRO
| m_K6
| m_PENT4
;
371 const int x86_partial_reg_stall
= m_PPRO
;
372 const int x86_use_loop
= m_K6
;
373 const int x86_use_fiop
= ~(m_PPRO
| m_ATHLON
| m_PENT
);
374 const int x86_use_mov0
= m_K6
;
375 const int x86_use_cltd
= ~(m_PENT
| m_K6
);
376 const int x86_read_modify_write
= ~m_PENT
;
377 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
378 const int x86_split_long_moves
= m_PPRO
;
379 const int x86_promote_QImode
= m_K6
| m_PENT
| m_386
| m_486
;
380 const int x86_single_stringop
= m_386
| m_PENT4
;
381 const int x86_qimode_math
= ~(0);
382 const int x86_promote_qi_regs
= 0;
383 const int x86_himode_math
= ~(m_PPRO
);
384 const int x86_promote_hi_regs
= m_PPRO
;
385 const int x86_sub_esp_4
= m_ATHLON
| m_PPRO
| m_PENT4
;
386 const int x86_sub_esp_8
= m_ATHLON
| m_PPRO
| m_386
| m_486
| m_PENT4
;
387 const int x86_add_esp_4
= m_ATHLON
| m_K6
| m_PENT4
;
388 const int x86_add_esp_8
= m_ATHLON
| m_PPRO
| m_K6
| m_386
| m_486
| m_PENT4
;
389 const int x86_integer_DFmode_moves
= ~(m_ATHLON
| m_PENT4
);
390 const int x86_partial_reg_dependency
= m_ATHLON
| m_PENT4
;
391 const int x86_memory_mismatch_stall
= m_ATHLON
| m_PENT4
;
392 const int x86_accumulate_outgoing_args
= m_ATHLON
| m_PENT4
| m_PPRO
;
393 const int x86_prologue_using_move
= m_ATHLON
| m_PENT4
| m_PPRO
;
394 const int x86_epilogue_using_move
= m_ATHLON
| m_PENT4
| m_PPRO
;
395 const int x86_decompose_lea
= m_PENT4
;
396 const int x86_arch_always_fancy_math_387
= m_PENT
|m_PPRO
|m_ATHLON
|m_PENT4
;
398 /* In case the average insn count for single function invocation is
399    lower than this constant, emit fast (but longer) prologue and
       epilogue code.  */
401 #define FAST_PROLOGUE_INSN_COUNT 30
403 /* Set by prologue expander and used by epilogue expander to determine
       the style used.  */
405 static int use_fast_prologue_epilogue
;
407 #define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))
409 /* Names for the 8-bit low, 8-bit high, and 16-bit registers, respectively. */
410 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
411 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
412 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
414 /* Array of the smallest class containing reg number REGNO, indexed by
415 REGNO. Used by REGNO_REG_CLASS in i386.h. */
417 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
420 AREG
, DREG
, CREG
, BREG
,
422 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
424 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
425 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
428 /* flags, fpsr, dirflag, frame */
429 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
430 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
432 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
434 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
435 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
436 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
440 /* The "default" register map used in 32bit mode. */
442 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
444 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
445 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
446 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
447 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
448 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
449 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
450 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
453 static int const x86_64_int_parameter_registers
[6] =
455 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
456 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
459 static int const x86_64_int_return_registers
[4] =
461 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
464 /* The "default" register map used in 64bit mode. */
465 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
467 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
468 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
469 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
470 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
471 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
472 8,9,10,11,12,13,14,15, /* extended integer registers */
473 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
476 /* Define the register numbers to be used in Dwarf debugging information.
477 The SVR4 reference port C compiler uses the following register numbers
478 in its Dwarf output code:
479 0 for %eax (gcc regno = 0)
480 1 for %ecx (gcc regno = 2)
481 2 for %edx (gcc regno = 1)
482 3 for %ebx (gcc regno = 3)
483 4 for %esp (gcc regno = 7)
484 5 for %ebp (gcc regno = 6)
485 6 for %esi (gcc regno = 4)
486 7 for %edi (gcc regno = 5)
487 The following three DWARF register numbers are never generated by
488 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
489 believes these numbers have these meanings.
490 8 for %eip (no gcc equivalent)
491 9 for %eflags (gcc regno = 17)
492 10 for %trapno (no gcc equivalent)
493 It is not at all clear how we should number the FP stack registers
494 for the x86 architecture. If the version of SDB on x86/svr4 were
495 a bit less brain dead with respect to floating-point then we would
496 have a precedent to follow with respect to DWARF register numbers
497 for x86 FP registers, but the SDB on x86/svr4 is so completely
498 broken with respect to FP registers that it is hardly worth thinking
499 of it as something to strive for compatibility with.
500 The version of x86/svr4 SDB I have at the moment does (partially)
501 seem to believe that DWARF register number 11 is associated with
502 the x86 register %st(0), but that's about all. Higher DWARF
503 register numbers don't seem to be associated with anything in
504 particular, and even for DWARF regno 11, SDB only seems to under-
505 stand that it should say that a variable lives in %st(0) (when
506 asked via an `=' command) if we said it was in DWARF regno 11,
507 but SDB still prints garbage when asked for the value of the
508 variable in question (via a `/' command).
509 (Also note that the labels SDB prints for various FP stack regs
510 when doing an `x' command are all wrong.)
511 Note that these problems generally don't affect the native SVR4
512 C compiler because it doesn't allow the use of -O with -g and
513 because when it is *not* optimizing, it allocates a memory
514 location for each floating-point variable, and the memory
515 location is what gets described in the DWARF AT_location
516 attribute for the variable in question.
517 Regardless of the severe mental illness of the x86/svr4 SDB, we
518 do something sensible here and we use the following DWARF
519 register numbers. Note that these are all stack-top-relative
521 11 for %st(0) (gcc regno = 8)
522 12 for %st(1) (gcc regno = 9)
523 13 for %st(2) (gcc regno = 10)
524 14 for %st(3) (gcc regno = 11)
525 15 for %st(4) (gcc regno = 12)
526 16 for %st(5) (gcc regno = 13)
527 17 for %st(6) (gcc regno = 14)
528 18 for %st(7) (gcc regno = 15)
530 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
532 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
533 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
534 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
535 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
536 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
537 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
538 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
541 /* Test and compare insns in i386.md store the information needed to
542 generate branch and scc insns here. */
544 rtx ix86_compare_op0
= NULL_RTX
;
545 rtx ix86_compare_op1
= NULL_RTX
;
547 /* The encoding characters for the four TLS models present in ELF. */
549 static char const tls_model_chars
[] = " GLil";
551 #define MAX_386_STACK_LOCALS 3
552 /* Size of the register save area. */
553 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
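/* Worked example: with the 64-bit values REGPARM_MAX == 6 and
   SSE_REGPARM_MAX == 8 (assumed here; the real values come from i386.h),
   this is 6 * 8 + 8 * 16 = 176 bytes, the register save area size
   described by the x86-64 PS ABI.  */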
555 /* Define the structure for the machine field in struct function. */
556 struct machine_function
558 rtx stack_locals
[(int) MAX_MACHINE_MODE
][MAX_386_STACK_LOCALS
];
559 const char *some_ld_name
;
560 int save_varrargs_registers
;
561 int accesses_prev_frame
;
564 #define ix86_stack_locals (cfun->machine->stack_locals)
565 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
567 /* Structure describing stack frame layout.
568 Stack grows downward:
574 saved frame pointer if frame_pointer_needed
575 <- HARD_FRAME_POINTER
581 > to_allocate <- FRAME_POINTER
593 int outgoing_arguments_size
;
596 HOST_WIDE_INT to_allocate
;
597 /* The offsets relative to ARG_POINTER. */
598 HOST_WIDE_INT frame_pointer_offset
;
599 HOST_WIDE_INT hard_frame_pointer_offset
;
600 HOST_WIDE_INT stack_pointer_offset
;
603 /* Used to enable/disable debugging features. */
604 const char *ix86_debug_arg_string
, *ix86_debug_addr_string
;
605 /* Code model option as passed by user. */
606 const char *ix86_cmodel_string
;
608 enum cmodel ix86_cmodel
;
610 const char *ix86_asm_string
;
611 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
613 const char *ix86_tls_dialect_string
;
614 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
616 /* Which unit we are generating floating point math for. */
617 enum fpmath_unit ix86_fpmath
;
619 /* Which cpu are we scheduling for. */
620 enum processor_type ix86_cpu
;
621 /* Which instruction set architecture to use. */
622 enum processor_type ix86_arch
;
624 /* Strings to hold which cpu and instruction set architecture to use. */
625 const char *ix86_cpu_string
; /* for -mcpu=<xxx> */
626 const char *ix86_arch_string
; /* for -march=<xxx> */
627 const char *ix86_fpmath_string
; /* for -mfpmath=<xxx> */
629 /* # of registers to use to pass arguments. */
630 const char *ix86_regparm_string
;
632 /* True if the SSE prefetch instruction is not a NOP. */
633 int x86_prefetch_sse
;
635 /* ix86_regparm_string as a number */
638 /* Alignment to use for loops and jumps: */
640 /* Power of two alignment for loops. */
641 const char *ix86_align_loops_string
;
643 /* Power of two alignment for non-loop jumps. */
644 const char *ix86_align_jumps_string
;
646 /* Power of two alignment for stack boundary in bytes. */
647 const char *ix86_preferred_stack_boundary_string
;
649 /* Preferred alignment for stack boundary in bits. */
650 int ix86_preferred_stack_boundary
;
652 /* Values 1-5: see jump.c */
653 int ix86_branch_cost
;
654 const char *ix86_branch_cost_string
;
656 /* Power of two alignment for functions. */
657 const char *ix86_align_funcs_string
;
659 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
660 static char internal_label_prefix
[16];
661 static int internal_label_prefix_len
;
663 static int local_symbolic_operand
PARAMS ((rtx
, enum machine_mode
));
664 static int tls_symbolic_operand_1
PARAMS ((rtx
, enum tls_model
));
665 static void output_pic_addr_const
PARAMS ((FILE *, rtx
, int));
666 static void put_condition_code
PARAMS ((enum rtx_code
, enum machine_mode
,
668 static const char *get_some_local_dynamic_name
PARAMS ((void));
669 static int get_some_local_dynamic_name_1
PARAMS ((rtx
*, void *));
670 static rtx maybe_get_pool_constant
PARAMS ((rtx
));
671 static rtx ix86_expand_int_compare
PARAMS ((enum rtx_code
, rtx
, rtx
));
672 static enum rtx_code ix86_prepare_fp_compare_args
PARAMS ((enum rtx_code
,
674 static rtx get_thread_pointer
PARAMS ((void));
675 static rtx gen_push
PARAMS ((rtx
));
676 static int memory_address_length
PARAMS ((rtx addr
));
677 static int ix86_flags_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
678 static int ix86_agi_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
679 static enum attr_ppro_uops ix86_safe_ppro_uops
PARAMS ((rtx
));
680 static void ix86_dump_ppro_packet
PARAMS ((FILE *));
681 static void ix86_reorder_insn
PARAMS ((rtx
*, rtx
*));
682 static void ix86_init_machine_status
PARAMS ((struct function
*));
683 static void ix86_mark_machine_status
PARAMS ((struct function
*));
684 static void ix86_free_machine_status
PARAMS ((struct function
*));
685 static int ix86_split_to_parts
PARAMS ((rtx
, rtx
*, enum machine_mode
));
686 static int ix86_nsaved_regs
PARAMS ((void));
687 static void ix86_emit_save_regs
PARAMS ((void));
688 static void ix86_emit_save_regs_using_mov
PARAMS ((rtx
, HOST_WIDE_INT
));
689 static void ix86_emit_restore_regs_using_mov
PARAMS ((rtx
, int, int));
690 static void ix86_set_move_mem_attrs_1
PARAMS ((rtx
, rtx
, rtx
, rtx
, rtx
));
691 static void ix86_sched_reorder_ppro
PARAMS ((rtx
*, rtx
*));
692 static HOST_WIDE_INT ix86_GOT_alias_set
PARAMS ((void));
693 static void ix86_adjust_counter
PARAMS ((rtx
, HOST_WIDE_INT
));
694 static rtx ix86_expand_aligntest
PARAMS ((rtx
, int));
695 static void ix86_expand_strlensi_unroll_1
PARAMS ((rtx
, rtx
));
696 static int ix86_issue_rate
PARAMS ((void));
697 static int ix86_adjust_cost
PARAMS ((rtx
, rtx
, rtx
, int));
698 static void ix86_sched_init
PARAMS ((FILE *, int, int));
699 static int ix86_sched_reorder
PARAMS ((FILE *, int, rtx
*, int *, int));
700 static int ix86_variable_issue
PARAMS ((FILE *, int, rtx
, int));
701 static int ia32_use_dfa_pipeline_interface
PARAMS ((void));
702 static int ia32_multipass_dfa_lookahead
PARAMS ((void));
703 static void ix86_init_mmx_sse_builtins
PARAMS ((void));
707 rtx base
, index
, disp
;
711 static int ix86_decompose_address
PARAMS ((rtx
, struct ix86_address
*));
713 static void ix86_encode_section_info
PARAMS ((tree
, int)) ATTRIBUTE_UNUSED
;
714 static const char *ix86_strip_name_encoding
PARAMS ((const char *))
717 struct builtin_description
;
718 static rtx ix86_expand_sse_comi
PARAMS ((const struct builtin_description
*,
720 static rtx ix86_expand_sse_compare
PARAMS ((const struct builtin_description
*,
722 static rtx ix86_expand_unop1_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
723 static rtx ix86_expand_unop_builtin
PARAMS ((enum insn_code
, tree
, rtx
, int));
724 static rtx ix86_expand_binop_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
725 static rtx ix86_expand_timode_binop_builtin
PARAMS ((enum insn_code
,
727 static rtx ix86_expand_store_builtin
PARAMS ((enum insn_code
, tree
));
728 static rtx safe_vector_operand
PARAMS ((rtx
, enum machine_mode
));
729 static enum rtx_code ix86_fp_compare_code_to_integer
PARAMS ((enum rtx_code
));
730 static void ix86_fp_comparison_codes
PARAMS ((enum rtx_code code
,
734 static rtx ix86_expand_fp_compare
PARAMS ((enum rtx_code
, rtx
, rtx
, rtx
,
736 static int ix86_fp_comparison_arithmetics_cost
PARAMS ((enum rtx_code code
));
737 static int ix86_fp_comparison_fcomi_cost
PARAMS ((enum rtx_code code
));
738 static int ix86_fp_comparison_sahf_cost
PARAMS ((enum rtx_code code
));
739 static int ix86_fp_comparison_cost
PARAMS ((enum rtx_code code
));
740 static int ix86_save_reg
PARAMS ((unsigned int, int));
741 static void ix86_compute_frame_layout
PARAMS ((struct ix86_frame
*));
742 static int ix86_comp_type_attributes
PARAMS ((tree
, tree
));
743 const struct attribute_spec ix86_attribute_table
[];
744 static tree ix86_handle_cdecl_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
745 static tree ix86_handle_regparm_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
747 #ifdef DO_GLOBAL_CTORS_BODY
748 static void ix86_svr3_asm_out_constructor
PARAMS ((rtx
, int));
751 /* Register class used for passing a given 64bit part of the argument.
752    These represent classes as documented by the PS ABI, with the exception
753    of the SSESF and SSEDF classes, which are basically the SSE class; gcc
754    just uses an SF or DFmode move instead of DImode to avoid reformatting
       penalties.
756    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
757    whenever possible (the upper half does contain padding).  */
759 enum x86_64_reg_class
762 X86_64_INTEGER_CLASS
,
763 X86_64_INTEGERSI_CLASS
,
772 static const char * const x86_64_reg_class_name
[] =
773 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
775 #define MAX_CLASSES 4
776 static int classify_argument
PARAMS ((enum machine_mode
, tree
,
777 enum x86_64_reg_class
[MAX_CLASSES
],
779 static int examine_argument
PARAMS ((enum machine_mode
, tree
, int, int *,
781 static rtx construct_container
PARAMS ((enum machine_mode
, tree
, int, int, int,
783 static enum x86_64_reg_class merge_classes
PARAMS ((enum x86_64_reg_class
,
784 enum x86_64_reg_class
));
786 /* Initialize the GCC target structure. */
787 #undef TARGET_ATTRIBUTE_TABLE
788 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
789 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
790 # undef TARGET_MERGE_DECL_ATTRIBUTES
791 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
794 #undef TARGET_COMP_TYPE_ATTRIBUTES
795 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
797 #undef TARGET_INIT_BUILTINS
798 #define TARGET_INIT_BUILTINS ix86_init_builtins
800 #undef TARGET_EXPAND_BUILTIN
801 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
803 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
804 static void ix86_osf_output_function_prologue
PARAMS ((FILE *,
806 # undef TARGET_ASM_FUNCTION_PROLOGUE
807 # define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
810 #undef TARGET_ASM_OPEN_PAREN
811 #define TARGET_ASM_OPEN_PAREN ""
812 #undef TARGET_ASM_CLOSE_PAREN
813 #define TARGET_ASM_CLOSE_PAREN ""
815 #undef TARGET_ASM_ALIGNED_HI_OP
816 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
817 #undef TARGET_ASM_ALIGNED_SI_OP
818 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
820 #undef TARGET_ASM_ALIGNED_DI_OP
821 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
824 #undef TARGET_ASM_UNALIGNED_HI_OP
825 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
826 #undef TARGET_ASM_UNALIGNED_SI_OP
827 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
828 #undef TARGET_ASM_UNALIGNED_DI_OP
829 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
831 #undef TARGET_SCHED_ADJUST_COST
832 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
833 #undef TARGET_SCHED_ISSUE_RATE
834 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
835 #undef TARGET_SCHED_VARIABLE_ISSUE
836 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
837 #undef TARGET_SCHED_INIT
838 #define TARGET_SCHED_INIT ix86_sched_init
839 #undef TARGET_SCHED_REORDER
840 #define TARGET_SCHED_REORDER ix86_sched_reorder
841 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
842 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
843 ia32_use_dfa_pipeline_interface
844 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
845 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
846 ia32_multipass_dfa_lookahead
849 #undef TARGET_HAVE_TLS
850 #define TARGET_HAVE_TLS true
853 struct gcc_target targetm
= TARGET_INITIALIZER
;
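/* The language-independent parts of the compiler call back into this file
   through the targetm structure: for example, a call to
   targetm.sched.adjust_cost resolves to ix86_adjust_cost because of the
   TARGET_SCHED_ADJUST_COST define above.  (Sketch of the mechanism only;
   the hook layout itself comes from target.h and target-def.h.)  */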
855 /* Sometimes certain combinations of command options do not make
856 sense on a particular target machine. You can define a macro
857 `OVERRIDE_OPTIONS' to take account of this. This macro, if
858 defined, is executed once just after all the command options have
861 Don't use this macro to turn on various extra optimizations for
862 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
868 /* Comes from final.c -- no real reason to change it. */
869 #define MAX_CODE_ALIGN 16
873 const struct processor_costs
*cost
; /* Processor costs */
874 const int target_enable
; /* Target flags to enable. */
875 const int target_disable
; /* Target flags to disable. */
876 const int align_loop
; /* Default alignments. */
877 const int align_loop_max_skip
;
878 const int align_jump
;
879 const int align_jump_max_skip
;
880 const int align_func
;
881 const int branch_cost
;
883 const processor_target_table
[PROCESSOR_max
] =
885 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4, 1},
886 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16, 1},
887 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16, 1},
888 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16, 1},
889 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32, 1},
890 {&athlon_cost
, 0, 0, 16, 7, 64, 7, 16, 1},
891 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0, 1}
894 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
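/* Reading one row of processor_target_table above as an example: the
   pentium entry {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1} selects the
   Pentium cost table, enables and disables no extra target flags, aligns
   loops and jumps to 16 bytes with a maximum skip of 7 bytes, aligns
   functions to 16 bytes, and uses a branch cost of 1.  */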
897 const char *const name
; /* processor name or nickname. */
898 const enum processor_type processor
;
904 PTA_PREFETCH_SSE
= 8,
909 const processor_alias_table
[] =
911 {"i386", PROCESSOR_I386
, 0},
912 {"i486", PROCESSOR_I486
, 0},
913 {"i586", PROCESSOR_PENTIUM
, 0},
914 {"pentium", PROCESSOR_PENTIUM
, 0},
915 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
916 {"i686", PROCESSOR_PENTIUMPRO
, 0},
917 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
918 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
919 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
920 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
|
921 PTA_MMX
| PTA_PREFETCH_SSE
},
922 {"k6", PROCESSOR_K6
, PTA_MMX
},
923 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
924 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
925 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
927 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
928 | PTA_3DNOW
| PTA_3DNOW_A
},
929 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
930 | PTA_3DNOW_A
| PTA_SSE
},
931 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
932 | PTA_3DNOW_A
| PTA_SSE
},
933 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
934 | PTA_3DNOW_A
| PTA_SSE
},
937 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
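/* Example of how the alias table is used below: -march=athlon-xp matches
   the "athlon-xp" entry, so ix86_arch becomes PROCESSOR_ATHLON and the
   PTA_MMX, PTA_PREFETCH_SSE, PTA_3DNOW, PTA_3DNOW_A and PTA_SSE flags turn
   on the corresponding MASK_* target flags, unless the user already set or
   cleared them explicitly.  */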
939 #ifdef SUBTARGET_OVERRIDE_OPTIONS
940 SUBTARGET_OVERRIDE_OPTIONS
;
943 if (!ix86_cpu_string
&& ix86_arch_string
)
944 ix86_cpu_string
= ix86_arch_string
;
945 if (!ix86_cpu_string
)
946 ix86_cpu_string
= cpu_names
[TARGET_CPU_DEFAULT
];
947 if (!ix86_arch_string
)
948 ix86_arch_string
= TARGET_64BIT
? "athlon-4" : "i386";
950 if (ix86_cmodel_string
!= 0)
952 if (!strcmp (ix86_cmodel_string
, "small"))
953 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
955 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
956 else if (!strcmp (ix86_cmodel_string
, "32"))
958 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
959 ix86_cmodel
= CM_KERNEL
;
960 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
961 ix86_cmodel
= CM_MEDIUM
;
962 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
963 ix86_cmodel
= CM_LARGE
;
965 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
971 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
973 if (ix86_asm_string
!= 0)
975 if (!strcmp (ix86_asm_string
, "intel"))
976 ix86_asm_dialect
= ASM_INTEL
;
977 else if (!strcmp (ix86_asm_string
, "att"))
978 ix86_asm_dialect
= ASM_ATT
;
980 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
982 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
983 error ("code model `%s' not supported in the %s bit mode",
984 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
985 if (ix86_cmodel
== CM_LARGE
)
986 sorry ("code model `large' not supported yet");
987 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
988 sorry ("%i-bit mode not compiled in",
989 (target_flags
& MASK_64BIT
) ? 64 : 32);
991 for (i
= 0; i
< pta_size
; i
++)
992 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
994 ix86_arch
= processor_alias_table
[i
].processor
;
995 /* Default cpu tuning to the architecture. */
996 ix86_cpu
= ix86_arch
;
997 if (processor_alias_table
[i
].flags
& PTA_MMX
998 && !(target_flags
& MASK_MMX_SET
))
999 target_flags
|= MASK_MMX
;
1000 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1001 && !(target_flags
& MASK_3DNOW_SET
))
1002 target_flags
|= MASK_3DNOW
;
1003 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1004 && !(target_flags
& MASK_3DNOW_A_SET
))
1005 target_flags
|= MASK_3DNOW_A
;
1006 if (processor_alias_table
[i
].flags
& PTA_SSE
1007 && !(target_flags
& MASK_SSE_SET
))
1008 target_flags
|= MASK_SSE
;
1009 if (processor_alias_table
[i
].flags
& PTA_SSE2
1010 && !(target_flags
& MASK_SSE2_SET
))
1011 target_flags
|= MASK_SSE2
;
1012 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1013 x86_prefetch_sse
= true;
1018 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1020 for (i
= 0; i
< pta_size
; i
++)
1021 if (! strcmp (ix86_cpu_string
, processor_alias_table
[i
].name
))
1023 ix86_cpu
= processor_alias_table
[i
].processor
;
1026 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1027 x86_prefetch_sse
= true;
1029 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string
);
1032 ix86_cost
= &size_cost
;
1034 ix86_cost
= processor_target_table
[ix86_cpu
].cost
;
1035 target_flags
|= processor_target_table
[ix86_cpu
].target_enable
;
1036 target_flags
&= ~processor_target_table
[ix86_cpu
].target_disable
;
1038 /* Arrange to set up i386_stack_locals for all functions. */
1039 init_machine_status
= ix86_init_machine_status
;
1040 mark_machine_status
= ix86_mark_machine_status
;
1041 free_machine_status
= ix86_free_machine_status
;
1043 /* Validate -mregparm= value. */
1044 if (ix86_regparm_string
)
1046 i
= atoi (ix86_regparm_string
);
1047 if (i
< 0 || i
> REGPARM_MAX
)
1048 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1054 ix86_regparm
= REGPARM_MAX
;
1056 /* If the user has provided any of the -malign-* options,
1057 warn and use that value only if -falign-* is not set.
1058 Remove this code in GCC 3.2 or later. */
1059 if (ix86_align_loops_string
)
1061 warning ("-malign-loops is obsolete, use -falign-loops");
1062 if (align_loops
== 0)
1064 i
= atoi (ix86_align_loops_string
);
1065 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1066 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1068 align_loops
= 1 << i
;
1072 if (ix86_align_jumps_string
)
1074 warning ("-malign-jumps is obsolete, use -falign-jumps");
1075 if (align_jumps
== 0)
1077 i
= atoi (ix86_align_jumps_string
);
1078 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1079 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1081 align_jumps
= 1 << i
;
1085 if (ix86_align_funcs_string
)
1087 warning ("-malign-functions is obsolete, use -falign-functions");
1088 if (align_functions
== 0)
1090 i
= atoi (ix86_align_funcs_string
);
1091 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1092 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1094 align_functions
= 1 << i
;
1098 /* Default align_* from the processor table. */
1099 if (align_loops
== 0)
1101 align_loops
= processor_target_table
[ix86_cpu
].align_loop
;
1102 align_loops_max_skip
= processor_target_table
[ix86_cpu
].align_loop_max_skip
;
1104 if (align_jumps
== 0)
1106 align_jumps
= processor_target_table
[ix86_cpu
].align_jump
;
1107 align_jumps_max_skip
= processor_target_table
[ix86_cpu
].align_jump_max_skip
;
1109 if (align_functions
== 0)
1111 align_functions
= processor_target_table
[ix86_cpu
].align_func
;
1114 /* Validate -mpreferred-stack-boundary= value, or provide default.
1115 The default of 128 bits is for Pentium III's SSE __m128, but we
1116 don't want additional code to keep the stack aligned when
1117 optimizing for code size. */
1118 ix86_preferred_stack_boundary
= (optimize_size
1119 ? TARGET_64BIT
? 64 : 32
1121 if (ix86_preferred_stack_boundary_string
)
1123 i
= atoi (ix86_preferred_stack_boundary_string
);
1124 if (i
< (TARGET_64BIT
? 3 : 2) || i
> 12)
1125 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1126 TARGET_64BIT
? 3 : 2);
1128 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
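/* For example, -mpreferred-stack-boundary=4 gives
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16 byte aligned
   stack.  */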
1131 /* Validate -mbranch-cost= value, or provide default. */
1132 ix86_branch_cost
= processor_target_table
[ix86_cpu
].branch_cost
;
1133 if (ix86_branch_cost_string
)
1135 i
= atoi (ix86_branch_cost_string
);
1137 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1139 ix86_branch_cost
= i
;
1142 if (ix86_tls_dialect_string
)
1144 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1145 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1146 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1147 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1149 error ("bad value (%s) for -mtls-dialect= switch",
1150 ix86_tls_dialect_string
);
1153 /* Keep nonleaf frame pointers. */
1154 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1155 flag_omit_frame_pointer
= 1;
1157 /* If we're doing fast math, we don't care about comparison order
1158 wrt NaNs. This lets us use a shorter comparison sequence. */
1159 if (flag_unsafe_math_optimizations
)
1160 target_flags
&= ~MASK_IEEE_FP
;
1162 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1163 since the insns won't need emulation. */
1164 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1165 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1169 if (TARGET_ALIGN_DOUBLE
)
1170 error ("-malign-double makes no sense in the 64bit mode");
1172 error ("-mrtd calling convention not supported in the 64bit mode");
1173 /* Enable by default the SSE and MMX builtins. */
1174 target_flags
|= (MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
);
1175 ix86_fpmath
= FPMATH_SSE
;
1178 ix86_fpmath
= FPMATH_387
;
1180 if (ix86_fpmath_string
!= 0)
1182 if (! strcmp (ix86_fpmath_string
, "387"))
1183 ix86_fpmath
= FPMATH_387
;
1184 else if (! strcmp (ix86_fpmath_string
, "sse"))
1188 warning ("SSE instruction set disabled, using 387 arithmetics");
1189 ix86_fpmath
= FPMATH_387
;
1192 ix86_fpmath
= FPMATH_SSE
;
1194 else if (! strcmp (ix86_fpmath_string
, "387,sse")
1195 || ! strcmp (ix86_fpmath_string
, "sse,387"))
1199 warning ("SSE instruction set disabled, using 387 arithmetics");
1200 ix86_fpmath
= FPMATH_387
;
1202 else if (!TARGET_80387
)
1204 warning ("387 instruction set disabled, using SSE arithmetics");
1205 ix86_fpmath
= FPMATH_SSE
;
1208 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
1211 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
1214 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
        on.  */
1218 target_flags
|= MASK_MMX
;
1219 x86_prefetch_sse
= true;
1222 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1225 target_flags
|= MASK_MMX
;
1226 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1227 extensions it adds. */
1228 if (x86_3dnow_a
& (1 << ix86_arch
))
1229 target_flags
|= MASK_3DNOW_A
;
1231 if ((x86_accumulate_outgoing_args
& CPUMASK
)
1232 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS_SET
)
1234 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1236 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1239 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1240 p
= strchr (internal_label_prefix
, 'X');
1241 internal_label_prefix_len
= p
- internal_label_prefix
;
1247 optimization_options (level
, size
)
1249 int size ATTRIBUTE_UNUSED
;
1251 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1252 make the problem with not enough registers even worse. */
1253 #ifdef INSN_SCHEDULING
1255 flag_schedule_insns
= 0;
1257 if (TARGET_64BIT
&& optimize
>= 1)
1258 flag_omit_frame_pointer
= 1;
1261 flag_pcc_struct_return
= 0;
1262 flag_asynchronous_unwind_tables
= 1;
1266 /* Table of valid machine attributes. */
1267 const struct attribute_spec ix86_attribute_table
[] =
1269 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1270 /* Stdcall attribute says callee is responsible for popping arguments
1271 if they are not variable. */
1272 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1273 /* Cdecl attribute says the callee is a normal C declaration */
1274 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1275 /* Regparm attribute specifies how many integer arguments are to be
1276 passed in registers. */
1277 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute
},
1278 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1279 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1280 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1281 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
1283 { NULL
, 0, 0, false, false, false, NULL
}
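/* Usage sketch (illustrative only, not part of this table):

     int __attribute__ ((stdcall)) f (int, int);          callee pops its arguments
     int __attribute__ ((cdecl)) g (int, int);            caller pops, even with -mrtd
     int __attribute__ ((regparm (2))) h (int a, int b);  a and b passed in registers

   The handlers named above only validate the attribute; the calling
   convention effects are applied by ix86_return_pops_args and the
   argument passing code below.  */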
1286 /* Handle a "cdecl" or "stdcall" attribute;
1287 arguments as in struct attribute_spec.handler. */
1289 ix86_handle_cdecl_attribute (node
, name
, args
, flags
, no_add_attrs
)
1292 tree args ATTRIBUTE_UNUSED
;
1293 int flags ATTRIBUTE_UNUSED
;
1296 if (TREE_CODE (*node
) != FUNCTION_TYPE
1297 && TREE_CODE (*node
) != METHOD_TYPE
1298 && TREE_CODE (*node
) != FIELD_DECL
1299 && TREE_CODE (*node
) != TYPE_DECL
)
1301 warning ("`%s' attribute only applies to functions",
1302 IDENTIFIER_POINTER (name
));
1303 *no_add_attrs
= true;
1308 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
1309 *no_add_attrs
= true;
1315 /* Handle a "regparm" attribute;
1316 arguments as in struct attribute_spec.handler. */
1318 ix86_handle_regparm_attribute (node
, name
, args
, flags
, no_add_attrs
)
1322 int flags ATTRIBUTE_UNUSED
;
1325 if (TREE_CODE (*node
) != FUNCTION_TYPE
1326 && TREE_CODE (*node
) != METHOD_TYPE
1327 && TREE_CODE (*node
) != FIELD_DECL
1328 && TREE_CODE (*node
) != TYPE_DECL
)
1330 warning ("`%s' attribute only applies to functions",
1331 IDENTIFIER_POINTER (name
));
1332 *no_add_attrs
= true;
1338 cst
= TREE_VALUE (args
);
1339 if (TREE_CODE (cst
) != INTEGER_CST
)
1341 warning ("`%s' attribute requires an integer constant argument",
1342 IDENTIFIER_POINTER (name
));
1343 *no_add_attrs
= true;
1345 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
1347 warning ("argument to `%s' attribute larger than %d",
1348 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
1349 *no_add_attrs
= true;
1356 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
1358 /* Generate the assembly code for function entry. FILE is a stdio
1359 stream to output the code to. SIZE is an int: how many units of
1360 temporary storage to allocate.
1362 Refer to the array `regs_ever_live' to determine which registers to
1363 save; `regs_ever_live[I]' is nonzero if register number I is ever
1364 used in the function. This function is responsible for knowing
1365 which registers should not be saved even if used.
1367 We override it here to allow for the new profiling code to go before
1368 the prologue and the old mcount code to go after the prologue (and
1369 after %ebx has been set up for ELF shared library support). */
1372 ix86_osf_output_function_prologue (file
, size
)
1376 const char *prefix
= "";
1377 const char *const lprefix
= LPREFIX
;
1378 int labelno
= current_function_profile_label_no
;
1382 if (TARGET_UNDERSCORES
)
1385 if (current_function_profile
&& OSF_PROFILE_BEFORE_PROLOGUE
)
1387 if (!flag_pic
&& !HALF_PIC_P ())
1389 fprintf (file
, "\tmovl $%sP%d,%%edx\n", lprefix
, labelno
);
1390 fprintf (file
, "\tcall *%s_mcount_ptr\n", prefix
);
1393 else if (HALF_PIC_P ())
1397 HALF_PIC_EXTERNAL ("_mcount_ptr");
1398 symref
= HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode
,
1401 fprintf (file
, "\tmovl $%sP%d,%%edx\n", lprefix
, labelno
);
1402 fprintf (file
, "\tmovl %s%s,%%eax\n", prefix
,
1404 fprintf (file
, "\tcall *(%%eax)\n");
1409 static int call_no
= 0;
1411 fprintf (file
, "\tcall %sPc%d\n", lprefix
, call_no
);
1412 fprintf (file
, "%sPc%d:\tpopl %%eax\n", lprefix
, call_no
);
1413 fprintf (file
, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1414 lprefix
, call_no
++);
1415 fprintf (file
, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1417 fprintf (file
, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1419 fprintf (file
, "\tcall *(%%eax)\n");
1425 if (current_function_profile
&& OSF_PROFILE_BEFORE_PROLOGUE
)
1429 fprintf (file
, "\tmovl $%sP%d,%%edx\n", lprefix
, labelno
);
1430 fprintf (file
, "\tcall *%s_mcount_ptr\n", prefix
);
1435 static int call_no
= 0;
1437 fprintf (file
, "\tcall %sPc%d\n", lprefix
, call_no
);
1438 fprintf (file
, "%sPc%d:\tpopl %%eax\n", lprefix
, call_no
);
1439 fprintf (file
, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1440 lprefix
, call_no
++);
1441 fprintf (file
, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1443 fprintf (file
, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1445 fprintf (file
, "\tcall *(%%eax)\n");
1448 #endif /* !OSF_OS */
1450 function_prologue (file
, size
);
1453 #endif /* OSF_OS || TARGET_OSF1ELF */
1455 /* Return 0 if the attributes for two types are incompatible, 1 if they
1456 are compatible, and 2 if they are nearly compatible (which causes a
1457 warning to be generated). */
1460 ix86_comp_type_attributes (type1
, type2
)
1464 /* Check for mismatch of non-default calling convention. */
1465 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
1467 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
1470 /* Check for mismatched return types (cdecl vs stdcall). */
1471 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
1472 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
1477 /* Value is the number of bytes of arguments automatically
1478 popped when returning from a subroutine call.
1479 FUNDECL is the declaration node of the function (as a tree),
1480 FUNTYPE is the data type of the function (as a tree),
1481 or for a library call it is an identifier node for the subroutine name.
1482 SIZE is the number of bytes of arguments passed on the stack.
1484 On the 80386, the RTD insn may be used to pop them if the number
1485 of args is fixed, but if the number is variable then the caller
1486 must pop them all. RTD can't be used for library calls now
1487 because the library is compiled with the Unix compiler.
1488 Use of RTD is a selectable option, since it is incompatible with
1489 standard Unix calling sequences. If the option is not selected,
1490 the caller must always pop the args.
1492 The attribute stdcall is equivalent to RTD on a per module basis. */
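/* For instance (a sketch of the rules above): with -mrtd, or for a function
   whose type carries the stdcall attribute, a call such as f (1, 2) with a
   fixed argument list pops 8 bytes on return; a cdecl or variable-argument
   function pops nothing and leaves the cleanup to the caller.  */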
1495 ix86_return_pops_args (fundecl
, funtype
, size
)
1500 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
1502 /* Cdecl functions override -mrtd, and never pop the stack. */
1503 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
1505 /* Stdcall functions will pop the stack if not variable args. */
1506 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
)))
1510 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1511 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
1512 == void_type_node
)))
1516 /* Lose any fake structure return argument if it is passed on the stack. */
1517 if (aggregate_value_p (TREE_TYPE (funtype
))
1520 int nregs
= ix86_regparm
;
1524 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype
));
1527 nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1531 return GET_MODE_SIZE (Pmode
);
1537 /* Argument support functions. */
1539 /* Return true when register may be used to pass function parameters. */
1541 ix86_function_arg_regno_p (regno
)
1546 return (regno
< REGPARM_MAX
1547 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
1548 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
1550 /* RAX is used as hidden argument to va_arg functions. */
1553 for (i
= 0; i
< REGPARM_MAX
; i
++)
1554 if (regno
== x86_64_int_parameter_registers
[i
])
1559 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1560 for a call to a function whose data type is FNTYPE.
1561 For a library call, FNTYPE is 0. */
1564 init_cumulative_args (cum
, fntype
, libname
)
1565 CUMULATIVE_ARGS
*cum
; /* Argument info to initialize */
1566 tree fntype
; /* tree ptr for function decl */
1567 rtx libname
; /* SYMBOL_REF of library name or 0 */
1569 static CUMULATIVE_ARGS zero_cum
;
1570 tree param
, next_param
;
1572 if (TARGET_DEBUG_ARG
)
1574 fprintf (stderr
, "\ninit_cumulative_args (");
1576 fprintf (stderr
, "fntype code = %s, ret code = %s",
1577 tree_code_name
[(int) TREE_CODE (fntype
)],
1578 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
1580 fprintf (stderr
, "no fntype");
1583 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
1588 /* Set up the number of registers to use for passing arguments. */
1589 cum
->nregs
= ix86_regparm
;
1590 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1591 if (fntype
&& !TARGET_64BIT
)
1593 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype
));
1596 cum
->nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1598 cum
->maybe_vaarg
= false;
1600 /* Determine if this function has variable arguments. This is
1601 indicated by the last argument being 'void_type_node' if there are no
1602 are no variable arguments. If there are variable arguments, then
1603 we won't pass anything in registers */
1607 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
1608 param
!= 0; param
= next_param
)
1610 next_param
= TREE_CHAIN (param
);
1611 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
1615 cum
->maybe_vaarg
= true;
1619 if ((!fntype
&& !libname
)
1620 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
1621 cum
->maybe_vaarg
= 1;
1623 if (TARGET_DEBUG_ARG
)
1624 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
1629 /* x86-64 register passing implementation.  See the x86-64 ABI for details.  The goal
1630    of this code is to classify each 8 bytes of the incoming argument by the register
1631    class and assign registers accordingly. */
1633 /* Return the union class of CLASS1 and CLASS2.
1634 See the x86-64 PS ABI for details. */
1636 static enum x86_64_reg_class
1637 merge_classes (class1
, class2
)
1638 enum x86_64_reg_class class1
, class2
;
1640 /* Rule #1: If both classes are equal, this is the resulting class. */
1641 if (class1
== class2
)
1644 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1646 if (class1
== X86_64_NO_CLASS
)
1648 if (class2
== X86_64_NO_CLASS
)
1651 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1652 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
1653 return X86_64_MEMORY_CLASS
;
1655 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1656 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
1657 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
1658 return X86_64_INTEGERSI_CLASS
;
1659 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
1660 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
1661 return X86_64_INTEGER_CLASS
;
1663 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1664 if (class1
== X86_64_X87_CLASS
|| class1
== X86_64_X87UP_CLASS
1665 || class2
== X86_64_X87_CLASS
|| class2
== X86_64_X87UP_CLASS
)
1666 return X86_64_MEMORY_CLASS
;
1668 /* Rule #6: Otherwise class SSE is used. */
1669 return X86_64_SSE_CLASS
;
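/* A few example merges, following the numbered rules above:
     merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS)
       == X86_64_INTEGERSI_CLASS   (rule 4)
     merge_classes (X86_64_X87_CLASS, X86_64_SSE_CLASS)
       == X86_64_MEMORY_CLASS      (rule 5)
     merge_classes (X86_64_SSE_CLASS, X86_64_SSEUP_CLASS)
       == X86_64_SSE_CLASS         (rule 6)  */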
1672 /* Classify the argument of type TYPE and mode MODE.
1673 CLASSES will be filled by the register class used to pass each word
1674 of the operand. The number of words is returned. In case the parameter
1675 should be passed in memory, 0 is returned. As a special case for zero
1676 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1678    BIT_OFFSET is used internally for handling records and specifies the
1679    offset in bits modulo 256 to avoid overflow cases.
1681 See the x86-64 PS ABI for details.
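   As a worked example (a sketch, not taken from this file): for
   struct s { int a; double b; } the size is 16 bytes, so words = 2; the int
   field classifies the first eightbyte as X86_64_INTEGERSI_CLASS and the
   double classifies the second as X86_64_SSEDF_CLASS, so 2 is returned and
   the container later built from these classes uses one general purpose
   register and one SSE register.  */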
1685 classify_argument (mode
, type
, classes
, bit_offset
)
1686 enum machine_mode mode
;
1688 enum x86_64_reg_class classes
[MAX_CLASSES
];
1692 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1693 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1695 if (type
&& AGGREGATE_TYPE_P (type
))
1699 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
1701 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1705 for (i
= 0; i
< words
; i
++)
1706 classes
[i
] = X86_64_NO_CLASS
;
1708 /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
1709    signal the memory class, so handle it as a special case.  */
1712 classes
[0] = X86_64_NO_CLASS
;
1716 /* Classify each field of record and merge classes. */
1717 if (TREE_CODE (type
) == RECORD_TYPE
)
1719 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1721 if (TREE_CODE (field
) == FIELD_DECL
)
1725 /* Bitfields are always classified as integer. Handle them
1726 early, since later code would consider them to be
1727 misaligned integers. */
1728 if (DECL_BIT_FIELD (field
))
1730 for (i
= int_bit_position (field
) / 8 / 8;
1731 i
< (int_bit_position (field
)
1732 + tree_low_cst (DECL_SIZE (field
), 0)
1735 merge_classes (X86_64_INTEGER_CLASS
,
1740 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1741 TREE_TYPE (field
), subclasses
,
1742 (int_bit_position (field
)
1743 + bit_offset
) % 256);
1746 for (i
= 0; i
< num
; i
++)
1749 (int_bit_position (field
) + bit_offset
) / 8 / 8;
1751 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
      /* Arrays are handled as small records.  */
      else if (TREE_CODE (type) == ARRAY_TYPE)
	{
	  int num;
	  num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				   TREE_TYPE (type), subclasses, bit_offset);
	  if (!num)
	    return 0;

	  /* The partial classes are now full classes.  */
	  if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	    subclasses[0] = X86_64_SSE_CLASS;
	  if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
	    subclasses[0] = X86_64_INTEGER_CLASS;

	  for (i = 0; i < words; i++)
	    classes[i] = subclasses[i % num];
	}
      /* Unions are similar to RECORD_TYPE but offset is always 0.  */
      else if (TREE_CODE (type) == UNION_TYPE
	       || TREE_CODE (type) == QUAL_UNION_TYPE)
	{
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;
		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	}
      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
	    classes[i] = X86_64_SSE_CLASS;

	  /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
	    classes[i] = X86_64_SSE_CLASS;
	}
      return words;
    }
  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }
  /* Classification of atomic types.  */
  switch (mode)
    {
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      return 1;
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case CTImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      classes[2] = classes[3] = X86_64_INTEGER_CLASS;
      return 4;
    case SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case TFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TCmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      classes[2] = X86_64_X87_CLASS;
      classes[3] = X86_64_X87UP_CLASS;
      return 4;
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    default:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    }
}
/* Examine the argument and set the number of registers required in each
   class.  Return 0 iff the parameter should be passed in memory.  */

static int
examine_argument (mode, type, in_return, int_nregs, sse_nregs)
     enum machine_mode mode;
     tree type;
     int in_return;
     int *int_nregs, *sse_nregs;
{
  enum x86_64_reg_class class[MAX_CLASSES];
  int n = classify_argument (mode, type, class, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (class[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	if (!in_return)
	  return 0;
	break;
      case X86_64_MEMORY_CLASS:
	abort ();
      }
  return 1;
}
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */
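
/* Illustrative example (an exposition aid): for a type classified as
   { INTEGER, SSEDF }, such as struct { int a; int b; double d; }, the
   container built below is a PARALLEL of EXPR_LIST entries pairing a
   general-purpose register with byte offset 0 and an SSE register with byte
   offset 8, which is how an argument split across register files is
   described to the rest of the compiler.  */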
1946 construct_container (mode
, type
, in_return
, nintregs
, nsseregs
, intreg
, sse_regno
)
1947 enum machine_mode mode
;
1950 int nintregs
, nsseregs
;
1954 enum machine_mode tmpmode
;
1956 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1957 enum x86_64_reg_class
class[MAX_CLASSES
];
1961 int needed_sseregs
, needed_intregs
;
1962 rtx exp
[MAX_CLASSES
];
1965 n
= classify_argument (mode
, type
, class, 0);
1966 if (TARGET_DEBUG_ARG
)
1969 fprintf (stderr
, "Memory class\n");
1972 fprintf (stderr
, "Classes:");
1973 for (i
= 0; i
< n
; i
++)
1975 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
1977 fprintf (stderr
, "\n");
1982 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
, &needed_sseregs
))
1984 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
1987 /* First construct simple cases. Avoid SCmode, since we want to use
1988 single register to pass this type. */
1989 if (n
== 1 && mode
!= SCmode
)
1992 case X86_64_INTEGER_CLASS
:
1993 case X86_64_INTEGERSI_CLASS
:
1994 return gen_rtx_REG (mode
, intreg
[0]);
1995 case X86_64_SSE_CLASS
:
1996 case X86_64_SSESF_CLASS
:
1997 case X86_64_SSEDF_CLASS
:
1998 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
1999 case X86_64_X87_CLASS
:
2000 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
2001 case X86_64_NO_CLASS
:
2002 /* Zero sized array, struct or class. */
2007 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
)
2008 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2010 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
2011 return gen_rtx_REG (TFmode
, FIRST_STACK_REG
);
2012 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
2013 && class[1] == X86_64_INTEGER_CLASS
2014 && (mode
== CDImode
|| mode
== TImode
)
2015 && intreg
[0] + 1 == intreg
[1])
2016 return gen_rtx_REG (mode
, intreg
[0]);
2018 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
2019 && class[2] == X86_64_X87_CLASS
&& class[3] == X86_64_X87UP_CLASS
)
2020 return gen_rtx_REG (TCmode
, FIRST_STACK_REG
);
2022 /* Otherwise figure out the entries of the PARALLEL. */
2023 for (i
= 0; i
< n
; i
++)
2027 case X86_64_NO_CLASS
:
2029 case X86_64_INTEGER_CLASS
:
2030 case X86_64_INTEGERSI_CLASS
:
	/* Merge TImodes on aligned occasions here too.  */
2032 if (i
* 8 + 8 > bytes
)
2033 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
2034 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
	/* We've requested 24 bytes, for which no integer mode exists.  Use DImode.  */
2039 if (tmpmode
== BLKmode
)
2041 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2042 gen_rtx_REG (tmpmode
, *intreg
),
2046 case X86_64_SSESF_CLASS
:
2047 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2048 gen_rtx_REG (SFmode
,
2049 SSE_REGNO (sse_regno
)),
2053 case X86_64_SSEDF_CLASS
:
2054 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2055 gen_rtx_REG (DFmode
,
2056 SSE_REGNO (sse_regno
)),
2060 case X86_64_SSE_CLASS
:
2061 if (i
< n
&& class[i
+ 1] == X86_64_SSEUP_CLASS
)
2062 tmpmode
= TImode
, i
++;
2065 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2066 gen_rtx_REG (tmpmode
,
2067 SSE_REGNO (sse_regno
)),
2075 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2076 for (i
= 0; i
< nexps
; i
++)
2077 XVECEXP (ret
, 0, i
) = exp
[i
];
/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */
2086 function_arg_advance (cum
, mode
, type
, named
)
2087 CUMULATIVE_ARGS
*cum
; /* current arg information */
2088 enum machine_mode mode
; /* current arg mode */
2089 tree type
; /* type of the argument or 0 if lib support */
2090 int named
; /* whether or not the argument was named */
2093 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2094 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2096 if (TARGET_DEBUG_ARG
)
2098 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2099 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2102 int int_nregs
, sse_nregs
;
2103 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
2104 cum
->words
+= words
;
2105 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2107 cum
->nregs
-= int_nregs
;
2108 cum
->sse_nregs
-= sse_nregs
;
2109 cum
->regno
+= int_nregs
;
2110 cum
->sse_regno
+= sse_nregs
;
2113 cum
->words
+= words
;
2117 if (TARGET_SSE
&& mode
== TImode
)
2119 cum
->sse_words
+= words
;
2120 cum
->sse_nregs
-= 1;
2121 cum
->sse_regno
+= 1;
2122 if (cum
->sse_nregs
<= 0)
2130 cum
->words
+= words
;
2131 cum
->nregs
-= words
;
2132 cum
->regno
+= words
;
2134 if (cum
->nregs
<= 0)
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */
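
/* Illustrative example (an exposition aid): on x86-64, for a call such as

     extern int f (int a, double b);

   the first named argument is classified INTEGER and the second SSE, so
   function_arg returns a hard register rtx for %edi for A and one for %xmm0
   for B; once the available argument registers are exhausted it returns
   zero and the argument is passed on the stack.  */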
2158 function_arg (cum
, mode
, type
, named
)
2159 CUMULATIVE_ARGS
*cum
; /* current arg information */
2160 enum machine_mode mode
; /* current arg mode */
2161 tree type
; /* type of the argument or 0 if lib support */
2162 int named
; /* != 0 for normal args, == 0 for ... args */
2166 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2167 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
  /* Handle a hidden AL argument containing the number of registers for varargs
     x86-64 functions.  For the i386 ABI just return constm1_rtx to avoid
2172 if (mode
== VOIDmode
)
2175 return GEN_INT (cum
->maybe_vaarg
2176 ? (cum
->sse_nregs
< 0
2184 ret
= construct_container (mode
, type
, 0, cum
->nregs
, cum
->sse_nregs
,
2185 &x86_64_int_parameter_registers
[cum
->regno
],
2190 /* For now, pass fp/complex values on the stack. */
2199 if (words
<= cum
->nregs
)
2200 ret
= gen_rtx_REG (mode
, cum
->regno
);
2204 ret
= gen_rtx_REG (mode
, cum
->sse_regno
);
2208 if (TARGET_DEBUG_ARG
)
2211 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
2212 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2215 fprintf (stderr
, ", reg=%%e%s", reg_names
[ REGNO (ret
) ]);
2217 fprintf (stderr
, ", stack");
2219 fprintf (stderr
, " )\n");
/* Gives the alignment boundary, in bits, of an argument with the specified mode
   and type.  */

ix86_function_arg_boundary (mode, type)
     enum machine_mode mode;
     tree type;
{
  int align;
  if (!TARGET_64BIT)
    return PARM_BOUNDARY;
  if (type)
    align = TYPE_ALIGN (type);
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  return align;
}
/* Return true if N is a possible register number of function value.  */

ix86_function_value_regno_p (regno)
     int regno;
{
  if (!TARGET_64BIT)
    {
      return ((regno) == 0
	      || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
	      || ((regno) == FIRST_SSE_REG && TARGET_SSE));
    }
  return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
	  || ((regno) == FIRST_SSE_REG && TARGET_SSE)
	  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
}
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

ix86_function_value (valtype)
     tree valtype;
{
  if (TARGET_64BIT)
    {
      rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
				     REGPARM_MAX, SSE_REGPARM_MAX,
				     x86_64_int_return_registers, 0);
      /* For zero sized structures, construct_container returns NULL, but we
	 need to keep the rest of the compiler happy by returning a meaningful
	 value.  */
      if (!ret)
	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
      return ret;
    }
  else
    return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
}
/* Return false iff type is returned in memory.  */

ix86_return_in_memory (type)
     tree type;
{
  if (TARGET_64BIT)
    {
      int needed_intregs, needed_sseregs;
      return !examine_argument (TYPE_MODE (type), type, 1,
				&needed_intregs, &needed_sseregs);
    }
  else
    {
      if (TYPE_MODE (type) == BLKmode
	  || (VECTOR_MODE_P (TYPE_MODE (type))
	      && int_size_in_bytes (type) == 8)
	  || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
	      && TYPE_MODE (type) != TFmode
	      && !VECTOR_MODE_P (TYPE_MODE (type))))
	return 1;
      return 0;
    }
}
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

ix86_libcall_value (mode)
     enum machine_mode mode;
{
  if (TARGET_64BIT)
    {
      switch (mode)
	{
	case SFmode:
	case SCmode:
	case DFmode:
	case DCmode:
	  return gen_rtx_REG (mode, FIRST_SSE_REG);
	case TFmode:
	case TCmode:
	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
	default:
	  return gen_rtx_REG (mode, 0);
	}
    }
  else
    return gen_rtx_REG (mode, VALUE_REGNO (mode));
}
/* Create the va_list data type.  */

ix86_build_va_list ()
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT)
    return build_pointer_type (char_type_node);

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
/* Perform any needed actions for a function that is receiving a
   variable number of arguments.

   MODE and TYPE are the mode and type of the current parameter.

   PRETEND_SIZE is a variable that should be set to the amount of stack
   that must be pushed by the prolog to pretend that our caller pushed
   it.

   Normally, this macro will push all remaining incoming registers on the
   stack and set PRETEND_SIZE to the length of the registers pushed.  */
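
/* Illustrative sketch (an exposition aid, relying on REGPARM_MAX being 6 and
   SSE_REGPARM_MAX being 8 on x86-64): the register save area set up below
   stores the integer argument registers in the first 6 * 8 = 48 bytes and
   the SSE argument registers in 16-byte slots starting at offset
   8 * REGPARM_MAX; the gp_offset and fp_offset fields initialized in
   ix86_va_start index into this block.  */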
2392 ix86_setup_incoming_varargs (cum
, mode
, type
, pretend_size
, no_rtl
)
2393 CUMULATIVE_ARGS
*cum
;
2394 enum machine_mode mode
;
2396 int *pretend_size ATTRIBUTE_UNUSED
;
2400 CUMULATIVE_ARGS next_cum
;
2401 rtx save_area
= NULL_RTX
, mem
;
2414 /* Indicate to allocate space on the stack for varargs save area. */
2415 ix86_save_varrargs_registers
= 1;
2417 fntype
= TREE_TYPE (current_function_decl
);
2418 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
2419 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
2420 != void_type_node
));
2422 /* For varargs, we do not want to skip the dummy va_dcl argument.
2423 For stdargs, we do want to skip the last named argument. */
2426 function_arg_advance (&next_cum
, mode
, type
, 1);
2429 save_area
= frame_pointer_rtx
;
2431 set
= get_varargs_alias_set ();
2433 for (i
= next_cum
.regno
; i
< ix86_regparm
; i
++)
2435 mem
= gen_rtx_MEM (Pmode
,
2436 plus_constant (save_area
, i
* UNITS_PER_WORD
));
2437 set_mem_alias_set (mem
, set
);
2438 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
2439 x86_64_int_parameter_registers
[i
]));
2442 if (next_cum
.sse_nregs
)
      /* Now emit code to save SSE registers.  The AX parameter contains the
	 number of SSE parameter registers used to call this function.  We use
	 the sse_prologue_save insn template that produces a computed jump
	 across the SSE saves.  We need some preparation work to get this
	 working.  */
2449 label
= gen_label_rtx ();
2450 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
2452 /* Compute address to jump to :
2453 label - 5*eax + nnamed_sse_arguments*5 */
2454 tmp_reg
= gen_reg_rtx (Pmode
);
2455 nsse_reg
= gen_reg_rtx (Pmode
);
2456 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
2457 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2458 gen_rtx_MULT (Pmode
, nsse_reg
,
2460 if (next_cum
.sse_regno
)
2463 gen_rtx_CONST (DImode
,
2464 gen_rtx_PLUS (DImode
,
2466 GEN_INT (next_cum
.sse_regno
* 4))));
2468 emit_move_insn (nsse_reg
, label_ref
);
2469 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
      /* Compute the address of the memory block we save into.  We always use
	 a pointer pointing 127 bytes past the first byte to store, so that
	 each offset fits in a signed 8-bit displacement and the instruction
	 size stays within 4 bytes.  */
2474 tmp_reg
= gen_reg_rtx (Pmode
);
2475 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2476 plus_constant (save_area
,
2477 8 * REGPARM_MAX
+ 127)));
2478 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
2479 set_mem_alias_set (mem
, set
);
2480 set_mem_align (mem
, BITS_PER_WORD
);
2482 /* And finally do the dirty job! */
2483 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
2484 GEN_INT (next_cum
.sse_regno
), label
));
2489 /* Implement va_start. */
2492 ix86_va_start (stdarg_p
, valist
, nextarg
)
2497 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
2498 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2499 tree gpr
, fpr
, ovf
, sav
, t
;
2501 /* Only 64bit target needs something special. */
2504 std_expand_builtin_va_start (stdarg_p
, valist
, nextarg
);
2508 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2509 f_fpr
= TREE_CHAIN (f_gpr
);
2510 f_ovf
= TREE_CHAIN (f_fpr
);
2511 f_sav
= TREE_CHAIN (f_ovf
);
2513 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2514 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2515 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2516 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2517 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2519 /* Count number of gp and fp argument registers used. */
2520 words
= current_function_args_info
.words
;
2521 n_gpr
= current_function_args_info
.regno
;
2522 n_fpr
= current_function_args_info
.sse_regno
;
2524 if (TARGET_DEBUG_ARG
)
2525 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2526 (int) words
, (int) n_gpr
, (int) n_fpr
);
2528 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
2529 build_int_2 (n_gpr
* 8, 0));
2530 TREE_SIDE_EFFECTS (t
) = 1;
2531 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2533 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
2534 build_int_2 (n_fpr
* 16 + 8*REGPARM_MAX
, 0));
2535 TREE_SIDE_EFFECTS (t
) = 1;
2536 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2538 /* Find the overflow area. */
2539 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
2541 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
2542 build_int_2 (words
* UNITS_PER_WORD
, 0));
2543 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2544 TREE_SIDE_EFFECTS (t
) = 1;
2545 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
  /* Find the register save area.
     The function prologue saves it right above the stack frame.  */
2549 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
2550 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
2551 TREE_SIDE_EFFECTS (t
) = 1;
2552 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2555 /* Implement va_arg. */
2557 ix86_va_arg (valist
, type
)
2560 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
2561 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2562 tree gpr
, fpr
, ovf
, sav
, t
;
2564 rtx lab_false
, lab_over
= NULL_RTX
;
2568 /* Only 64bit target needs something special. */
2571 return std_expand_builtin_va_arg (valist
, type
);
2574 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2575 f_fpr
= TREE_CHAIN (f_gpr
);
2576 f_ovf
= TREE_CHAIN (f_fpr
);
2577 f_sav
= TREE_CHAIN (f_ovf
);
2579 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2580 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2581 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2582 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2583 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2585 size
= int_size_in_bytes (type
);
2586 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2588 container
= construct_container (TYPE_MODE (type
), type
, 0,
2589 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
2591 * Pull the value out of the saved registers ...
2594 addr_rtx
= gen_reg_rtx (Pmode
);
2598 rtx int_addr_rtx
, sse_addr_rtx
;
2599 int needed_intregs
, needed_sseregs
;
2602 lab_over
= gen_label_rtx ();
2603 lab_false
= gen_label_rtx ();
2605 examine_argument (TYPE_MODE (type
), type
, 0,
2606 &needed_intregs
, &needed_sseregs
);
2609 need_temp
= ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
2610 || TYPE_ALIGN (type
) > 128);
      /* In case we are passing a structure, verify that it is a consecutive
	 block in the register save area.  If not, we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive.  */
2617 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
2621 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2623 rtx slot
= XVECEXP (container
, 0, i
);
2624 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
2625 || INTVAL (XEXP (slot
, 1)) != i
* 16)
2633 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2635 rtx slot
= XVECEXP (container
, 0, i
);
2636 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
2637 || INTVAL (XEXP (slot
, 1)) != i
* 8)
2644 int_addr_rtx
= addr_rtx
;
2645 sse_addr_rtx
= addr_rtx
;
2649 int_addr_rtx
= gen_reg_rtx (Pmode
);
2650 sse_addr_rtx
= gen_reg_rtx (Pmode
);
2652 /* First ensure that we fit completely in registers. */
2655 emit_cmp_and_jump_insns (expand_expr
2656 (gpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2657 GEN_INT ((REGPARM_MAX
- needed_intregs
+
2658 1) * 8), GE
, const1_rtx
, SImode
,
2663 emit_cmp_and_jump_insns (expand_expr
2664 (fpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2665 GEN_INT ((SSE_REGPARM_MAX
-
2666 needed_sseregs
+ 1) * 16 +
2667 REGPARM_MAX
* 8), GE
, const1_rtx
,
2668 SImode
, 1, lab_false
);
2671 /* Compute index to start of area used for integer regs. */
2674 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, gpr
);
2675 r
= expand_expr (t
, int_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2676 if (r
!= int_addr_rtx
)
2677 emit_move_insn (int_addr_rtx
, r
);
2681 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, fpr
);
2682 r
= expand_expr (t
, sse_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2683 if (r
!= sse_addr_rtx
)
2684 emit_move_insn (sse_addr_rtx
, r
);
2691 /* Never use the memory itself, as it has the alias set. */
2692 addr_rtx
= XEXP (assign_temp (type
, 0, 1, 0), 0);
2693 mem
= gen_rtx_MEM (BLKmode
, addr_rtx
);
2694 set_mem_alias_set (mem
, get_varargs_alias_set ());
2695 set_mem_align (mem
, BITS_PER_UNIT
);
2697 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
2699 rtx slot
= XVECEXP (container
, 0, i
);
2700 rtx reg
= XEXP (slot
, 0);
2701 enum machine_mode mode
= GET_MODE (reg
);
2707 if (SSE_REGNO_P (REGNO (reg
)))
2709 src_addr
= sse_addr_rtx
;
2710 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
2714 src_addr
= int_addr_rtx
;
2715 src_offset
= REGNO (reg
) * 8;
2717 src_mem
= gen_rtx_MEM (mode
, src_addr
);
2718 set_mem_alias_set (src_mem
, get_varargs_alias_set ());
2719 src_mem
= adjust_address (src_mem
, mode
, src_offset
);
2720 dest_mem
= adjust_address (mem
, mode
, INTVAL (XEXP (slot
, 1)));
2721 emit_move_insn (dest_mem
, src_mem
);
2728 build (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
2729 build_int_2 (needed_intregs
* 8, 0));
2730 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
2731 TREE_SIDE_EFFECTS (t
) = 1;
2732 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2737 build (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
2738 build_int_2 (needed_sseregs
* 16, 0));
2739 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
2740 TREE_SIDE_EFFECTS (t
) = 1;
2741 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2744 emit_jump_insn (gen_jump (lab_over
));
2746 emit_label (lab_false
);
2749 /* ... otherwise out of the overflow area. */
2751 /* Care for on-stack alignment if needed. */
2752 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
2756 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
2757 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
, build_int_2 (align
- 1, 0));
2758 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
, build_int_2 (-align
, -1));
2762 r
= expand_expr (t
, addr_rtx
, Pmode
, EXPAND_NORMAL
);
2764 emit_move_insn (addr_rtx
, r
);
2767 build (PLUS_EXPR
, TREE_TYPE (t
), t
,
2768 build_int_2 (rsize
* UNITS_PER_WORD
, 0));
2769 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2770 TREE_SIDE_EFFECTS (t
) = 1;
2771 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2774 emit_label (lab_over
);
2779 /* Return nonzero if OP is general operand representable on x86_64. */
2782 x86_64_general_operand (op
, mode
)
2784 enum machine_mode mode
;
2787 return general_operand (op
, mode
);
2788 if (nonimmediate_operand (op
, mode
))
2790 return x86_64_sign_extended_value (op
);
2793 /* Return nonzero if OP is general operand representable on x86_64
2794 as either sign extended or zero extended constant. */
2797 x86_64_szext_general_operand (op
, mode
)
2799 enum machine_mode mode
;
2802 return general_operand (op
, mode
);
2803 if (nonimmediate_operand (op
, mode
))
2805 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
2808 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2811 x86_64_nonmemory_operand (op
, mode
)
2813 enum machine_mode mode
;
2816 return nonmemory_operand (op
, mode
);
2817 if (register_operand (op
, mode
))
2819 return x86_64_sign_extended_value (op
);
2822 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2825 x86_64_movabs_operand (op
, mode
)
2827 enum machine_mode mode
;
2829 if (!TARGET_64BIT
|| !flag_pic
)
2830 return nonmemory_operand (op
, mode
);
2831 if (register_operand (op
, mode
) || x86_64_sign_extended_value (op
))
2833 if (CONSTANT_P (op
) && !symbolic_reference_mentioned_p (op
))
2838 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2841 x86_64_szext_nonmemory_operand (op
, mode
)
2843 enum machine_mode mode
;
2846 return nonmemory_operand (op
, mode
);
2847 if (register_operand (op
, mode
))
2849 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
2852 /* Return nonzero if OP is immediate operand representable on x86_64. */
2855 x86_64_immediate_operand (op
, mode
)
2857 enum machine_mode mode
;
2860 return immediate_operand (op
, mode
);
2861 return x86_64_sign_extended_value (op
);
2864 /* Return nonzero if OP is immediate operand representable on x86_64. */
2867 x86_64_zext_immediate_operand (op
, mode
)
2869 enum machine_mode mode ATTRIBUTE_UNUSED
;
2871 return x86_64_zero_extended_value (op
);
2874 /* Return nonzero if OP is (const_int 1), else return zero. */
2877 const_int_1_operand (op
, mode
)
2879 enum machine_mode mode ATTRIBUTE_UNUSED
;
2881 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) == 1);
2884 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2885 reference and a constant. */
2888 symbolic_operand (op
, mode
)
2890 enum machine_mode mode ATTRIBUTE_UNUSED
;
2892 switch (GET_CODE (op
))
2900 if (GET_CODE (op
) == SYMBOL_REF
2901 || GET_CODE (op
) == LABEL_REF
2902 || (GET_CODE (op
) == UNSPEC
2903 && (XINT (op
, 1) == UNSPEC_GOT
2904 || XINT (op
, 1) == UNSPEC_GOTOFF
2905 || XINT (op
, 1) == UNSPEC_GOTPCREL
)))
2907 if (GET_CODE (op
) != PLUS
2908 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
2912 if (GET_CODE (op
) == SYMBOL_REF
2913 || GET_CODE (op
) == LABEL_REF
)
2915 /* Only @GOTOFF gets offsets. */
2916 if (GET_CODE (op
) != UNSPEC
2917 || XINT (op
, 1) != UNSPEC_GOTOFF
)
2920 op
= XVECEXP (op
, 0, 0);
2921 if (GET_CODE (op
) == SYMBOL_REF
2922 || GET_CODE (op
) == LABEL_REF
)
2931 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2934 pic_symbolic_operand (op
, mode
)
2936 enum machine_mode mode ATTRIBUTE_UNUSED
;
2938 if (GET_CODE (op
) != CONST
)
2943 if (GET_CODE (XEXP (op
, 0)) == UNSPEC
)
2948 if (GET_CODE (op
) == UNSPEC
)
2950 if (GET_CODE (op
) != PLUS
2951 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
2954 if (GET_CODE (op
) == UNSPEC
)
2960 /* Return true if OP is a symbolic operand that resolves locally. */
2963 local_symbolic_operand (op
, mode
)
2965 enum machine_mode mode ATTRIBUTE_UNUSED
;
2967 if (GET_CODE (op
) == LABEL_REF
)
2970 if (GET_CODE (op
) == CONST
2971 && GET_CODE (XEXP (op
, 0)) == PLUS
2972 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
2973 op
= XEXP (XEXP (op
, 0), 0);
2975 if (GET_CODE (op
) != SYMBOL_REF
)
2978 /* These we've been told are local by varasm and encode_section_info
2980 if (CONSTANT_POOL_ADDRESS_P (op
) || SYMBOL_REF_FLAG (op
))
2983 /* There is, however, a not insubstantial body of code in the rest of
2984 the compiler that assumes it can just stick the results of
2985 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2986 /* ??? This is a hack. Should update the body of the compiler to
2987 always create a DECL an invoke targetm.encode_section_info. */
2988 if (strncmp (XSTR (op
, 0), internal_label_prefix
,
2989 internal_label_prefix_len
) == 0)
2995 /* Test for various thread-local symbols. See ix86_encode_section_info. */
2998 tls_symbolic_operand (op
, mode
)
3000 enum machine_mode mode ATTRIBUTE_UNUSED
;
3002 const char *symbol_str
;
3004 if (GET_CODE (op
) != SYMBOL_REF
)
3006 symbol_str
= XSTR (op
, 0);
3008 if (symbol_str
[0] != '%')
3010 return strchr (tls_model_chars
, symbol_str
[1]) - tls_model_chars
;
3014 tls_symbolic_operand_1 (op
, kind
)
3016 enum tls_model kind
;
3018 const char *symbol_str
;
3020 if (GET_CODE (op
) != SYMBOL_REF
)
3022 symbol_str
= XSTR (op
, 0);
3024 return symbol_str
[0] == '%' && symbol_str
[1] == tls_model_chars
[kind
];
3028 global_dynamic_symbolic_operand (op
, mode
)
3030 enum machine_mode mode ATTRIBUTE_UNUSED
;
3032 return tls_symbolic_operand_1 (op
, TLS_MODEL_GLOBAL_DYNAMIC
);
3036 local_dynamic_symbolic_operand (op
, mode
)
3038 enum machine_mode mode ATTRIBUTE_UNUSED
;
3040 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_DYNAMIC
);
3044 initial_exec_symbolic_operand (op
, mode
)
3046 enum machine_mode mode ATTRIBUTE_UNUSED
;
3048 return tls_symbolic_operand_1 (op
, TLS_MODEL_INITIAL_EXEC
);
3052 local_exec_symbolic_operand (op
, mode
)
3054 enum machine_mode mode ATTRIBUTE_UNUSED
;
3056 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_EXEC
);
3059 /* Test for a valid operand for a call instruction. Don't allow the
3060 arg pointer register or virtual regs since they may decay into
3061 reg + const, which the patterns can't handle. */
3064 call_insn_operand (op
, mode
)
3066 enum machine_mode mode ATTRIBUTE_UNUSED
;
3068 /* Disallow indirect through a virtual register. This leads to
3069 compiler aborts when trying to eliminate them. */
3070 if (GET_CODE (op
) == REG
3071 && (op
== arg_pointer_rtx
3072 || op
== frame_pointer_rtx
3073 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
3074 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
3077 /* Disallow `call 1234'. Due to varying assembler lameness this
3078 gets either rejected or translated to `call .+1234'. */
3079 if (GET_CODE (op
) == CONST_INT
)
3082 /* Explicitly allow SYMBOL_REF even if pic. */
3083 if (GET_CODE (op
) == SYMBOL_REF
)
  /* Half-pic doesn't allow anything but registers and constants.
     We've just taken care of the latter.  */
3089 return register_operand (op
, Pmode
);
3091 /* Otherwise we can allow any general_operand in the address. */
3092 return general_operand (op
, Pmode
);
3096 constant_call_address_operand (op
, mode
)
3098 enum machine_mode mode ATTRIBUTE_UNUSED
;
3100 if (GET_CODE (op
) == CONST
3101 && GET_CODE (XEXP (op
, 0)) == PLUS
3102 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3103 op
= XEXP (XEXP (op
, 0), 0);
3104 return GET_CODE (op
) == SYMBOL_REF
;
3107 /* Match exactly zero and one. */
3110 const0_operand (op
, mode
)
3112 enum machine_mode mode
;
3114 return op
== CONST0_RTX (mode
);
3118 const1_operand (op
, mode
)
3120 enum machine_mode mode ATTRIBUTE_UNUSED
;
3122 return op
== const1_rtx
;
3125 /* Match 2, 4, or 8. Used for leal multiplicands. */
3128 const248_operand (op
, mode
)
3130 enum machine_mode mode ATTRIBUTE_UNUSED
;
3132 return (GET_CODE (op
) == CONST_INT
3133 && (INTVAL (op
) == 2 || INTVAL (op
) == 4 || INTVAL (op
) == 8));
/* True if this is a constant appropriate for an increment or decrement.  */
3139 incdec_operand (op
, mode
)
3141 enum machine_mode mode ATTRIBUTE_UNUSED
;
  /* On Pentium 4, the inc and dec operations cause an extra dependency on the
     flags register, since the carry flag is not set.  */
3145 if (TARGET_PENTIUM4
&& !optimize_size
)
3147 return op
== const1_rtx
|| op
== constm1_rtx
;
3150 /* Return nonzero if OP is acceptable as operand of DImode shift
3154 shiftdi_operand (op
, mode
)
3156 enum machine_mode mode ATTRIBUTE_UNUSED
;
3159 return nonimmediate_operand (op
, mode
);
3161 return register_operand (op
, mode
);
3164 /* Return false if this is the stack pointer, or any other fake
3165 register eliminable to the stack pointer. Otherwise, this is
3168 This is used to prevent esp from being used as an index reg.
3169 Which would only happen in pathological cases. */
3172 reg_no_sp_operand (op
, mode
)
3174 enum machine_mode mode
;
3177 if (GET_CODE (t
) == SUBREG
)
3179 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
)
3182 return register_operand (op
, mode
);
3186 mmx_reg_operand (op
, mode
)
3188 enum machine_mode mode ATTRIBUTE_UNUSED
;
3190 return MMX_REG_P (op
);
3193 /* Return false if this is any eliminable register. Otherwise
3197 general_no_elim_operand (op
, mode
)
3199 enum machine_mode mode
;
3202 if (GET_CODE (t
) == SUBREG
)
3204 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3205 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3206 || t
== virtual_stack_dynamic_rtx
)
3209 && REGNO (t
) >= FIRST_VIRTUAL_REGISTER
3210 && REGNO (t
) <= LAST_VIRTUAL_REGISTER
)
3213 return general_operand (op
, mode
);
3216 /* Return false if this is any eliminable register. Otherwise
3217 register_operand or const_int. */
3220 nonmemory_no_elim_operand (op
, mode
)
3222 enum machine_mode mode
;
3225 if (GET_CODE (t
) == SUBREG
)
3227 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3228 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3229 || t
== virtual_stack_dynamic_rtx
)
3232 return GET_CODE (op
) == CONST_INT
|| register_operand (op
, mode
);
3235 /* Return true if op is a Q_REGS class register. */
3238 q_regs_operand (op
, mode
)
3240 enum machine_mode mode
;
3242 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3244 if (GET_CODE (op
) == SUBREG
)
3245 op
= SUBREG_REG (op
);
3246 return ANY_QI_REG_P (op
);
3249 /* Return true if op is a NON_Q_REGS class register. */
3252 non_q_regs_operand (op
, mode
)
3254 enum machine_mode mode
;
3256 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3258 if (GET_CODE (op
) == SUBREG
)
3259 op
= SUBREG_REG (op
);
3260 return NON_QI_REG_P (op
);
3263 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3266 sse_comparison_operator (op
, mode
)
3268 enum machine_mode mode ATTRIBUTE_UNUSED
;
3270 enum rtx_code code
= GET_CODE (op
);
3273 /* Operations supported directly. */
3283 /* These are equivalent to ones above in non-IEEE comparisons. */
3290 return !TARGET_IEEE_FP
;
3295 /* Return 1 if OP is a valid comparison operator in valid mode. */
3297 ix86_comparison_operator (op
, mode
)
3299 enum machine_mode mode
;
3301 enum machine_mode inmode
;
3302 enum rtx_code code
= GET_CODE (op
);
3303 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3305 if (GET_RTX_CLASS (code
) != '<')
3307 inmode
= GET_MODE (XEXP (op
, 0));
3309 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3311 enum rtx_code second_code
, bypass_code
;
3312 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3313 return (bypass_code
== NIL
&& second_code
== NIL
);
3320 if (inmode
== CCmode
|| inmode
== CCGCmode
3321 || inmode
== CCGOCmode
|| inmode
== CCNOmode
)
3324 case LTU
: case GTU
: case LEU
: case ORDERED
: case UNORDERED
: case GEU
:
3325 if (inmode
== CCmode
)
3329 if (inmode
== CCmode
|| inmode
== CCGCmode
|| inmode
== CCNOmode
)
3337 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3340 fcmov_comparison_operator (op
, mode
)
3342 enum machine_mode mode
;
3344 enum machine_mode inmode
;
3345 enum rtx_code code
= GET_CODE (op
);
3346 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3348 if (GET_RTX_CLASS (code
) != '<')
3350 inmode
= GET_MODE (XEXP (op
, 0));
3351 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3353 enum rtx_code second_code
, bypass_code
;
3354 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3355 if (bypass_code
!= NIL
|| second_code
!= NIL
)
3357 code
= ix86_fp_compare_code_to_integer (code
);
3359 /* i387 supports just limited amount of conditional codes. */
3362 case LTU
: case GTU
: case LEU
: case GEU
:
3363 if (inmode
== CCmode
|| inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3366 case ORDERED
: case UNORDERED
:
3374 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3377 promotable_binary_operator (op
, mode
)
3379 enum machine_mode mode ATTRIBUTE_UNUSED
;
3381 switch (GET_CODE (op
))
3384 /* Modern CPUs have same latency for HImode and SImode multiply,
3385 but 386 and 486 do HImode multiply faster. */
3386 return ix86_cpu
> PROCESSOR_I486
;
3398 /* Nearly general operand, but accept any const_double, since we wish
3399 to be able to drop them into memory rather than have them get pulled
3403 cmp_fp_expander_operand (op
, mode
)
3405 enum machine_mode mode
;
3407 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3409 if (GET_CODE (op
) == CONST_DOUBLE
)
3411 return general_operand (op
, mode
);
3414 /* Match an SI or HImode register for a zero_extract. */
3417 ext_register_operand (op
, mode
)
3419 enum machine_mode mode ATTRIBUTE_UNUSED
;
3422 if ((!TARGET_64BIT
|| GET_MODE (op
) != DImode
)
3423 && GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
3426 if (!register_operand (op
, VOIDmode
))
  /* Be careful to accept only registers having upper parts.  */
3430 regno
= REG_P (op
) ? REGNO (op
) : REGNO (SUBREG_REG (op
));
3431 return (regno
> LAST_VIRTUAL_REGISTER
|| regno
< 4);
3434 /* Return 1 if this is a valid binary floating-point operation.
3435 OP is the expression matched, and MODE is its mode. */
3438 binary_fp_operator (op
, mode
)
3440 enum machine_mode mode
;
3442 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3445 switch (GET_CODE (op
))
3451 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
3459 mult_operator (op
, mode
)
3461 enum machine_mode mode ATTRIBUTE_UNUSED
;
3463 return GET_CODE (op
) == MULT
;
3467 div_operator (op
, mode
)
3469 enum machine_mode mode ATTRIBUTE_UNUSED
;
3471 return GET_CODE (op
) == DIV
;
3475 arith_or_logical_operator (op
, mode
)
3477 enum machine_mode mode
;
3479 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
3480 && (GET_RTX_CLASS (GET_CODE (op
)) == 'c'
3481 || GET_RTX_CLASS (GET_CODE (op
)) == '2'));
3484 /* Returns 1 if OP is memory operand with a displacement. */
3487 memory_displacement_operand (op
, mode
)
3489 enum machine_mode mode
;
3491 struct ix86_address parts
;
3493 if (! memory_operand (op
, mode
))
3496 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
3499 return parts
.disp
!= NULL_RTX
;
3502 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3503 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3505 ??? It seems likely that this will only work because cmpsi is an
3506 expander, and no actual insns use this. */
3509 cmpsi_operand (op
, mode
)
3511 enum machine_mode mode
;
3513 if (nonimmediate_operand (op
, mode
))
3516 if (GET_CODE (op
) == AND
3517 && GET_MODE (op
) == SImode
3518 && GET_CODE (XEXP (op
, 0)) == ZERO_EXTRACT
3519 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
3520 && GET_CODE (XEXP (XEXP (op
, 0), 2)) == CONST_INT
3521 && INTVAL (XEXP (XEXP (op
, 0), 1)) == 8
3522 && INTVAL (XEXP (XEXP (op
, 0), 2)) == 8
3523 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
3529 /* Returns 1 if OP is memory operand that can not be represented by the
3533 long_memory_operand (op
, mode
)
3535 enum machine_mode mode
;
3537 if (! memory_operand (op
, mode
))
3540 return memory_address_length (op
) != 0;
3543 /* Return nonzero if the rtx is known aligned. */
3546 aligned_operand (op
, mode
)
3548 enum machine_mode mode
;
3550 struct ix86_address parts
;
3552 if (!general_operand (op
, mode
))
3555 /* Registers and immediate operands are always "aligned". */
3556 if (GET_CODE (op
) != MEM
)
3559 /* Don't even try to do any aligned optimizations with volatiles. */
3560 if (MEM_VOLATILE_P (op
))
3565 /* Pushes and pops are only valid on the stack pointer. */
3566 if (GET_CODE (op
) == PRE_DEC
3567 || GET_CODE (op
) == POST_INC
)
3570 /* Decode the address. */
3571 if (! ix86_decompose_address (op
, &parts
))
3574 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
3575 parts
.base
= SUBREG_REG (parts
.base
);
3576 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
3577 parts
.index
= SUBREG_REG (parts
.index
);
3579 /* Look for some component that isn't known to be aligned. */
3583 && REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 32)
3588 if (REGNO_POINTER_ALIGN (REGNO (parts
.base
)) < 32)
3593 if (GET_CODE (parts
.disp
) != CONST_INT
3594 || (INTVAL (parts
.disp
) & 3) != 0)
3598 /* Didn't find one -- this must be an aligned address. */
3602 /* Return true if the constant is something that can be loaded with
3603 a special instruction. Only handle 0.0 and 1.0; others are less
3607 standard_80387_constant_p (x
)
3610 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
  /* Note that the 80387 supports other constants, such as pi, that we should
     support too.  On some machines these are much slower to load as a
     standard constant than to load from doubles in memory.  */
3615 if (x
== CONST0_RTX (GET_MODE (x
)))
3617 if (x
== CONST1_RTX (GET_MODE (x
)))
3622 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3625 standard_sse_constant_p (x
)
3628 if (GET_CODE (x
) != CONST_DOUBLE
)
3630 return (x
== CONST0_RTX (GET_MODE (x
)));
3633 /* Returns 1 if OP contains a symbol reference */
3636 symbolic_reference_mentioned_p (op
)
3639 register const char *fmt
;
3642 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
3645 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
3646 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
3652 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
3653 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
3657 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */
3677 ix86_can_use_return_insn_p ()
3679 struct ix86_frame frame
;
3681 #ifdef NON_SAVING_SETJMP
3682 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
3686 if (! reload_completed
|| frame_pointer_needed
)
3689 /* Don't allow more than 32 pop, since that's all we can do
3690 with one instruction. */
3691 if (current_function_pops_args
3692 && current_function_args_size
>= 32768)
3695 ix86_compute_frame_layout (&frame
);
3696 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
/* Return 1 if VALUE can be stored in the sign extended immediate field.  */
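
/* Illustrative examples (an exposition aid): constants such as -1, 0 and
   0x7fffffff sign-extend from 32 bits to themselves and are accepted here,
   while 0x80000000 and 0x100000000 are not representable in a sign-extended
   32-bit immediate and must be loaded differently.  */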
3701 x86_64_sign_extended_value (value
)
3704 switch (GET_CODE (value
))
      /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
	 to be at least 32 and thus all acceptable constants are
	 represented as CONST_INT.  */
3710 if (HOST_BITS_PER_WIDE_INT
== 32)
3714 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (value
), DImode
);
3715 return trunc_int_for_mode (val
, SImode
) == val
;
3719 /* For certain code models, the symbolic references are known to fit. */
3721 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_KERNEL
;
3723 /* For certain code models, the code is near as well. */
3725 return ix86_cmodel
!= CM_LARGE
&& ix86_cmodel
!= CM_SMALL_PIC
;
3727 /* We also may accept the offsetted memory references in certain special
3730 if (GET_CODE (XEXP (value
, 0)) == UNSPEC
3731 && XINT (XEXP (value
, 0), 1) == UNSPEC_GOTPCREL
)
3733 else if (GET_CODE (XEXP (value
, 0)) == PLUS
)
3735 rtx op1
= XEXP (XEXP (value
, 0), 0);
3736 rtx op2
= XEXP (XEXP (value
, 0), 1);
3737 HOST_WIDE_INT offset
;
3739 if (ix86_cmodel
== CM_LARGE
)
3741 if (GET_CODE (op2
) != CONST_INT
)
3743 offset
= trunc_int_for_mode (INTVAL (op2
), DImode
);
3744 switch (GET_CODE (op1
))
	  /* For CM_SMALL assume that the latest object is 1MB below the end
	     of the 31-bit boundary.  We may also accept pretty large negative
	     constants knowing that all objects are in the positive half of
	     the address space.  */
3751 if (ix86_cmodel
== CM_SMALL
3752 && offset
< 1024*1024*1024
3753 && trunc_int_for_mode (offset
, SImode
) == offset
)
	  /* For CM_KERNEL we know that all objects reside in the negative
	     half of the 32-bit address space.  We may not accept negative
	     offsets, since they may be just off, and we may accept pretty
	     large positive ones.  */
3759 if (ix86_cmodel
== CM_KERNEL
3761 && trunc_int_for_mode (offset
, SImode
) == offset
)
3765 /* These conditions are similar to SYMBOL_REF ones, just the
3766 constraints for code models differ. */
3767 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
3768 && offset
< 1024*1024*1024
3769 && trunc_int_for_mode (offset
, SImode
) == offset
)
3771 if (ix86_cmodel
== CM_KERNEL
3773 && trunc_int_for_mode (offset
, SImode
) == offset
)
3786 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3788 x86_64_zero_extended_value (value
)
3791 switch (GET_CODE (value
))
3794 if (HOST_BITS_PER_WIDE_INT
== 32)
3795 return (GET_MODE (value
) == VOIDmode
3796 && !CONST_DOUBLE_HIGH (value
));
3800 if (HOST_BITS_PER_WIDE_INT
== 32)
3801 return INTVAL (value
) >= 0;
3803 return !(INTVAL (value
) & ~(HOST_WIDE_INT
) 0xffffffff);
3806 /* For certain code models, the symbolic references are known to fit. */
3808 return ix86_cmodel
== CM_SMALL
;
3810 /* For certain code models, the code is near as well. */
3812 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
;
3814 /* We also may accept the offsetted memory references in certain special
3817 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
3819 rtx op1
= XEXP (XEXP (value
, 0), 0);
3820 rtx op2
= XEXP (XEXP (value
, 0), 1);
3822 if (ix86_cmodel
== CM_LARGE
)
3824 switch (GET_CODE (op1
))
3828 /* For small code model we may accept pretty large positive
3829 offsets, since one bit is available for free. Negative
3830 offsets are limited by the size of NULL pointer area
3831 specified by the ABI. */
3832 if (ix86_cmodel
== CM_SMALL
3833 && GET_CODE (op2
) == CONST_INT
3834 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
3835 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
3838 /* ??? For the kernel, we may accept adjustment of
3839 -0x10000000, since we know that it will just convert
3840 negative address space to positive, but perhaps this
3841 is not worthwhile. */
3844 /* These conditions are similar to SYMBOL_REF ones, just the
3845 constraints for code models differ. */
3846 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
3847 && GET_CODE (op2
) == CONST_INT
3848 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
3849 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
3863 /* Value should be nonzero if functions must have frame pointers.
3864 Zero means the frame pointer need not be set up (and parms may
3865 be accessed via the stack pointer) in functions that seem suitable. */
3868 ix86_frame_pointer_required ()
3870 /* If we accessed previous frames, then the generated code expects
3871 to be able to access the saved ebp value in our frame. */
3872 if (cfun
->machine
->accesses_prev_frame
)
3875 /* Several x86 os'es need a frame pointer for other reasons,
3876 usually pertaining to setjmp. */
3877 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
3880 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3881 the frame pointer by default. Turn it back on now if we've not
3882 got a leaf function. */
3883 if (TARGET_OMIT_LEAF_FRAME_POINTER
&& ! leaf_function_p ())
3889 /* Record that the current function accesses previous call frames. */
3892 ix86_setup_frame_addresses ()
3894 cfun
->machine
->accesses_prev_frame
= 1;
3897 static char pic_label_name
[32];
3899 /* This function generates code for -fpic that loads %ebx with
3900 the return address of the caller and then returns. */
3903 ix86_asm_file_end (file
)
3908 if (pic_label_name
[0] == 0)
3911 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
3912 to updating relocations to a section being discarded such that this
3913 doesn't work. Ought to detect this at configure time. */
3915 /* The trick here is to create a linkonce section containing the
3916 pic label thunk, but to refer to it with an internal label.
3917 Because the label is internal, we don't have inter-dso name
3918 binding issues on hosts that don't support ".hidden".
3920 In order to use these macros, however, we must create a fake
3922 if (targetm
.have_named_sections
)
3924 tree decl
= build_decl (FUNCTION_DECL
,
3925 get_identifier ("i686.get_pc_thunk"),
3927 DECL_ONE_ONLY (decl
) = 1;
3928 (*targetm
.asm_out
.unique_section
) (decl
, 0);
3929 named_section (decl
, NULL
);
  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
     internal (non-global) label that's being emitted, it didn't make
     sense to have .type information for local labels.  This caused
     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
     me debug info for a label that you're declaring non-global?), so this
     was changed to call ASM_OUTPUT_LABEL() instead.  */
3943 ASM_OUTPUT_LABEL (file
, pic_label_name
);
3945 xops
[0] = pic_offset_table_rtx
;
3946 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
3947 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
3948 output_asm_insn ("ret", xops
);
3951 /* Emit code for the SET_GOT patterns. */
3954 output_set_got (dest
)
3960 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
3962 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
3964 xops
[2] = gen_rtx_LABEL_REF (Pmode
, gen_label_rtx ());
3967 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
3969 output_asm_insn ("call\t%a2", xops
);
3971 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, "L",
3972 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
3975 output_asm_insn ("pop{l}\t%0", xops
);
3979 if (! pic_label_name
[0])
3980 ASM_GENERATE_INTERNAL_LABEL (pic_label_name
, "LPR", 0);
3982 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, pic_label_name
);
3983 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
3984 output_asm_insn ("call\t%X2", xops
);
3987 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
3988 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
3990 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops
);
3995 /* Generate an "push" pattern for input ARG. */
4001 return gen_rtx_SET (VOIDmode
,
4003 gen_rtx_PRE_DEC (Pmode
,
4004 stack_pointer_rtx
)),
4008 /* Return 1 if we need to save REGNO. */
4010 ix86_save_reg (regno
, maybe_eh_return
)
4012 int maybe_eh_return
;
4014 if (regno
== PIC_OFFSET_TABLE_REGNUM
4015 && (regs_ever_live
[regno
]
4016 || current_function_profile
4017 || current_function_calls_eh_return
))
4020 if (current_function_calls_eh_return
&& maybe_eh_return
)
4025 unsigned test
= EH_RETURN_DATA_REGNO (i
);
4026 if (test
== INVALID_REGNUM
)
4033 return (regs_ever_live
[regno
]
4034 && !call_used_regs
[regno
]
4035 && !fixed_regs
[regno
]
4036 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
4039 /* Return number of registers to be saved on the stack. */
4047 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4048 if (ix86_save_reg (regno
, true))
4053 /* Return the offset between two registers, one to be eliminated, and the other
4054 its replacement, at the start of a routine. */
4057 ix86_initial_elimination_offset (from
, to
)
4061 struct ix86_frame frame
;
4062 ix86_compute_frame_layout (&frame
);
4064 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4065 return frame
.hard_frame_pointer_offset
;
4066 else if (from
== FRAME_POINTER_REGNUM
4067 && to
== HARD_FRAME_POINTER_REGNUM
)
4068 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
4071 if (to
!= STACK_POINTER_REGNUM
)
4073 else if (from
== ARG_POINTER_REGNUM
)
4074 return frame
.stack_pointer_offset
;
4075 else if (from
!= FRAME_POINTER_REGNUM
)
4078 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
/* Fill the structure ix86_frame describing the frame of the currently compiled function.  */
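
/* Illustrative sketch of the layout computed below (an exposition aid;
   offsets grow away from the incoming stack pointer):

     return address
     saved frame pointer          (if frame_pointer_needed)
     saved registers              (frame->nregs words)
     va_arg register save area    (if ix86_save_varrargs_registers)
     padding1                     (align to stack_alignment_needed)
     local frame data             (get_frame_size () bytes)
     outgoing arguments area
     padding2                     (align to preferred_alignment)  */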
4085 ix86_compute_frame_layout (frame
)
4086 struct ix86_frame
*frame
;
4088 HOST_WIDE_INT total_size
;
4089 int stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
4091 int preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
4092 HOST_WIDE_INT size
= get_frame_size ();
4094 frame
->nregs
= ix86_nsaved_regs ();
4097 /* Skip return address and saved base pointer. */
4098 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
4100 frame
->hard_frame_pointer_offset
= offset
;
4102 /* Do some sanity checking of stack_alignment_needed and
4103 preferred_alignment, since i386 port is the only using those features
4104 that may break easily. */
4106 if (size
&& !stack_alignment_needed
)
4108 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4110 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4112 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4115 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4116 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
4118 /* Register save area */
4119 offset
+= frame
->nregs
* UNITS_PER_WORD
;
4122 if (ix86_save_varrargs_registers
)
4124 offset
+= X86_64_VARARGS_SIZE
;
4125 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
4128 frame
->va_arg_size
= 0;
4130 /* Align start of frame for local function. */
4131 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
4132 & -stack_alignment_needed
) - offset
;
4134 offset
+= frame
->padding1
;
4136 /* Frame pointer points here. */
4137 frame
->frame_pointer_offset
= offset
;
4141 /* Add outgoing arguments area. Can be skipped if we eliminated
4142 all the function calls as dead code. */
4143 if (ACCUMULATE_OUTGOING_ARGS
&& !current_function_is_leaf
)
4145 offset
+= current_function_outgoing_args_size
;
4146 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
4149 frame
->outgoing_arguments_size
= 0;
4151 /* Align stack boundary. Only needed if we're calling another function. */
4152 if (!current_function_is_leaf
)
4153 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
4154 & -preferred_alignment
) - offset
;
4156 frame
->padding2
= 0;
4158 offset
+= frame
->padding2
;
4160 /* We've reached end of stack frame. */
4161 frame
->stack_pointer_offset
= offset
;
4163 /* Size prologue needs to allocate. */
4164 frame
->to_allocate
=
4165 (size
+ frame
->padding1
+ frame
->padding2
4166 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
4168 if (TARGET_64BIT
&& TARGET_RED_ZONE
&& current_function_sp_is_unchanging
4169 && current_function_is_leaf
)
4171 frame
->red_zone_size
= frame
->to_allocate
;
4172 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
4173 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
4176 frame
->red_zone_size
= 0;
4177 frame
->to_allocate
-= frame
->red_zone_size
;
4178 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
4180 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
4181 fprintf (stderr
, "size: %i\n", size
);
4182 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
4183 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
4184 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
4185 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
4186 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
4187 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
4188 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
4189 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
4190 frame
->hard_frame_pointer_offset
);
4191 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
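
/* Illustrative sketch (not part of the port): the padding fields computed
   above rely on the usual power-of-two round-up identity
   (offset + align - 1) & -align.  A minimal standalone demonstration,
   assuming ALIGN is a power of two, kept disabled:  */
#if 0
#include <assert.h>

static long
round_up (long offset, long align)
{
  /* Only valid when ALIGN is a power of two, as the stack boundaries
     used above always are.  */
  return (offset + align - 1) & -align;
}

int
main (void)
{
  assert (round_up (8, 16) == 16);
  assert (round_up (16, 16) == 16);
  assert (round_up (17, 16) == 32);
  return 0;
}
#endif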
/* Emit code to save registers in the prologue.  */

ix86_emit_save_regs ()
{
  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit code to save registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */

ix86_emit_save_regs_using_mov (pointer, offset)
     HOST_WIDE_INT offset;
{
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
					       Pmode, offset),
			       gen_rtx_REG (Pmode, regno));
	RTX_FRAME_RELATED_P (insn) = 1;
	offset += UNITS_PER_WORD;
      }
}
/* Expand the prologue into a bunch of separate insns.  */

ix86_expand_prologue ()
{
  int pic_reg_used = (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
		      && (regs_ever_live[PIC_OFFSET_TABLE_REGNUM]
			  || current_function_profile));
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  use_fast_prologue_epilogue
    = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
  if (TARGET_PROLOGUE_USING_MOVE)
    use_mov = use_fast_prologue_epilogue;

  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;
  /* In case we are dealing only with single register and empty frame,
     push is equivalent of the mov+add sequence.  */
  if (allocate == 0 && frame.nregs <= 1)
    use_mov = 0;

  if (!use_mov)
    ix86_emit_save_regs ();
  else
    allocate += frame.nregs * UNITS_PER_WORD;

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    {
      insn = emit_insn (gen_pro_epilogue_adjust_stack
			(stack_pointer_rtx, stack_pointer_rtx,
			 GEN_INT (-allocate)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* ??? Is this only valid for Win32?  */
      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (allocate));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));

      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }

  if (use_mov)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    {
      insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
	 deleting all references to the pic register after reload.
	 Consider if cross-jumping unifies two sides of a branch
	 controled by a comparison vs the only read from a global.
	 In which case, allow the set_got to be deleted, though we're
	 too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from be scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}
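
/* Illustrative sketch (not part of the port): for a typical ia32 function
   that needs a frame pointer, the insns emitted above correspond roughly to
   the familiar AT&T-syntax prologue shown below.  The register saves and the
   constant N depend on the frame layout computed earlier; this is only an
   assumed example, kept disabled.  */
#if 0
	pushl	%ebp		/* save caller's frame pointer */
	movl	%esp, %ebp	/* establish the new frame pointer */
	subl	$N, %esp	/* allocate frame.to_allocate bytes */
	pushl	%ebx		/* save call-preserved registers in use */
#endif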
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */

ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
     int maybe_eh_return;
{
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
	emit_move_insn (gen_rtx_REG (Pmode, regno),
			adjust_address (gen_rtx_MEM (Pmode, pointer),
					Pmode, offset));
	offset += UNITS_PER_WORD;
      }
}
/* Restore function stack, frame, and registers.  */

ix86_expand_epilogue (style)
{
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.

     The default code result in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heruistic may need some
     tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && use_fast_prologue_epilogue && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */
      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      emit_insn (gen_pro_epilogue_adjust_stack
			 (stack_pointer_rtx, sa, const0_rtx));
	    }
	  else
	    {
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate
			     + frame.nregs * UNITS_PER_WORD)));
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx));
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset)));
	}
      else if (frame.to_allocate)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate)));

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */
      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There are is no "pascal" calling convention in 64bit ABI.  */
	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.  */

ix86_decompose_address (addr, out)
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;

  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;			/* index*scale + base */
	  else
	    disp = op1;			/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is not scaling */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
	  || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;
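
/* Illustrative sketch (not part of the port): the decomposition above maps
   an address such as 4(%ebx,%ecx,2) onto the base/index/scale/disp fields
   of struct ix86_address.  A minimal standalone analogue with an assumed,
   simplified structure, kept disabled:  */
#if 0
#include <assert.h>
#include <string.h>

struct addr_parts { const char *base, *index; int scale; int disp; };

int
main (void)
{
  /* base + index*scale + disp, i.e. 4(%ebx,%ecx,2) in AT&T syntax.  */
  struct addr_parts p = { "ebx", "ecx", 2, 4 };
  assert (strcmp (p.base, "ebx") == 0 && strcmp (p.index, "ecx") == 0);
  assert (p.scale == 2 && p.disp == 4);
  return 0;
}
#endif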
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires to two regs - that would mean more pseudos with longer
   lifetimes.  */

ix86_address_cost (x)
{
  struct ix86_address parts;

  if (!ix86_decompose_address (x, &parts))
    abort ();

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
     since it's predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
	[base+scale*index]
	[scale*index+disp]
	[base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

ix86_find_base_term (x)
{
  if (GET_CODE (x) != CONST)
    return x;
  if (GET_CODE (term) == PLUS
      && (GET_CODE (XEXP (term, 1)) == CONST_INT
	  || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
    term = XEXP (term, 0);
  if (GET_CODE (term) != UNSPEC
      || XINT (term, 1) != UNSPEC_GOTPCREL)
    return x;

  term = XVECEXP (term, 0, 0);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  if (GET_CODE (x) != PLUS
      || XEXP (x, 0) != pic_offset_table_rtx
      || GET_CODE (XEXP (x, 1)) != CONST)
    return x;

  term = XEXP (XEXP (x, 1), 0);

  if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
    term = XEXP (term, 0);

  if (GET_CODE (term) != UNSPEC
      || XINT (term, 1) != UNSPEC_GOTOFF)
    return x;

  term = XVECEXP (term, 0, 0);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

legitimate_constant_p (x)
{
  switch (GET_CODE (x))
    {
    case SYMBOL_REF:
      /* TLS symbols are not constant.  */
      if (tls_symbolic_operand (x, Pmode))
	return false;
      break;

    case CONST:
      inner = XEXP (x, 0);

      /* Offsets of TLS symbols are never valid.
	 Discourage CSE from creating them.  */
      if (GET_CODE (inner) == PLUS
	  && tls_symbolic_operand (XEXP (inner, 0), Pmode))
	return false;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  }
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}

/* Determine if a given RTX is a valid constant address.  */

constant_address_p (x)
{
  switch (GET_CODE (x))
    {
    case CONST_DOUBLE:
      return TARGET_64BIT;

    case CONST:
    case SYMBOL_REF:
      return !flag_pic && legitimate_constant_p (x);
    }
}

/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

legitimate_pic_operand_p (x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  }

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);
    }
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

legitimate_pic_address_disp_p (disp)
{
  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx x = disp;
      if (GET_CODE (disp) == CONST)
	x = XEXP (disp, 0);
      /* ??? Handle PIC code models */
      if (GET_CODE (x) == PLUS
	  && (GET_CODE (XEXP (x, 1)) == CONST_INT
	      && ix86_cmodel == CM_SMALL_PIC
	      && INTVAL (XEXP (x, 1)) < 1024*1024*1024
	      && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
	x = XEXP (x, 0);
      if (local_symbolic_operand (x, Pmode))
	return 1;
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* We are unsafe to allow PLUS expressions.  This limit allowed distance
	 of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
    }

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_GOTTPOFF:
      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_NTPOFF:
      /* ??? Could support offset here.  */
      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_DTPOFF:
      /* ??? Could support offset here.  */
      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    fprintf (stderr,
	     "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	     GET_MODE_NAME (mode), strict);

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  index = parts.index;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

      if (GET_CODE (base) == SUBREG)
	reg = SUBREG_REG (base);

      if (GET_CODE (reg) != REG)
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

      if (GET_CODE (index) == SUBREG)
	reg = SUBREG_REG (index);

      if (GET_CODE (reg) != REG)
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}

  /* Validate scale factor.  */

      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}

  /* Validate displacement.  */

      if (TARGET_64BIT)
	{
	  if (!x86_64_sign_extended_value (disp))
	    {
	      reason = "displacement is out of range";
	      goto report_error;
	    }
	}
      else
	{
	  if (GET_CODE (disp) == CONST_DOUBLE)
	    {
	      reason = "displacement is a const_double";
	      goto report_error;
	    }
	}

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_GOTPCREL:
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  default:
	    reason = "invalid address unspec";
	    goto report_error;
	  }

      else if (flag_pic && SYMBOLIC_CONST (disp))
	{
	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      reason = "non-constant pic memory reference";
	      goto report_error;
	    }
	  if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (HALF_PIC_P ())
	{
	  if (! HALF_PIC_ADDRESS_P (disp)
	      || (base != NULL_RTX || index != NULL_RTX))
	    {
	      reason = "displacement is an invalid half-pic reference";
	      goto report_error;
	    }
	}
      else if (!CONSTANT_ADDRESS_P (disp))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
/* Return an unique alias set for the GOT.  */

static HOST_WIDE_INT
ix86_GOT_alias_set ()
{
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

legitimize_pic_address (orig, reg)
{
  if (local_symbolic_operand (addr, Pmode))
    {
      /* In 64bit mode we can address such objects directly.  */

      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
	{
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use directly gen_movsi, otherwise the address is loaded
	     into register for CSE.  We don't want to CSE this addresses,
	     instead we CSE addresses from the GOT table, so skip this.  */
	  emit_insn (gen_movsi (reg, new));
	  new = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway...  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (local_symbolic_operand (op0, Pmode)
	      && GET_CODE (op1) == CONST_INT)
	    {
	      if (reload_in_progress)
		regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
				    UNSPEC_GOTOFF);
	      new = gen_rtx_PLUS (Pmode, new, op1);
	      new = gen_rtx_CONST (Pmode, new);
	      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

	      if (reg != 0)
		{
		  emit_move_insn (reg, new);
		  new = reg;
		}

	      /* ??? We need to limit offsets here.  */
	    }
	  else
	    {
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new = legitimize_pic_address (XEXP (addr, 1),
					    base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
ix86_encode_section_info (decl, first)
     int first ATTRIBUTE_UNUSED;
{
  bool local_p = (*targetm.binds_local_p) (decl);

  rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
  if (GET_CODE (rtl) != MEM)
    return;
  symbol = XEXP (rtl, 0);
  if (GET_CODE (symbol) != SYMBOL_REF)
    return;

  /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
     symbol so that we may access it directly in the GOT.  */
  SYMBOL_REF_FLAG (symbol) = local_p;

  /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
     "local dynamic", "initial exec" or "local exec" TLS models
     respectively.  */
  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
    {
      const char *symbol_str;
      enum tls_model kind;

      if (!flag_pic)
	kind = TLS_MODEL_LOCAL_EXEC;
      else if (!local_p)
	kind = TLS_MODEL_INITIAL_EXEC;
      /* Local dynamic is inefficient when we're not combining the
	 parts of the address.  */
      else if (optimize && local_p)
	kind = TLS_MODEL_LOCAL_DYNAMIC;
      else
	kind = TLS_MODEL_GLOBAL_DYNAMIC;
      if (kind < flag_tls_default)
	kind = flag_tls_default;

      symbol_str = XSTR (symbol, 0);

      if (symbol_str[0] == '%')
	{
	  if (symbol_str[1] == tls_model_chars[kind])
	    return;
	}

      len = strlen (symbol_str) + 1;
      newstr = alloca (len + 2);

      newstr[1] = tls_model_chars[kind];
      memcpy (newstr + 2, symbol_str, len);

      XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
    }
}

/* Undo the above when printing symbol names.  */

ix86_strip_name_encoding (str)

/* Load the thread pointer into a register.  */

get_thread_pointer ()
{
  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  tp = gen_rtx_CONST (Pmode, tp);
  tp = force_reg (Pmode, tp);
  return tp;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

legitimize_address (x, oldx, mode)
     register rtx oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	     GET_MODE_NAME (mode));

  log = tls_symbolic_operand (x, mode);
  if (log)
    {
      rtx dest, base, off, pic;

      switch (log)
	{
	case TLS_MODEL_GLOBAL_DYNAMIC:
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_tls_global_dynamic (dest, x));
	  break;

	case TLS_MODEL_LOCAL_DYNAMIC:
	  base = gen_reg_rtx (Pmode);
	  emit_insn (gen_tls_local_dynamic_base (base));

	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
	  off = gen_rtx_CONST (Pmode, off);

	  return gen_rtx_PLUS (Pmode, base, off);

	case TLS_MODEL_INITIAL_EXEC:
	  if (flag_pic)
	    {
	      if (reload_in_progress)
		regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	      pic = pic_offset_table_rtx;
	    }
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }

	  base = get_thread_pointer ();

	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_GOTTPOFF);
	  off = gen_rtx_CONST (Pmode, off);
	  off = gen_rtx_PLUS (Pmode, pic, off);
	  off = gen_rtx_MEM (Pmode, off);
	  RTX_UNCHANGING_P (off) = 1;
	  set_mem_alias_set (off, ix86_GOT_alias_set ());

	  /* Damn Sun for specifing a set of dynamic relocations without
	     considering the two-operand nature of the architecture!
	     We'd be much better off with a "GOTNTPOFF" relocation that
	     already contained the negated constant.  */
	  /* ??? Using negl and reg+reg addressing appears to be a lose
	     size-wise.  The negl is two bytes, just like the extra movl
	     incurred by the two-operand subl, but reg+reg addressing
	     uses the two-byte modrm form, unlike plain reg.  */

	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	  break;

	case TLS_MODEL_LOCAL_EXEC:
	  base = get_thread_pointer ();

	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
				TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
	  off = gen_rtx_CONST (Pmode, off);

	  if (TARGET_GNU_TLS)
	    return gen_rtx_PLUS (Pmode, base, off);
	  else
	    {
	      dest = gen_reg_rtx (Pmode);
	      emit_insn (gen_subsi3 (dest, base, off));
	    }
	  break;
	}

      return dest;
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx other = NULL_RTX;

	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }

	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					  XEXP (XEXP (XEXP (x, 0), 1), 0)),
			    plus_constant (other, INTVAL (constant)));
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);
	}
    }

  return x;
}
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

output_pic_addr_const (file, x, code)
{
  switch (GET_CODE (x))
    {
    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      if (code == 'P' && ! SYMBOL_REF_FLAG (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      break;

    case MINUS:
      putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs ("@GOTPCREL(%RIP)", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  fputs ("@GOTTPOFF", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@TPOFF", file);
	  break;
	case UNSPEC_NTPOFF:
	  fputs ("@NTPOFF", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@DTPOFF", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
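
/* Illustrative examples (assumed forms, not emitted verbatim by the routine
   above): the unspec suffixes printed here show up in assembler output
   roughly as

	movl	foo@GOT(%ebx), %eax	   pic load of a global's address
	leal	bar@GOTOFF(%ebx), %eax	   pic address of a local symbol
	movq	baz@GOTPCREL(%rip), %rax   64-bit pc-relative GOT load  */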
/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
   We need to handle our special PIC relocations.  */

i386_dwarf_output_addr_const (file, x)
{
  fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);

  fprintf (file, "%s", ASM_LONG);

  if (flag_pic)
    output_pic_addr_const (file, x, '\0');
  else
    output_addr_const (file, x);
}

/* In the name of slightly smaller debug output, and to cater to
   general assembler losage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.  */

i386_simplify_dwarf_addr (orig_x)
{
  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  if (GET_CODE (x) != CONST
      || GET_CODE (XEXP (x, 0)) != UNSPEC
      || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
      || GET_CODE (orig_x) != MEM)
    return orig_x;
  return XVECEXP (XEXP (x, 0), 0, 0);

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      if (GET_CODE (XEXP (y, 0)) == REG
	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 1);
      else if (GET_CODE (XEXP (y, 1)) == REG
	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 0);
      if (GET_CODE (y) != REG
	  && GET_CODE (y) != MULT
	  && GET_CODE (y) != ASHIFT)
	return orig_x;
    }

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    {
      if (y)
	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
      return XVECEXP (x, 0, 0);
    }

  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
	      && GET_CODE (orig_x) != MEM)))
    {
      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
      if (y)
	return gen_rtx_PLUS (Pmode, y, x);
    }
}
put_condition_code (code, mode, reverse, fp, file)
     enum machine_mode mode;
{
  if (mode == CCFPmode || mode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	abort ();
      code = ix86_fp_compare_code_to_integer (code);
    }
  if (reverse)
    code = reverse_condition (code);

      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();

      /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
	 Those same assemblers have the same but opposite losage on cmov.  */
      suffix = fp ? "nbe" : "a";

      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";

      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";

      suffix = fp ? "nb" : "ae";

      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();

      suffix = fp ? "u" : "p";

      suffix = fp ? "nu" : "np";

  fputs (suffix, file);
}
print_reg (x, code, file)
{
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }

  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
    case 16:
    case 2:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    }
}
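
/* Illustrative sketch (not part of the port): how an extended-register
   ordinal (REGNO - FIRST_REX_INT_REG) and an access size map to the AMD64
   names printed above.  A hypothetical standalone demonstration, kept
   disabled:  */
#if 0
#include <stdio.h>

static void
print_rex_name (int n, int size)
{
  /* size in bytes: 1 -> r8b..r15b, 2 -> r8w..r15w, 4 -> r8d..r15d,
     8 -> r8..r15.  */
  const char *suffix = size == 1 ? "b" : size == 2 ? "w"
		       : size == 4 ? "d" : "";
  printf ("r%i%s\n", n + 8, suffix);
}

int
main (void)
{
  print_rex_name (0, 1);	/* r8b */
  print_rex_name (3, 4);	/* r11d */
  print_rex_name (7, 8);	/* r15 */
  return 0;
}
#endif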
/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

get_some_local_dynamic_name ()
{
  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;
}

get_some_local_dynamic_name_1 (px, data)
     void *data ATTRIBUTE_UNUSED;
{
  if (GET_CODE (x) == SYMBOL_REF
      && local_dynamic_symbolic_operand (x, Pmode))
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
	nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.  */

print_operand (file, x, code)
{
      case '*':
	if (ASSEMBLER_DIALECT == ASM_ATT)
	  putc ('*', file);
	return;

      case '&':
	assemble_name (file, get_some_local_dynamic_name ());
	return;

      case 'A':
	if (ASSEMBLER_DIALECT == ASM_ATT)
	  putc ('*', file);
	else if (ASSEMBLER_DIALECT == ASM_INTEL)
	  {
	    /* Intel syntax.  For absolute addresses, registers should not
	       be surrounded by braces.  */
	    if (GET_CODE (x) != REG)
	      {
		putc ('[', file);
		PRINT_OPERAND (file, x, 0);
		putc (']', file);
		return;
	      }
	  }
	PRINT_OPERAND (file, x, 0);
	return;

      case 'L':
	if (ASSEMBLER_DIALECT == ASM_ATT)
	  putc ('l', file);
	return;

      case 'W':
	if (ASSEMBLER_DIALECT == ASM_ATT)
	  putc ('w', file);
	return;

      case 'B':
	if (ASSEMBLER_DIALECT == ASM_ATT)
	  putc ('b', file);
	return;

      case 'Q':
	if (ASSEMBLER_DIALECT == ASM_ATT)
	  putc ('l', file);
	return;

      case 'S':
	if (ASSEMBLER_DIALECT == ASM_ATT)
	  putc ('s', file);
	return;

      case 'T':
	if (ASSEMBLER_DIALECT == ASM_ATT)
	  putc ('t', file);
	return;

      case 'z':
	/* 387 opcodes don't get size suffixes if the operands are
	   registers.  */
	if (STACK_REG_P (x))
	  return;

	/* Likewise if using Intel opcodes.  */
	if (ASSEMBLER_DIALECT == ASM_INTEL)
	  return;

	/* This is the size of op from size of operand.  */
	switch (GET_MODE_SIZE (GET_MODE (x)))
	  {
	  case 2:
#ifdef HAVE_GAS_FILDS_FISTS
	    putc ('s', file);
#endif
	    return;

	  case 4:
	    if (GET_MODE (x) == SFmode)
	      putc ('s', file);
	    else
	      putc ('l', file);
	    return;

	  case 8:
	    if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	      {
#ifdef GAS_MNEMONICS
		putc ('q', file);
#endif
	      }
	    else
	      putc ('l', file);
	    return;
	  }

      case 's':
	if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
	  {
	    PRINT_OPERAND (file, x, 0);
	    putc (',', file);
	  }
	return;

      case 'D':
	/* Little bit of braindamage here.  The SSE compare instructions
	   does use completely different names for the comparisons that the
	   fp conditional moves.  */
	switch (GET_CODE (x))
	  {
	    fputs ("unord", file);
	    break;
	    fputs ("neq", file);
	    break;
	    fputs ("nlt", file);
	    break;
	    fputs ("nle", file);
	    break;
	    fputs ("ord", file);
	    break;
	  }
	return;

      case 'O':
#ifdef CMOV_SUN_AS_SYNTAX
	if (ASSEMBLER_DIALECT == ASM_ATT)
	  {
	    switch (GET_MODE (x))
	      {
	      case HImode: putc ('w', file); break;
	      case SFmode: putc ('l', file); break;
	      case DFmode: putc ('q', file); break;
	      }
	  }
#endif
	return;

      case 'C':
	put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
	return;

      case 'F':
#ifdef CMOV_SUN_AS_SYNTAX
	if (ASSEMBLER_DIALECT == ASM_ATT)
	  putc ('.', file);
#endif
	put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
	return;

	/* Like above, but reverse condition */
      case 'c':
	/* Check to see if argument to %c is really a constant
	   and not a condition code which needs to be reversed.  */
	if (GET_RTX_CLASS (GET_CODE (x)) != '<')
	  {
	    output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
	    return;
	  }
	put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
	return;

      case 'f':
#ifdef CMOV_SUN_AS_SYNTAX
	if (ASSEMBLER_DIALECT == ASM_ATT)
	  putc ('.', file);
#endif
	put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
	return;

      case '+':
	if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
	  return;

	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	if (x)
	  {
	    int pred_val = INTVAL (XEXP (x, 0));

	    if (pred_val < REG_BR_PROB_BASE * 45 / 100
		|| pred_val > REG_BR_PROB_BASE * 55 / 100)
	      {
		int taken = pred_val > REG_BR_PROB_BASE / 2;
		int cputaken = final_forward_branch_p (current_output_insn) == 0;

		/* Emit hints only in the case default branch prediction
		   heruistics would fail.  */
		if (taken != cputaken)
		  {
		    /* We use 3e (DS) prefix for taken branches and
		       2e (CS) prefix for not taken branches.  */
		    if (taken)
		      fputs ("ds ; ", file);
		    else
		      fputs ("cs ; ", file);
		  }
	      }
	  }
	return;

      default:
	output_operand_lossage ("invalid operand code `%c'", code);

  if (GET_CODE (x) == REG)
    {
      PRINT_REG (x, code, file);
    }

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
	{
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "XWORD"; break;
	    case 16: size = "XMMWORD"; break;
	    }

	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";

	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      if (flag_pic && CONSTANT_ADDRESS_P (x))
	output_pic_addr_const (file, x, code);
      /* Avoid (%rip) for call operands.  */
      else if (CONSTANT_ADDRESS_P (x) && code == 'P'
	       && GET_CODE (x) != CONST_INT)
	output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
	output_operand_lossage ("invalid constraints for operand");
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      fprintf (file, "0x%lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
	   && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
    {
      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST
	   && GET_CODE (XEXP (x, 0)) == UNSPEC
	   && XINT (XEXP (x, 0), 1) == UNSPEC_TP)
    {
      if (ASSEMBLER_DIALECT == ASM_INTEL)
	fputs ("DWORD PTR ", file);
      if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
	putc ('%', file);
      fputs ("gs:0", file);
    }

  else
    {
      if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	{
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('$', file);
	}
      else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
	       || GET_CODE (x) == LABEL_REF)
	{
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('$', file);
	  else
	    fputs ("OFFSET FLAT:", file);
	}

      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
/* Print a memory operand whose address is ADDR.  */

print_operand_address (file, addr)
{
  struct ix86_address parts;
  rtx base, index, disp;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  index = parts.index;
  scale = parts.scale;

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    {
	      if (USER_LABEL_PREFIX[0] == 0)
		fputs ("ds:", file);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
	}
      else if (flag_pic)
	output_pic_addr_const (file, addr, 0);
      else
	output_addr_const (file, addr);

      /* Use one byte shorter RIP relative addressing for 64bit mode.  */
      if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
	fputs ("(%rip)", file);
    }
  else
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  if (flag_pic)
	    output_pic_addr_const (file, disp, 0);
	  else if (GET_CODE (disp) == LABEL_REF)
	    output_asm_label (disp);
	  else
	    output_addr_const (file, disp);

	  if (base)
	    PRINT_REG (base, 0, file);
	  if (index)
	    {
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	}
      else
	{
	  rtx offset = NULL_RTX;

	  /* Pull out the offset of a symbol; print any symbol itself.  */
	  if (GET_CODE (disp) == CONST
	      && GET_CODE (XEXP (disp, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
	    {
	      offset = XEXP (XEXP (disp, 0), 1);
	      disp = gen_rtx_CONST (VOIDmode,
				    XEXP (XEXP (disp, 0), 0));
	    }

	  if (flag_pic)
	    output_pic_addr_const (file, disp, 0);
	  else if (GET_CODE (disp) == LABEL_REF)
	    output_asm_label (disp);
	  else if (GET_CODE (disp) == CONST_INT)
	    offset = disp;
	  else
	    output_addr_const (file, disp);

	  if (base)
	    {
	      PRINT_REG (base, 0, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		  else
		    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }

	  if (index)
	    {
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	}
    }
}
output_addr_const_extra (file, x)
{
  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      fputs ("@GOTTPOFF", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@TPOFF", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      fputs ("@NTPOFF", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@DTPOFF", file);
      break;
    }
/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

split_di (operands, num, lo_half, hi_half)
     rtx lo_half[], hi_half[];
{
      rtx op = operands[num];

      /* simplify_subreg refuse to split volatile memory addresses,
	 but we still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  lo_half[num] = adjust_address (op, SImode, 0);
	  hi_half[num] = adjust_address (op, SImode, 4);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 4);
	}
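
/* Illustrative sketch (not part of the port): the split performed above
   places the low SImode word of a 64-bit value at offset 0 and the high
   word at offset 4 on this little-endian target.  A minimal standalone
   analogue for an integer constant, kept disabled:  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
split_u64 (uint64_t v, uint32_t *lo, uint32_t *hi)
{
  *lo = (uint32_t) (v & 0xffffffffu);	/* low half, offset 0 */
  *hi = (uint32_t) (v >> 32);		/* high half, offset 4 */
}

int
main (void)
{
  uint32_t lo, hi;
  split_u64 (0x1122334455667788ULL, &lo, &hi);
  assert (lo == 0x55667788u && hi == 0x11223344u);
  return 0;
}
#endif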
/* Split one or more TImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

split_ti (operands, num, lo_half, hi_half)
     rtx lo_half[], hi_half[];
{
      rtx op = operands[num];

      /* simplify_subreg refuse to split volatile memory addresses, but we
	 still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  lo_half[num] = adjust_address (op, DImode, 0);
	  hi_half[num] = adjust_address (op, DImode, 8);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
	}
6807 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6808 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6809 is the expression of the binary operation. The output may either be
6810 emitted here, or returned to the caller, like all output_* functions.
6812 There is no guarantee that the operands are the same mode, as they
6813 might be within FLOAT or FLOAT_EXTEND expressions. */
6815 #ifndef SYSV386_COMPAT
6816 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6817 wants to fix the assemblers because that causes incompatibility
6818 with gcc. No-one wants to fix gcc because that causes
6819 incompatibility with assemblers... You can use the option of
6820 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6821 #define SYSV386_COMPAT 1
6825 output_387_binary_op (insn
, operands
)
6829 static char buf
[30];
6832 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]) | SSE_REG_P (operands
[2]);
6834 #ifdef ENABLE_CHECKING
6835 /* Even if we do not want to check the inputs, this documents input
6836 constraints. Which helps in understanding the following code. */
6837 if (STACK_REG_P (operands
[0])
6838 && ((REG_P (operands
[1])
6839 && REGNO (operands
[0]) == REGNO (operands
[1])
6840 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
6841 || (REG_P (operands
[2])
6842 && REGNO (operands
[0]) == REGNO (operands
[2])
6843 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
6844 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
6850 switch (GET_CODE (operands
[3]))
6853 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6854 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6862 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6863 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6871 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6872 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6880 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6881 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6895 if (GET_MODE (operands
[0]) == SFmode
)
6896 strcat (buf
, "ss\t{%2, %0|%0, %2}");
6898 strcat (buf
, "sd\t{%2, %0|%0, %2}");
6903 switch (GET_CODE (operands
[3]))
6907 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
6909 rtx temp
= operands
[2];
6910 operands
[2] = operands
[1];
6914 /* know operands[0] == operands[1]. */
6916 if (GET_CODE (operands
[2]) == MEM
)
6922 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
6924 if (STACK_TOP_P (operands
[0]))
          /* How is it that we are storing to a dead operand[2]?
             Well, presumably operands[1] is dead too.  We can't
             store the result to st(0) as st(0) gets popped on this
             instruction.  Instead store to operands[2] (which I
             think has to be st(1)).  st(1) will be popped later.
             gcc <= 2.8.1 didn't have this check and generated
             assembly code that the Unixware assembler rejected.  */
6932 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6934 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6938 if (STACK_TOP_P (operands
[0]))
6939 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6941 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6946 if (GET_CODE (operands
[1]) == MEM
)
6952 if (GET_CODE (operands
[2]) == MEM
)
6958 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
          /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
             derived assemblers, confusingly reverse the direction of
             the operation for fsub{r} and fdiv{r} when the
             destination register is not st(0).  The Intel assembler
             doesn't have this brain damage.  Read !SYSV386_COMPAT to
             figure out what the hardware really does.  */
6967 if (STACK_TOP_P (operands
[0]))
6968 p
= "{p\t%0, %2|rp\t%2, %0}";
6970 p
= "{rp\t%2, %0|p\t%0, %2}";
6972 if (STACK_TOP_P (operands
[0]))
6973 /* As above for fmul/fadd, we can't store to st(0). */
6974 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6976 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6981 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
6984 if (STACK_TOP_P (operands
[0]))
6985 p
= "{rp\t%0, %1|p\t%1, %0}";
6987 p
= "{p\t%1, %0|rp\t%0, %1}";
6989 if (STACK_TOP_P (operands
[0]))
6990 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6992 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6997 if (STACK_TOP_P (operands
[0]))
6999 if (STACK_TOP_P (operands
[1]))
7000 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7002 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7005 else if (STACK_TOP_P (operands
[1]))
7008 p
= "{\t%1, %0|r\t%0, %1}";
7010 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7016 p
= "{r\t%2, %0|\t%0, %2}";
7018 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Output code to initialize control word copies used by the
   trunc?f?i patterns.  NORMAL is set to the current control word, while
   ROUND_DOWN is set to the control word rounding downwards.  */

void
emit_i387_cw_initialization (normal, round_down)
     rtx normal, round_down;
{
  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (normal));
  emit_move_insn (reg, normal);
  if (!TARGET_PARTIAL_REG_STALL && !optimize_size)
    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
  else
    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
  emit_move_insn (round_down, reg);
}
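/* Illustrative sketch, not part of the original file: the control-word
   games above implement float -> integer truncation by temporarily
   switching the x87 rounding mode.  In ISO C99 terms, the same effect
   looks roughly like the hypothetical helper below, kept under "#if 0"
   so it does not affect compilation.  */
#if 0
#include <fenv.h>
#include <math.h>

static long
truncate_to_long (double x)
{
  int old_mode = fegetround ();
  long result;

  fesetround (FE_TOWARDZERO);   /* like loading the ROUND_DOWN control word */
  result = lrint (x);           /* fist/fistp uses the current rounding mode */
  fesetround (old_mode);        /* like restoring the NORMAL control word */
  return result;
}
#endif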
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */
7055 output_fix_trunc (insn
, operands
)
7059 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7060 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
7062 /* Jump through a hoop or two for DImode, since the hardware has no
7063 non-popping instruction. We used to do this a different way, but
7064 that was somewhat fragile and broke with post-reload splitters. */
7065 if (dimode_p
&& !stack_top_dies
)
7066 output_asm_insn ("fld\t%y1", operands
);
7068 if (!STACK_TOP_P (operands
[1]))
7071 if (GET_CODE (operands
[0]) != MEM
)
7074 output_asm_insn ("fldcw\t%3", operands
);
7075 if (stack_top_dies
|| dimode_p
)
7076 output_asm_insn ("fistp%z0\t%0", operands
);
7078 output_asm_insn ("fist%z0\t%0", operands
);
7079 output_asm_insn ("fldcw\t%2", operands
);
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.  */
7089 output_fp_compare (insn
, operands
, eflags_p
, unordered_p
)
7092 int eflags_p
, unordered_p
;
7095 rtx cmp_op0
= operands
[0];
7096 rtx cmp_op1
= operands
[1];
7097 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]);
7102 cmp_op1
= operands
[2];
      if (GET_MODE (operands[0]) == SFmode)
        if (unordered_p)
          return "ucomiss\t{%1, %0|%0, %1}";
        else
          return "comiss\t{%1, %0|%0, %1}";
      else
        if (unordered_p)
          return "ucomisd\t{%1, %0|%0, %1}";
        else
          return "comisd\t{%1, %0|%0, %1}";
7118 if (! STACK_TOP_P (cmp_op0
))
7121 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7123 if (STACK_REG_P (cmp_op1
)
7125 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
7126 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
      /* If both the top of the 387 stack dies, and the other operand
         is also a stack register that dies, then this must be a
         `fcompp' float compare.  */

          /* There is no double popping fcomi variant.  Fortunately,
             eflags is immune from the fstp's cc clobbering.  */
7137 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
7139 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
7147 return "fucompp\n\tfnstsw\t%0";
7149 return "fcompp\n\tfnstsw\t%0";
7162 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7164 static const char * const alt
[24] =
7176 "fcomi\t{%y1, %0|%0, %y1}",
7177 "fcomip\t{%y1, %0|%0, %y1}",
7178 "fucomi\t{%y1, %0|%0, %y1}",
7179 "fucomip\t{%y1, %0|%0, %y1}",
7186 "fcom%z2\t%y2\n\tfnstsw\t%0",
7187 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7188 "fucom%z2\t%y2\n\tfnstsw\t%0",
7189 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7191 "ficom%z2\t%y2\n\tfnstsw\t%0",
7192 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7200 mask
= eflags_p
<< 3;
7201 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
7202 mask
|= unordered_p
<< 1;
7203 mask
|= stack_top_dies
;
7216 ix86_output_addr_vec_elt (file
, value
)
7220 const char *directive
= ASM_LONG
;
7225 directive
= ASM_QUAD
;
7231 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
7235 ix86_output_addr_diff_elt (file
, value
, rel
)
7240 fprintf (file
, "%s%s%d-%s%d\n",
7241 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
7242 else if (HAVE_AS_GOTOFF_IN_DATA
)
7243 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
7245 asm_fprintf (file
, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
7246 ASM_LONG
, LPREFIX
, value
);
7249 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7253 ix86_expand_clear (dest
)
7258 /* We play register width games, which are only valid after reload. */
7259 if (!reload_completed
)
7262 /* Avoid HImode and its attendant prefix byte. */
7263 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
7264 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
7266 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
7268 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7269 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
7271 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
7272 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
7278 /* X is an unchanging MEM. If it is a constant pool reference, return
7279 the constant pool rtx, else NULL. */
7282 maybe_get_pool_constant (x
)
7289 if (GET_CODE (x
) != PLUS
)
7291 if (XEXP (x
, 0) != pic_offset_table_rtx
)
7294 if (GET_CODE (x
) != CONST
)
7297 if (GET_CODE (x
) != UNSPEC
)
7299 if (XINT (x
, 1) != UNSPEC_GOTOFF
)
7301 x
= XVECEXP (x
, 0, 0);
7304 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7305 return get_pool_constant (x
);
7311 ix86_expand_move (mode
, operands
)
7312 enum machine_mode mode
;
7315 int strict
= (reload_in_progress
|| reload_completed
);
7316 rtx insn
, op0
, op1
, tmp
;
      /* ??? We have a slight problem.  We need to say that tls symbols are
         not legitimate constants so that reload does not helpfully reload
         these constants from a REG_EQUIV, which we cannot handle.  (Recall
         that general- and local-dynamic address resolution requires a
         function call.)

         However, if we say that tls symbols are not legitimate constants,
         then emit_move_insn helpfully drops them into the constant pool.

         It is far easier to work around emit_move_insn than reload.  Recognize
         the MEM that we would have created and extract the symbol_ref.  */
7334 && GET_CODE (op1
) == MEM
7335 && RTX_UNCHANGING_P (op1
))
7337 tmp
= maybe_get_pool_constant (op1
);
7338 /* Note that we only care about symbolic constants here, which
7339 unlike CONST_INT will always have a proper mode. */
7340 if (tmp
&& GET_MODE (tmp
) == Pmode
)
7344 if (tls_symbolic_operand (op1
, Pmode
))
7346 op1
= legitimize_address (op1
, op1
, VOIDmode
);
7347 if (GET_CODE (op0
) == MEM
)
7349 tmp
= gen_reg_rtx (mode
);
7350 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, op1
));
7354 else if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
7356 if (GET_CODE (op0
) == MEM
)
7357 op1
= force_reg (Pmode
, op1
);
7361 if (GET_CODE (temp
) != REG
)
7362 temp
= gen_reg_rtx (Pmode
);
7363 temp
= legitimize_pic_address (op1
, temp
);
7371 if (GET_CODE (op0
) == MEM
7372 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
7373 || !push_operand (op0
, mode
))
7374 && GET_CODE (op1
) == MEM
)
7375 op1
= force_reg (mode
, op1
);
7377 if (push_operand (op0
, mode
)
7378 && ! general_no_elim_operand (op1
, mode
))
7379 op1
= copy_to_mode_reg (mode
, op1
);
      /* Force large constants in 64bit compilation into a register
         to get them CSEed.  */
7383 if (TARGET_64BIT
&& mode
== DImode
7384 && immediate_operand (op1
, mode
)
7385 && !x86_64_zero_extended_value (op1
)
7386 && !register_operand (op0
, mode
)
7387 && optimize
&& !reload_completed
&& !reload_in_progress
)
7388 op1
= copy_to_mode_reg (mode
, op1
);
7390 if (FLOAT_MODE_P (mode
))
      /* If we are loading a floating point constant to a register,
         force the value to memory now, since we'll get better code
         out of the back end.  */
7398 else if (GET_CODE (op1
) == CONST_DOUBLE
7399 && register_operand (op0
, mode
))
7400 op1
= validize_mem (force_const_mem (mode
, op1
));
7404 insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
7410 ix86_expand_vector_move (mode
, operands
)
7411 enum machine_mode mode
;
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register; once we have that information we may be able
     to handle some of them more efficiently.  */
7418 if ((reload_in_progress
| reload_completed
) == 0
7419 && register_operand (operands
[0], mode
)
7420 && CONSTANT_P (operands
[1]))
7422 rtx addr
= gen_reg_rtx (Pmode
);
7423 emit_move_insn (addr
, XEXP (force_const_mem (mode
, operands
[1]), 0));
7424 operands
[1] = gen_rtx_MEM (mode
, addr
);
7427 /* Make operand1 a register if it isn't already. */
7428 if ((reload_in_progress
| reload_completed
) == 0
7429 && !register_operand (operands
[0], mode
)
7430 && !register_operand (operands
[1], mode
)
7431 && operands
[1] != CONST0_RTX (mode
))
7433 rtx temp
= force_reg (GET_MODE (operands
[1]), operands
[1]);
7434 emit_move_insn (operands
[0], temp
);
7438 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]));
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */
7446 ix86_expand_binary_operator (code
, mode
, operands
)
7448 enum machine_mode mode
;
7451 int matching_memory
;
7452 rtx src1
, src2
, dst
, op
, clob
;
7458 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7459 if (GET_RTX_CLASS (code
) == 'c'
7460 && (rtx_equal_p (dst
, src2
)
7461 || immediate_operand (src1
, mode
)))
7468 /* If the destination is memory, and we do not have matching source
7469 operands, do things in registers. */
7470 matching_memory
= 0;
7471 if (GET_CODE (dst
) == MEM
)
7473 if (rtx_equal_p (dst
, src1
))
7474 matching_memory
= 1;
7475 else if (GET_RTX_CLASS (code
) == 'c'
7476 && rtx_equal_p (dst
, src2
))
7477 matching_memory
= 2;
7479 dst
= gen_reg_rtx (mode
);
7482 /* Both source operands cannot be in memory. */
7483 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
7485 if (matching_memory
!= 2)
7486 src2
= force_reg (mode
, src2
);
7488 src1
= force_reg (mode
, src1
);
7491 /* If the operation is not commutable, source 1 cannot be a constant
7492 or non-matching memory. */
7493 if ((CONSTANT_P (src1
)
7494 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
7495 && GET_RTX_CLASS (code
) != 'c')
7496 src1
= force_reg (mode
, src1
);
7498 /* If optimizing, copy to regs to improve CSE */
7499 if (optimize
&& ! no_new_pseudos
)
7501 if (GET_CODE (dst
) == MEM
)
7502 dst
= gen_reg_rtx (mode
);
7503 if (GET_CODE (src1
) == MEM
)
7504 src1
= force_reg (mode
, src1
);
7505 if (GET_CODE (src2
) == MEM
)
7506 src2
= force_reg (mode
, src2
);
7509 /* Emit the instruction. */
7511 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
7512 if (reload_in_progress
)
7514 /* Reload doesn't know about the flags register, and doesn't know that
7515 it doesn't want to clobber it. We can only do this with PLUS. */
7522 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
7523 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
7526 /* Fix up the destination if needed. */
7527 if (dst
!= operands
[0])
7528 emit_move_insn (operands
[0], dst
);
7531 /* Return TRUE or FALSE depending on whether the binary operator meets the
7532 appropriate constraints. */
7535 ix86_binary_operator_ok (code
, mode
, operands
)
7537 enum machine_mode mode ATTRIBUTE_UNUSED
;
7540 /* Both source operands cannot be in memory. */
7541 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
7543 /* If the operation is not commutable, source 1 cannot be a constant. */
7544 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != 'c')
7546 /* If the destination is memory, we must have a matching source operand. */
7547 if (GET_CODE (operands
[0]) == MEM
7548 && ! (rtx_equal_p (operands
[0], operands
[1])
7549 || (GET_RTX_CLASS (code
) == 'c'
7550 && rtx_equal_p (operands
[0], operands
[2]))))
7552 /* If the operation is not commutable and the source 1 is memory, we must
7553 have a matching destination. */
7554 if (GET_CODE (operands
[1]) == MEM
7555 && GET_RTX_CLASS (code
) != 'c'
7556 && ! rtx_equal_p (operands
[0], operands
[1]))
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */
7566 ix86_expand_unary_operator (code
, mode
, operands
)
7568 enum machine_mode mode
;
7571 int matching_memory
;
7572 rtx src
, dst
, op
, clob
;
7577 /* If the destination is memory, and we do not have matching source
7578 operands, do things in registers. */
7579 matching_memory
= 0;
7580 if (GET_CODE (dst
) == MEM
)
7582 if (rtx_equal_p (dst
, src
))
7583 matching_memory
= 1;
7585 dst
= gen_reg_rtx (mode
);
7588 /* When source operand is memory, destination must match. */
7589 if (!matching_memory
&& GET_CODE (src
) == MEM
)
7590 src
= force_reg (mode
, src
);
7592 /* If optimizing, copy to regs to improve CSE */
7593 if (optimize
&& ! no_new_pseudos
)
7595 if (GET_CODE (dst
) == MEM
)
7596 dst
= gen_reg_rtx (mode
);
7597 if (GET_CODE (src
) == MEM
)
7598 src
= force_reg (mode
, src
);
7601 /* Emit the instruction. */
7603 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
7604 if (reload_in_progress
|| code
== NOT
)
7606 /* Reload doesn't know about the flags register, and doesn't know that
7607 it doesn't want to clobber it. */
7614 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
7615 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
7618 /* Fix up the destination if needed. */
7619 if (dst
!= operands
[0])
7620 emit_move_insn (operands
[0], dst
);
7623 /* Return TRUE or FALSE depending on whether the unary operator meets the
7624 appropriate constraints. */
7627 ix86_unary_operator_ok (code
, mode
, operands
)
7628 enum rtx_code code ATTRIBUTE_UNUSED
;
7629 enum machine_mode mode ATTRIBUTE_UNUSED
;
7630 rtx operands
[2] ATTRIBUTE_UNUSED
;
7632 /* If one of operands is memory, source and destination must match. */
7633 if ((GET_CODE (operands
[0]) == MEM
7634 || GET_CODE (operands
[1]) == MEM
)
7635 && ! rtx_equal_p (operands
[0], operands
[1]))
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and whether the
   CC mode is at least as constrained as REQ_MODE.  */
7645 ix86_match_ccmode (insn
, req_mode
)
7647 enum machine_mode req_mode
;
7650 enum machine_mode set_mode
;
7652 set
= PATTERN (insn
);
7653 if (GET_CODE (set
) == PARALLEL
)
7654 set
= XVECEXP (set
, 0, 0);
7655 if (GET_CODE (set
) != SET
)
7657 if (GET_CODE (SET_SRC (set
)) != COMPARE
)
7660 set_mode
= GET_MODE (SET_DEST (set
));
7664 if (req_mode
!= CCNOmode
7665 && (req_mode
!= CCmode
7666 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
7670 if (req_mode
== CCGCmode
)
7674 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
7678 if (req_mode
== CCZmode
)
7688 return (GET_MODE (SET_SRC (set
)) == set_mode
);
7691 /* Generate insn patterns to do an integer compare of OPERANDS. */
7694 ix86_expand_int_compare (code
, op0
, op1
)
7698 enum machine_mode cmpmode
;
7701 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
7702 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
7704 /* This is very simple, but making the interface the same as in the
7705 FP case makes the rest of the code easier. */
7706 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
7707 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
7709 /* Return the test that should be put into the flags user, i.e.
7710 the bcc, scc, or cmov instruction. */
7711 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
7714 /* Figure out whether to use ordered or unordered fp comparisons.
7715 Return the appropriate mode to use. */
7718 ix86_fp_compare_mode (code
)
7719 enum rtx_code code ATTRIBUTE_UNUSED
;
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
7726 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
7730 ix86_cc_mode (code
, op0
, op1
)
7734 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
7735 return ix86_fp_compare_mode (code
);
7738 /* Only zero flag is needed. */
7740 case NE
: /* ZF!=0 */
7742 /* Codes needing carry flag. */
7743 case GEU
: /* CF=0 */
7744 case GTU
: /* CF=0 & ZF=0 */
7745 case LTU
: /* CF=1 */
7746 case LEU
: /* CF=1 | ZF=1 */
7748 /* Codes possibly doable only with sign flag when
7749 comparing against zero. */
7750 case GE
: /* SF=OF or SF=0 */
7751 case LT
: /* SF<>OF or SF=1 */
7752 if (op1
== const0_rtx
)
7755 /* For other cases Carry flag is not required. */
      /* Codes doable only with the sign flag when comparing
         against zero, but we miss the jump instruction for it,
         so we need to use relational tests against the overflow
         flag, which thus needs to be zero.  */
7761 case GT
: /* ZF=0 & SF=OF */
7762 case LE
: /* ZF=1 | SF<>OF */
7763 if (op1
== const0_rtx
)
7767 /* strcmp pattern do (use flags) and combine may ask us for proper
7776 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7779 ix86_use_fcomi_compare (code
)
7780 enum rtx_code code ATTRIBUTE_UNUSED
;
7782 enum rtx_code swapped_code
= swap_condition (code
);
7783 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
7784 || (ix86_fp_comparison_cost (swapped_code
)
7785 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */
7792 static enum rtx_code
7793 ix86_prepare_fp_compare_args (code
, pop0
, pop1
)
7797 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
7798 rtx op0
= *pop0
, op1
= *pop1
;
7799 enum machine_mode op_mode
= GET_MODE (op0
);
7800 int is_sse
= SSE_REG_P (op0
) | SSE_REG_P (op1
);
7802 /* All of the unordered compare instructions only work on registers.
7803 The same is true of the XFmode compare instructions. The same is
7804 true of the fcomi compare instructions. */
7807 && (fpcmp_mode
== CCFPUmode
7808 || op_mode
== XFmode
7809 || op_mode
== TFmode
7810 || ix86_use_fcomi_compare (code
)))
7812 op0
= force_reg (op_mode
, op0
);
7813 op1
= force_reg (op_mode
, op1
);
  /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
     things around if they appear profitable, otherwise force op0
     into a register.  */
7821 if (standard_80387_constant_p (op0
) == 0
7822 || (GET_CODE (op0
) == MEM
7823 && ! (standard_80387_constant_p (op1
) == 0
7824 || GET_CODE (op1
) == MEM
)))
7827 tmp
= op0
, op0
= op1
, op1
= tmp
;
7828 code
= swap_condition (code
);
7831 if (GET_CODE (op0
) != REG
)
7832 op0
= force_reg (op_mode
, op0
);
7834 if (CONSTANT_P (op1
))
7836 if (standard_80387_constant_p (op1
))
7837 op1
= force_reg (op_mode
, op1
);
7839 op1
= validize_mem (force_const_mem (op_mode
, op1
));
7843 /* Try to rearrange the comparison to make it cheaper. */
7844 if (ix86_fp_comparison_cost (code
)
7845 > ix86_fp_comparison_cost (swap_condition (code
))
7846 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
7849 tmp
= op0
, op0
= op1
, op1
= tmp
;
7850 code
= swap_condition (code
);
7851 if (GET_CODE (op0
) != REG
)
7852 op0
= force_reg (op_mode
, op0
);
/* Convert the comparison codes we use to represent FP comparisons to
   integer codes that will result in a proper branch.  Return UNKNOWN if
   no such code is available.  */
7863 static enum rtx_code
7864 ix86_fp_compare_code_to_integer (code
)
/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for the branch that
   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
   is not required, its value is set to NIL.
   We never require more than two branches.  */
7900 ix86_fp_comparison_codes (code
, bypass_code
, first_code
, second_code
)
7901 enum rtx_code code
, *bypass_code
, *first_code
, *second_code
;
7907 /* The fcomi comparison sets flags as follows:
7917 case GT
: /* GTU - CF=0 & ZF=0 */
7918 case GE
: /* GEU - CF=0 */
7919 case ORDERED
: /* PF=0 */
7920 case UNORDERED
: /* PF=1 */
7921 case UNEQ
: /* EQ - ZF=1 */
7922 case UNLT
: /* LTU - CF=1 */
7923 case UNLE
: /* LEU - CF=1 | ZF=1 */
7924 case LTGT
: /* EQ - ZF=0 */
7926 case LT
: /* LTU - CF=1 - fails on unordered */
7928 *bypass_code
= UNORDERED
;
7930 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
7932 *bypass_code
= UNORDERED
;
7934 case EQ
: /* EQ - ZF=1 - fails on unordered */
7936 *bypass_code
= UNORDERED
;
7938 case NE
: /* NE - ZF=0 - fails on unordered */
7940 *second_code
= UNORDERED
;
7942 case UNGE
: /* GEU - CF=0 - fails on unordered */
7944 *second_code
= UNORDERED
;
7946 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
7948 *second_code
= UNORDERED
;
7953 if (!TARGET_IEEE_FP
)
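/* Illustrative sketch, not part of the original file: when a comparison
   "fails on unordered" (e.g. LT above), the real branch has to be guarded
   by a bypass test, and when it succeeds spuriously on unordered (e.g.
   UNGE), a second test is needed.  In plain C99, with hypothetical names,
   the bypass case looks roughly like the fragment below, kept under
   "#if 0" so it does not affect compilation.  */
#if 0
#include <math.h>

static int
branch_lt_with_bypass (double a, double b)
{
  if (isunordered (a, b))       /* bypass_code == UNORDERED */
    return 0;                   /* branch around the real test */
  if (isless (a, b))            /* first_code == LT, now safe */
    return 1;
  return 0;
}
#endif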
/* Return the cost of a comparison done with fcom + arithmetic operations
   on AX.  All the following functions use the number of instructions as a
   cost metric.  In future this should be tweaked to compute bytes for
   optimize_size and to take into account the performance of various
   instructions on various CPUs.  */
7965 ix86_fp_comparison_arithmetics_cost (code
)
7968 if (!TARGET_IEEE_FP
)
7970 /* The cost of code output by ix86_expand_fp_compare. */
7998 /* Return cost of comparison done using fcomi operation.
7999 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8001 ix86_fp_comparison_fcomi_cost (code
)
8004 enum rtx_code bypass_code
, first_code
, second_code
;
  /* Return an arbitrarily high cost when the instruction is not supported -
     this prevents gcc from using it.  */
8009 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8010 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 2;
8013 /* Return cost of comparison done using sahf operation.
8014 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8016 ix86_fp_comparison_sahf_cost (code
)
8019 enum rtx_code bypass_code
, first_code
, second_code
;
  /* Return an arbitrarily high cost when the instruction is not preferred -
     this keeps gcc from using it.  */
8022 if (!TARGET_USE_SAHF
&& !optimize_size
)
8024 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8025 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 3;
8028 /* Compute cost of the comparison done using any method.
8029 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8031 ix86_fp_comparison_cost (code
)
8034 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
8037 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
8038 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
8040 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
8041 if (min
> sahf_cost
)
8043 if (min
> fcomi_cost
)
8048 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8051 ix86_expand_fp_compare (code
, op0
, op1
, scratch
, second_test
, bypass_test
)
8053 rtx op0
, op1
, scratch
;
8057 enum machine_mode fpcmp_mode
, intcmp_mode
;
8059 int cost
= ix86_fp_comparison_cost (code
);
8060 enum rtx_code bypass_code
, first_code
, second_code
;
8062 fpcmp_mode
= ix86_fp_compare_mode (code
);
8063 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
8066 *second_test
= NULL_RTX
;
8068 *bypass_test
= NULL_RTX
;
8070 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8072 /* Do fcomi/sahf based test when profitable. */
8073 if ((bypass_code
== NIL
|| bypass_test
)
8074 && (second_code
== NIL
|| second_test
)
8075 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
8079 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8080 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
8086 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8087 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8089 scratch
= gen_reg_rtx (HImode
);
8090 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8091 emit_insn (gen_x86_sahf_1 (scratch
));
8094 /* The FP codes work out to act like unsigned. */
8095 intcmp_mode
= fpcmp_mode
;
8097 if (bypass_code
!= NIL
)
8098 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
8099 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8101 if (second_code
!= NIL
)
8102 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
8103 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8108 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8109 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8110 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8112 scratch
= gen_reg_rtx (HImode
);
8113 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8115 /* In the unordered case, we have to check C2 for NaN's, which
8116 doesn't happen to work out to anything nice combination-wise.
8117 So do some bit twiddling on the value we've got in AH to come
8118 up with an appropriate set of condition codes. */
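      /* Illustrative note, not part of the original file: after
         "fnstsw %ax" the condition bits of the FPU status word land in AH
         as C0 = 0x01, C2 = 0x04 and C3 = 0x40, so the masks used below
         (0x45 = C3|C2|C0, 0x44, 0x40, 0x05, 0x01) pick out the bit
         combinations that encode the comparison result, with C2 set when
         the operands were unordered (NaN).  */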
8120 intcmp_mode
= CCNOmode
;
8125 if (code
== GT
|| !TARGET_IEEE_FP
)
8127 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8132 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8133 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8134 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
8135 intcmp_mode
= CCmode
;
8141 if (code
== LT
&& TARGET_IEEE_FP
)
8143 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8144 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
8145 intcmp_mode
= CCmode
;
8150 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
8156 if (code
== GE
|| !TARGET_IEEE_FP
)
8158 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
8163 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8164 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8171 if (code
== LE
&& TARGET_IEEE_FP
)
8173 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8174 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8175 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8176 intcmp_mode
= CCmode
;
8181 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8187 if (code
== EQ
&& TARGET_IEEE_FP
)
8189 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8190 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8191 intcmp_mode
= CCmode
;
8196 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8203 if (code
== NE
&& TARGET_IEEE_FP
)
8205 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8206 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8212 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8218 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8222 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8231 /* Return the test that should be put into the flags user, i.e.
8232 the bcc, scc, or cmov instruction. */
8233 return gen_rtx_fmt_ee (code
, VOIDmode
,
8234 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8239 ix86_expand_compare (code
, second_test
, bypass_test
)
8241 rtx
*second_test
, *bypass_test
;
8244 op0
= ix86_compare_op0
;
8245 op1
= ix86_compare_op1
;
8248 *second_test
= NULL_RTX
;
8250 *bypass_test
= NULL_RTX
;
8252 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8253 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
8254 second_test
, bypass_test
);
8256 ret
= ix86_expand_int_compare (code
, op0
, op1
);
8261 /* Return true if the CODE will result in nontrivial jump sequence. */
8263 ix86_fp_jump_nontrivial_p (code
)
8266 enum rtx_code bypass_code
, first_code
, second_code
;
8269 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8270 return bypass_code
!= NIL
|| second_code
!= NIL
;
8274 ix86_expand_branch (code
, label
)
8280 switch (GET_MODE (ix86_compare_op0
))
8286 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
8287 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8288 gen_rtx_LABEL_REF (VOIDmode
, label
),
8290 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
8300 enum rtx_code bypass_code
, first_code
, second_code
;
8302 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
8305 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
        /* Check whether we will use the natural sequence with one jump.  If
           so, we can expand the jump early.  Otherwise delay expansion by
           creating a compound insn so as not to confuse optimizers.  */
8310 if (bypass_code
== NIL
&& second_code
== NIL
8313 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
8314 gen_rtx_LABEL_REF (VOIDmode
, label
),
8319 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
8320 ix86_compare_op0
, ix86_compare_op1
);
8321 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8322 gen_rtx_LABEL_REF (VOIDmode
, label
),
8324 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
8326 use_fcomi
= ix86_use_fcomi_compare (code
);
8327 vec
= rtvec_alloc (3 + !use_fcomi
);
8328 RTVEC_ELT (vec
, 0) = tmp
;
8330 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
8332 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
8335 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
8337 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
8345 /* Expand DImode branch into multiple compare+branch. */
8347 rtx lo
[2], hi
[2], label2
;
8348 enum rtx_code code1
, code2
, code3
;
8350 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
8352 tmp
= ix86_compare_op0
;
8353 ix86_compare_op0
= ix86_compare_op1
;
8354 ix86_compare_op1
= tmp
;
8355 code
= swap_condition (code
);
8357 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
8358 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
8360 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8361 avoid two branches. This costs one extra insn, so disable when
8362 optimizing for size. */
8364 if ((code
== EQ
|| code
== NE
)
8366 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
8371 if (hi
[1] != const0_rtx
)
8372 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
8373 NULL_RTX
, 0, OPTAB_WIDEN
);
8376 if (lo
[1] != const0_rtx
)
8377 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
8378 NULL_RTX
, 0, OPTAB_WIDEN
);
8380 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
8381 NULL_RTX
, 0, OPTAB_WIDEN
);
8383 ix86_compare_op0
= tmp
;
8384 ix86_compare_op1
= const0_rtx
;
8385 ix86_expand_branch (code
, label
);
        /* Otherwise, if we are doing a less-than or greater-or-equal
           comparison, op1 is a constant, and the low word is zero, then we
           can just examine the high word.  */
8393 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
8396 case LT
: case LTU
: case GE
: case GEU
:
8397 ix86_compare_op0
= hi
[0];
8398 ix86_compare_op1
= hi
[1];
8399 ix86_expand_branch (code
, label
);
8405 /* Otherwise, we need two or three jumps. */
8407 label2
= gen_label_rtx ();
8410 code2
= swap_condition (code
);
8411 code3
= unsigned_condition (code
);
8415 case LT
: case GT
: case LTU
: case GTU
:
8418 case LE
: code1
= LT
; code2
= GT
; break;
8419 case GE
: code1
= GT
; code2
= LT
; break;
8420 case LEU
: code1
= LTU
; code2
= GTU
; break;
8421 case GEU
: code1
= GTU
; code2
= LTU
; break;
8423 case EQ
: code1
= NIL
; code2
= NE
; break;
8424 case NE
: code2
= NIL
; break;
          /*
           * a < b =>
           *    if (hi(a) < hi(b)) goto true;
           *    if (hi(a) > hi(b)) goto false;
           *    if (lo(a) < lo(b)) goto true;
           *  false:
           */
8438 ix86_compare_op0
= hi
[0];
8439 ix86_compare_op1
= hi
[1];
8442 ix86_expand_branch (code1
, label
);
8444 ix86_expand_branch (code2
, label2
);
8446 ix86_compare_op0
= lo
[0];
8447 ix86_compare_op1
= lo
[1];
8448 ix86_expand_branch (code3
, label
);
8451 emit_label (label2
);
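/* Illustrative sketch, not part of the original file: the DImode case
   above decomposes a 64-bit comparison into 32-bit ones.  Written out in
   plain C with hypothetical helper names, the two tricks used are
   roughly the following, kept under "#if 0" so they do not affect
   compilation.  */
#if 0
#include <stdint.h>

/* Equality: (hi0 ^ hi1) | (lo0 ^ lo1) is zero iff the values are equal,
   which needs only one conditional branch.  */
static int
eq_u64_by_halves (uint32_t lo0, uint32_t hi0, uint32_t lo1, uint32_t hi1)
{
  return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
}

/* Ordering: compare the high words first and fall back to an unsigned
   comparison of the low words only when the high words are equal.  */
static int
lt_s64_by_halves (int32_t hi0, uint32_t lo0, int32_t hi1, uint32_t lo1)
{
  if (hi0 < hi1)
    return 1;                   /* if (hi(a) < hi(b)) goto true */
  if (hi0 > hi1)
    return 0;                   /* if (hi(a) > hi(b)) goto false */
  return lo0 < lo1;             /* if (lo(a) < lo(b)) goto true */
}
#endif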
8460 /* Split branch based on floating point condition. */
8462 ix86_split_fp_branch (code
, op1
, op2
, target1
, target2
, tmp
)
8464 rtx op1
, op2
, target1
, target2
, tmp
;
8467 rtx label
= NULL_RTX
;
8469 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
8472 if (target2
!= pc_rtx
)
8475 code
= reverse_condition_maybe_unordered (code
);
8480 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
8481 tmp
, &second
, &bypass
);
8483 if (split_branch_probability
>= 0)
8485 /* Distribute the probabilities across the jumps.
8486 Assume the BYPASS and SECOND to be always test
8488 probability
= split_branch_probability
;
      /* A value of 1 is low enough that there is no need to update the
         probability.  Later we may run some experiments and see
         if unordered values are more frequent in practice.  */
8494 bypass_probability
= 1;
8496 second_probability
= 1;
8498 if (bypass
!= NULL_RTX
)
8500 label
= gen_label_rtx ();
8501 i
= emit_jump_insn (gen_rtx_SET
8503 gen_rtx_IF_THEN_ELSE (VOIDmode
,
8505 gen_rtx_LABEL_REF (VOIDmode
,
8508 if (bypass_probability
>= 0)
8510 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8511 GEN_INT (bypass_probability
),
8514 i
= emit_jump_insn (gen_rtx_SET
8516 gen_rtx_IF_THEN_ELSE (VOIDmode
,
8517 condition
, target1
, target2
)));
8518 if (probability
>= 0)
8520 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8521 GEN_INT (probability
),
8523 if (second
!= NULL_RTX
)
8525 i
= emit_jump_insn (gen_rtx_SET
8527 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
8529 if (second_probability
>= 0)
8531 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8532 GEN_INT (second_probability
),
8535 if (label
!= NULL_RTX
)
8540 ix86_expand_setcc (code
, dest
)
8544 rtx ret
, tmp
, tmpreg
;
8545 rtx second_test
, bypass_test
;
8547 if (GET_MODE (ix86_compare_op0
) == DImode
8549 return 0; /* FAIL */
8551 if (GET_MODE (dest
) != QImode
)
8554 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8555 PUT_MODE (ret
, QImode
);
8560 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
8561 if (bypass_test
|| second_test
)
8563 rtx test
= second_test
;
8565 rtx tmp2
= gen_reg_rtx (QImode
);
8572 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
8574 PUT_MODE (test
, QImode
);
8575 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
8578 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
8580 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
8583 return 1; /* DONE */
8587 ix86_expand_int_movcc (operands
)
8590 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
8591 rtx compare_seq
, compare_op
;
8592 rtx second_test
, bypass_test
;
8593 enum machine_mode mode
= GET_MODE (operands
[0]);
  /* When the compare code is not LTU or GEU, we can not use the sbbl case.
     When the comparison is done with an immediate, we can convert it to
     LTU or GEU by altering the integer.  */
8599 if ((code
== LEU
|| code
== GTU
)
8600 && GET_CODE (ix86_compare_op1
) == CONST_INT
8602 && (unsigned int) INTVAL (ix86_compare_op1
) != 0xffffffff
      /* The operand still must be representable as a sign-extended value.  */
8605 || GET_MODE (ix86_compare_op0
) != DImode
8606 || (unsigned int) INTVAL (ix86_compare_op1
) != 0x7fffffff)
8607 && GET_CODE (operands
[2]) == CONST_INT
8608 && GET_CODE (operands
[3]) == CONST_INT
)
8615 = gen_int_mode (INTVAL (ix86_compare_op1
) + 1,
8616 GET_MODE (ix86_compare_op0
));
8620 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8621 compare_seq
= gen_sequence ();
8624 compare_code
= GET_CODE (compare_op
);
8626 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8627 HImode insns, we'd be swallowed in word prefix ops. */
8630 && (mode
!= DImode
|| TARGET_64BIT
)
8631 && GET_CODE (operands
[2]) == CONST_INT
8632 && GET_CODE (operands
[3]) == CONST_INT
)
8634 rtx out
= operands
[0];
8635 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
8636 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
8639 if ((compare_code
== LTU
|| compare_code
== GEU
)
8640 && !second_test
&& !bypass_test
)
8643 /* Detect overlap between destination and compare sources. */
8646 /* To simplify rest of code, restrict to the GEU case. */
8647 if (compare_code
== LTU
)
8652 compare_code
= reverse_condition (compare_code
);
8653 code
= reverse_condition (code
);
8657 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
8658 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
8659 tmp
= gen_reg_rtx (mode
);
8661 emit_insn (compare_seq
);
8663 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
));
8665 emit_insn (gen_x86_movsicc_0_m1 (tmp
));
8677 tmp
= expand_simple_binop (mode
, PLUS
,
8679 tmp
, 1, OPTAB_DIRECT
);
8690 tmp
= expand_simple_binop (mode
, IOR
,
8692 tmp
, 1, OPTAB_DIRECT
);
8694 else if (diff
== -1 && ct
)
8704 tmp
= expand_simple_unop (mode
, NOT
, tmp
, tmp
, 1);
8706 tmp
= expand_simple_binop (mode
, PLUS
,
8708 tmp
, 1, OPTAB_DIRECT
);
8715 * andl cf - ct, dest
8720 tmp
= expand_simple_binop (mode
, AND
,
8722 gen_int_mode (cf
- ct
, mode
),
8723 tmp
, 1, OPTAB_DIRECT
);
8725 tmp
= expand_simple_binop (mode
, PLUS
,
8727 tmp
, 1, OPTAB_DIRECT
);
8731 emit_move_insn (out
, tmp
);
8733 return 1; /* DONE */
8740 tmp
= ct
, ct
= cf
, cf
= tmp
;
8742 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
          /* We may be reversing an unordered compare to a normal compare,
             which is not valid in general (we may convert a non-trapping
             condition to a trapping one); however, on i386 we currently
             emit all comparisons unordered.  */
8748 compare_code
= reverse_condition_maybe_unordered (compare_code
);
8749 code
= reverse_condition_maybe_unordered (code
);
8753 compare_code
= reverse_condition (compare_code
);
8754 code
= reverse_condition (code
);
8759 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
8760 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
8762 if (ix86_compare_op1
== const0_rtx
8763 && (code
== LT
|| code
== GE
))
8764 compare_code
= code
;
8765 else if (ix86_compare_op1
== constm1_rtx
)
8769 else if (code
== GT
)
8774 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8775 if (compare_code
!= NIL
8776 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
8777 && (cf
== -1 || ct
== -1))
8779 /* If lea code below could be used, only optimize
8780 if it results in a 2 insn sequence. */
8782 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
8783 || diff
== 3 || diff
== 5 || diff
== 9)
8784 || (compare_code
== LT
&& ct
== -1)
8785 || (compare_code
== GE
&& cf
== -1))
8788 * notl op1 (if necessary)
8796 code
= reverse_condition (code
);
8799 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
8800 ix86_compare_op1
, VOIDmode
, 0, -1);
8802 out
= expand_simple_binop (mode
, IOR
,
8804 out
, 1, OPTAB_DIRECT
);
8805 if (out
!= operands
[0])
8806 emit_move_insn (operands
[0], out
);
8808 return 1; /* DONE */
8812 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
8813 || diff
== 3 || diff
== 5 || diff
== 9)
8814 && (mode
!= DImode
|| x86_64_sign_extended_value (GEN_INT (cf
))))
8820 * lea cf(dest*(ct-cf)),dest
8824 * This also catches the degenerate setcc-only case.
8830 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
8831 ix86_compare_op1
, VOIDmode
, 0, 1);
          /* On x86_64 the lea instruction operates on Pmode, so we need
             to get the arithmetic done in the proper mode to match.  */
8842 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
8846 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
8852 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
8856 && (GET_CODE (tmp
) != SUBREG
|| SUBREG_REG (tmp
) != out
))
8862 clob
= gen_rtx_REG (CCmode
, FLAGS_REG
);
8863 clob
= gen_rtx_CLOBBER (VOIDmode
, clob
);
8865 tmp
= gen_rtx_SET (VOIDmode
, out
, tmp
);
8866 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
8870 emit_insn (gen_rtx_SET (VOIDmode
, out
, tmp
));
8872 if (out
!= operands
[0])
8873 emit_move_insn (operands
[0], out
);
8875 return 1; /* DONE */
          /*
           * General case:                      Jumpful:
           *   xorl dest,dest                   cmpl op1, op2
           *   cmpl op1, op2                    movl ct, dest
           *   setcc dest                       jcc 1f
           *   decl dest                        movl cf, dest
           *   andl (cf-ct),dest                1:
           *   addl ct,dest
           *
           * Size 20.                           Size 14.
           *
           * This is reasonably steep, but branch mispredict costs are
           * high on modern cpus, so consider failing only if optimizing
           * for space.
           *
           * %%% Parameterize branch_cost on the tuning architecture, then
           * use that.  The 80386 couldn't care less about mispredicts.
           */
8897 if (!optimize_size
&& !TARGET_CMOVE
)
8903 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
        /* We may be reversing an unordered compare to a normal compare,
           which is not valid in general (we may convert a non-trapping
           condition to a trapping one); however, on i386 we currently
           emit all comparisons unordered.  */
8908 code
= reverse_condition_maybe_unordered (code
);
8911 code
= reverse_condition (code
);
8912 if (compare_code
!= NIL
)
8913 compare_code
= reverse_condition (compare_code
);
8917 if (compare_code
!= NIL
)
          /* notl op1   (if needed)
             sarl $31, op1
             andl (cf-ct), op1
             addl ct, op1

             For x < 0 (resp. x <= -1) there will be no notl,
             so if possible swap the constants to get rid of the
             complement.
             True/false will be -1/0 while code below (store flag
             followed by decrement) is 0/-1, so the constants need
             to be exchanged once more.  */
8931 if (compare_code
== GE
|| !cf
)
8933 code
= reverse_condition (code
);
8938 HOST_WIDE_INT tmp
= cf
;
8943 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
8944 ix86_compare_op1
, VOIDmode
, 0, -1);
8948 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
8949 ix86_compare_op1
, VOIDmode
, 0, 1);
8951 out
= expand_simple_binop (mode
, PLUS
,
8953 out
, 1, OPTAB_DIRECT
);
8956 out
= expand_simple_binop (mode
, AND
,
8958 gen_int_mode (cf
- ct
, mode
),
8959 out
, 1, OPTAB_DIRECT
);
8960 out
= expand_simple_binop (mode
, PLUS
,
8962 out
, 1, OPTAB_DIRECT
);
8963 if (out
!= operands
[0])
8964 emit_move_insn (operands
[0], out
);
8966 return 1; /* DONE */
8972 /* Try a few things more with specific constants and a variable. */
8975 rtx var
, orig_out
, out
, tmp
;
8978 return 0; /* FAIL */
8980 /* If one of the two operands is an interesting constant, load a
8981 constant with the above and mask it in with a logical operation. */
8983 if (GET_CODE (operands
[2]) == CONST_INT
)
8986 if (INTVAL (operands
[2]) == 0)
8987 operands
[3] = constm1_rtx
, op
= and_optab
;
8988 else if (INTVAL (operands
[2]) == -1)
8989 operands
[3] = const0_rtx
, op
= ior_optab
;
8991 return 0; /* FAIL */
8993 else if (GET_CODE (operands
[3]) == CONST_INT
)
8996 if (INTVAL (operands
[3]) == 0)
8997 operands
[2] = constm1_rtx
, op
= and_optab
;
8998 else if (INTVAL (operands
[3]) == -1)
8999 operands
[2] = const0_rtx
, op
= ior_optab
;
9001 return 0; /* FAIL */
9004 return 0; /* FAIL */
9006 orig_out
= operands
[0];
9007 tmp
= gen_reg_rtx (mode
);
9010 /* Recurse to get the constant loaded. */
9011 if (ix86_expand_int_movcc (operands
) == 0)
9012 return 0; /* FAIL */
9014 /* Mask in the interesting variable. */
9015 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
9017 if (out
!= orig_out
)
9018 emit_move_insn (orig_out
, out
);
9020 return 1; /* DONE */
9024 * For comparison with above,
9034 if (! nonimmediate_operand (operands
[2], mode
))
9035 operands
[2] = force_reg (mode
, operands
[2]);
9036 if (! nonimmediate_operand (operands
[3], mode
))
9037 operands
[3] = force_reg (mode
, operands
[3]);
9039 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
9041 rtx tmp
= gen_reg_rtx (mode
);
9042 emit_move_insn (tmp
, operands
[3]);
9045 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
9047 rtx tmp
= gen_reg_rtx (mode
);
9048 emit_move_insn (tmp
, operands
[2]);
9051 if (! register_operand (operands
[2], VOIDmode
)
9052 && ! register_operand (operands
[3], VOIDmode
))
9053 operands
[2] = force_reg (mode
, operands
[2]);
9055 emit_insn (compare_seq
);
9056 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9057 gen_rtx_IF_THEN_ELSE (mode
,
9058 compare_op
, operands
[2],
9061 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9062 gen_rtx_IF_THEN_ELSE (mode
,
9067 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9068 gen_rtx_IF_THEN_ELSE (mode
,
9073 return 1; /* DONE */
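/* Illustrative sketch, not part of the original file: the branchless
   sequences built above (sbb, or setcc followed by decrement, and/add)
   compute "cond ? ct : cf" without a jump.  A plain C rendering of the
   core trick, with hypothetical names, is roughly the helper below, kept
   under "#if 0" so it does not affect compilation.  */
#if 0
#include <stdint.h>

static uint32_t
select_ct_cf (int cond, uint32_t ct, uint32_t cf)
{
  /* mask is 0 when cond (0 or 1) is true and -1 when it is false -- the
     same 0/-1 value that "setcc; decl" or "sbb reg,reg" leave behind.  */
  uint32_t mask = (uint32_t) cond - 1;

  /* (cf - ct) & mask is 0 or cf - ct; adding ct yields ct or cf.  */
  return ((cf - ct) & mask) + ct;
}
#endif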
9077 ix86_expand_fp_movcc (operands
)
9082 rtx compare_op
, second_test
, bypass_test
;
9084 /* For SF/DFmode conditional moves based on comparisons
9085 in same mode, we may want to use SSE min/max instructions. */
9086 if (((TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == SFmode
)
9087 || (TARGET_SSE2
&& TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == DFmode
))
9088 && GET_MODE (ix86_compare_op0
) == GET_MODE (operands
[0])
      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
9091 || (GET_CODE (operands
[1]) != LTGT
&& GET_CODE (operands
[1]) != UNEQ
))
9092 /* We may be called from the post-reload splitter. */
9093 && (!REG_P (operands
[0])
9094 || SSE_REG_P (operands
[0])
9095 || REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
))
9097 rtx op0
= ix86_compare_op0
, op1
= ix86_compare_op1
;
9098 code
= GET_CODE (operands
[1]);
9100 /* See if we have (cross) match between comparison operands and
9101 conditional move operands. */
9102 if (rtx_equal_p (operands
[2], op1
))
9107 code
= reverse_condition_maybe_unordered (code
);
9109 if (rtx_equal_p (operands
[2], op0
) && rtx_equal_p (operands
[3], op1
))
9111 /* Check for min operation. */
9114 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
9115 if (memory_operand (op0
, VOIDmode
))
9116 op0
= force_reg (GET_MODE (operands
[0]), op0
);
9117 if (GET_MODE (operands
[0]) == SFmode
)
9118 emit_insn (gen_minsf3 (operands
[0], op0
, op1
));
9120 emit_insn (gen_mindf3 (operands
[0], op0
, op1
));
9123 /* Check for max operation. */
9126 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
9127 if (memory_operand (op0
, VOIDmode
))
9128 op0
= force_reg (GET_MODE (operands
[0]), op0
);
9129 if (GET_MODE (operands
[0]) == SFmode
)
9130 emit_insn (gen_maxsf3 (operands
[0], op0
, op1
));
9132 emit_insn (gen_maxdf3 (operands
[0], op0
, op1
));
      /* Manage the condition to be sse_comparison_operator.  In case we are
         in non-ieee mode, try to canonicalize the destination operand
         to be first in the comparison - this helps reload to avoid extra
         moves.  */
9140 if (!sse_comparison_operator (operands
[1], VOIDmode
)
9141 || (rtx_equal_p (operands
[0], ix86_compare_op1
) && !TARGET_IEEE_FP
))
9143 rtx tmp
= ix86_compare_op0
;
9144 ix86_compare_op0
= ix86_compare_op1
;
9145 ix86_compare_op1
= tmp
;
9146 operands
[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands
[1])),
9147 VOIDmode
, ix86_compare_op0
,
      /* Similarly try to manage the result to be the first operand of the
         conditional move.  We also don't support the NE comparison on SSE,
         so try to avoid it.  */
9153 if ((rtx_equal_p (operands
[0], operands
[3])
9154 && (!TARGET_IEEE_FP
|| GET_CODE (operands
[1]) != EQ
))
9155 || (GET_CODE (operands
[1]) == NE
&& TARGET_IEEE_FP
))
9157 rtx tmp
= operands
[2];
9158 operands
[2] = operands
[3];
9160 operands
[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9161 (GET_CODE (operands
[1])),
9162 VOIDmode
, ix86_compare_op0
,
9165 if (GET_MODE (operands
[0]) == SFmode
)
9166 emit_insn (gen_sse_movsfcc (operands
[0], operands
[1],
9167 operands
[2], operands
[3],
9168 ix86_compare_op0
, ix86_compare_op1
));
9170 emit_insn (gen_sse_movdfcc (operands
[0], operands
[1],
9171 operands
[2], operands
[3],
9172 ix86_compare_op0
, ix86_compare_op1
));
9176 /* The floating point conditional move instructions don't directly
9177 support conditions resulting from a signed integer comparison. */
9179 code
= GET_CODE (operands
[1]);
9180 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9182 /* The floating point conditional move instructions don't directly
9183 support signed integer comparisons. */
9185 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
9187 if (second_test
!= NULL
|| bypass_test
!= NULL
)
9189 tmp
= gen_reg_rtx (QImode
);
9190 ix86_expand_setcc (code
, tmp
);
9192 ix86_compare_op0
= tmp
;
9193 ix86_compare_op1
= const0_rtx
;
9194 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9196 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
9198 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
9199 emit_move_insn (tmp
, operands
[3]);
9202 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
9204 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
9205 emit_move_insn (tmp
, operands
[2]);
9209 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9210 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9215 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9216 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9221 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9222 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  At most three parts are generated.  */
9236 ix86_split_to_parts (operand
, parts
, mode
)
9239 enum machine_mode mode
;
9244 size
= mode
== TFmode
? 3 : (GET_MODE_SIZE (mode
) / 4);
9246 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
9248 if (GET_CODE (operand
) == REG
&& MMX_REGNO_P (REGNO (operand
)))
9250 if (size
< 2 || size
> 3)
  /* Optimize constant pool references to immediates.  This is used by fp
     moves, which force all constants to memory to allow combining.  */
9255 if (GET_CODE (operand
) == MEM
&& RTX_UNCHANGING_P (operand
))
9257 rtx tmp
= maybe_get_pool_constant (operand
);
9262 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
      /* The only non-offsettable memories we handle are pushes.  */
9265 if (! push_operand (operand
, VOIDmode
))
9268 operand
= copy_rtx (operand
);
9269 PUT_MODE (operand
, Pmode
);
9270 parts
[0] = parts
[1] = parts
[2] = operand
;
9272 else if (!TARGET_64BIT
)
9275 split_di (&operand
, 1, &parts
[0], &parts
[1]);
9278 if (REG_P (operand
))
9280 if (!reload_completed
)
9282 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
9283 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
9285 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
9287 else if (offsettable_memref_p (operand
))
9289 operand
= adjust_address (operand
, SImode
, 0);
9291 parts
[1] = adjust_address (operand
, SImode
, 4);
9293 parts
[2] = adjust_address (operand
, SImode
, 8);
9295 else if (GET_CODE (operand
) == CONST_DOUBLE
)
9300 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
9305 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
9306 parts
[2] = gen_int_mode (l
[2], SImode
);
9309 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
9314 parts
[1] = gen_int_mode (l
[1], SImode
);
9315 parts
[0] = gen_int_mode (l
[0], SImode
);
9324 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
9325 if (mode
== XFmode
|| mode
== TFmode
)
9327 if (REG_P (operand
))
9329 if (!reload_completed
)
9331 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
9332 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
9334 else if (offsettable_memref_p (operand
))
9336 operand
= adjust_address (operand
, DImode
, 0);
9338 parts
[1] = adjust_address (operand
, SImode
, 8);
9340 else if (GET_CODE (operand
) == CONST_DOUBLE
)
9345 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
9346 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
9347 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9348 if (HOST_BITS_PER_WIDE_INT
>= 64)
9351 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
9352 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
9355 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
9356 parts
[1] = gen_int_mode (l
[2], SImode
);
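/* Illustrative sketch, not part of the original file: for the CONST_DOUBLE
   cases above, the SImode parts are simply the 32-bit words of the value's
   target representation.  For a 64-bit double on a little-endian target
   that is roughly the hypothetical helper below, kept under "#if 0" so it
   does not affect compilation.  */
#if 0
#include <stdint.h>
#include <string.h>

static void
double_to_si_parts (double d, uint32_t parts[2])
{
  unsigned char bytes[sizeof (double)];

  memcpy (bytes, &d, sizeof (double));
  memcpy (&parts[0], bytes, 4);         /* low word, like parts[0] above */
  memcpy (&parts[1], bytes + 4, 4);     /* high word, like parts[1] above */
}
#endif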
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */
9372 ix86_split_long_move (operands
)
9379 enum machine_mode mode
= GET_MODE (operands
[0]);
  /* The DFmode expanders may ask us to move a double.
     For a 64-bit target this is a single move.  By hiding the fact
     here we simplify the i386.md splitters.  */
9384 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
      /* Optimize constant pool references to immediates.  This is used by
         fp moves, which force all constants to memory to allow combining.  */
9389 if (GET_CODE (operands
[1]) == MEM
9390 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
9391 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
9392 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
9393 if (push_operand (operands
[0], VOIDmode
))
9395 operands
[0] = copy_rtx (operands
[0]);
9396 PUT_MODE (operands
[0], Pmode
);
9399 operands
[0] = gen_lowpart (DImode
, operands
[0]);
9400 operands
[1] = gen_lowpart (DImode
, operands
[1]);
9401 emit_move_insn (operands
[0], operands
[1]);
9405 /* The only non-offsettable memory we handle is push. */
9406 if (push_operand (operands
[0], VOIDmode
))
9408 else if (GET_CODE (operands
[0]) == MEM
9409 && ! offsettable_memref_p (operands
[0]))
9412 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
9413 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
9415 /* When emitting push, take care for source operands on the stack. */
9416 if (push
&& GET_CODE (operands
[1]) == MEM
9417 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
9420 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
9421 XEXP (part
[1][2], 0));
9422 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
9423 XEXP (part
[1][1], 0));
  /* We need to do the copy in the right order in case an address register
     of the source overlaps the destination.  */
9428 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
9430 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
9432 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
9435 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
9438 /* Collision in the middle part can be handled by reordering. */
9439 if (collisions
== 1 && nparts
== 3
9440 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
9443 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
9444 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
9447 /* If there are more collisions, we can't handle it by reordering.
9448 Do an lea to the last part and use only one colliding move. */
9449 else if (collisions
> 1)
9452 emit_insn (gen_rtx_SET (VOIDmode
, part
[0][nparts
- 1],
9453 XEXP (part
[1][0], 0)));
9454 part
[1][0] = change_address (part
[1][0],
9455 TARGET_64BIT
? DImode
: SImode
,
9456 part
[0][nparts
- 1]);
9457 part
[1][1] = adjust_address (part
[1][0], VOIDmode
, UNITS_PER_WORD
);
9459 part
[1][2] = adjust_address (part
[1][0], VOIDmode
, 8);
9469 /* We use only first 12 bytes of TFmode value, but for pushing we
9470 are required to adjust stack as if we were pushing real 16byte
9472 if (mode
== TFmode
&& !TARGET_64BIT
)
9473 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
9475 emit_move_insn (part
[0][2], part
[1][2]);
9480 /* In 64bit mode we don't have 32bit push available. In case this is
9481 register, it is OK - we will just use larger counterpart. We also
9482 retype memory - these comes from attempt to avoid REX prefix on
9483 moving of second half of TFmode value. */
9484 if (GET_MODE (part
[1][1]) == SImode
)
9486 if (GET_CODE (part
[1][1]) == MEM
)
9487 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
9488 else if (REG_P (part
[1][1]))
9489 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
9492 if (GET_MODE (part
[1][0]) == SImode
)
9493 part
[1][0] = part
[1][1];
9496 emit_move_insn (part
[0][1], part
[1][1]);
9497 emit_move_insn (part
[0][0], part
[1][0]);
9501 /* Choose correct order to not overwrite the source before it is copied. */
9502 if ((REG_P (part
[0][0])
9503 && REG_P (part
[1][1])
9504 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
9506 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
9508 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
9512 operands
[2] = part
[0][2];
9513 operands
[3] = part
[0][1];
9514 operands
[4] = part
[0][0];
9515 operands
[5] = part
[1][2];
9516 operands
[6] = part
[1][1];
9517 operands
[7] = part
[1][0];
9521 operands
[2] = part
[0][1];
9522 operands
[3] = part
[0][0];
9523 operands
[5] = part
[1][1];
9524 operands
[6] = part
[1][0];
9531 operands
[2] = part
[0][0];
9532 operands
[3] = part
[0][1];
9533 operands
[4] = part
[0][2];
9534 operands
[5] = part
[1][0];
9535 operands
[6] = part
[1][1];
9536 operands
[7] = part
[1][2];
9540 operands
[2] = part
[0][0];
9541 operands
[3] = part
[0][1];
9542 operands
[5] = part
[1][0];
9543 operands
[6] = part
[1][1];
9546 emit_move_insn (operands
[2], operands
[5]);
9547 emit_move_insn (operands
[3], operands
[6]);
9549 emit_move_insn (operands
[4], operands
[7]);
void
ix86_split_ashldi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);
	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
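
/* Illustrative note (exposition only, not from the original comments): the
   splitter above composes a 64bit shift out of 32bit pieces using the usual
   double-word identities.  For a constant count < 32,
       high = (high << count) | (low >> (32 - count));   (what shld computes)
       low  = low << count;
   and for count >= 32,
       high = low << (count - 32);  low = 0.
   The variable-count path emits both halves unconditionally and relies on
   the x86_shift_adj_1 / x86_shift_adj_2 patterns to fix the result up at run
   time when bit 5 of the count turns out to be set.  */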
void
ix86_split_ashrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);

	  if (! reload_completed)
	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
	  else
	    {
	      emit_move_insn (high[0], low[0]);
	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
	    }

	  if (count > 32)
	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = gen_reg_rtx (SImode);
	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}
void
ix86_split_lshrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}
/* Helper function for the string operations below.  Test VARIABLE whether
   it is aligned to VALUE bytes.  If true, jump to the label.  */
static rtx
ix86_expand_aligntest (variable, value)
     rtx variable;
     int value;
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  return label;
}
/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (countreg, value)
     rtx countreg;
     HOST_WIDE_INT value;
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}
/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (exp)
     rtx exp;
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}
/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrstr contains similar code.  */
int
ix86_expand_movstr (dst, src, count_exp, align_exp)
     rtx dst, src, count_exp, align_exp;
{
  rtx srcreg, destreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;
  rtx insns;

  start_sequence ();

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    count = INTVAL (count_exp);

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      if (TARGET_64BIT)
	emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
					destreg, srcreg, countreg));
      else
	emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
				  destreg, srcreg, countreg));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */
  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  if (size == 4)
	    {
	      if (TARGET_64BIT)
		emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
						destreg, srcreg, countreg));
	      else
		emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
					  destreg, srcreg, countreg));
	    }
	  else
	    emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
					    destreg, srcreg, countreg));
	}
      if (size == 8 && (count & 0x04))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if (count & 0x02)
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (count & 0x01)
	emit_insn (gen_strmovqi (destreg, srcreg));
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
       allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and results in
	 shorter code.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
	{
	  end_sequence ();
	  return 0;
	}

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
	 than 4 bytes, because gcc is able to optimize such code better (in
	 the case the destination or the count really is aligned, gcc is often
	 able to predict the branches) and also it is friendlier to the
	 hardware branch prediction.

	 Using loops is beneficial for generic case, because we can
	 handle small counts using the loops.  Many CPUs (such as Athlon)
	 have large REP prefix setup costs.

	 This is quite costly.  Maybe we can revisit this decision later or
	 add some customizability to this code.  */

      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}
      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
					  destreg, srcreg, countreg2));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
	  emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
				    destreg, srcreg, countreg2));
	}

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if ((align <= 4 || count == 0) && TARGET_64BIT)
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strmovqi (destreg, srcreg));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }

  insns = get_insns ();
  end_sequence ();

  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
  emit_insn (insns);
  return 1;
}
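
/* A concrete example of the constant-count path above (illustrative only):
   copying 23 bytes with a known 4 byte alignment and size == 4 emits roughly
	movl $5, %ecx		; 23 >> 2
	rep ; movsl		; 20 bytes
	movsw			; count & 2
	movsb			; count & 1
   i.e. one rep movsl for the bulk plus at most a movsw and a movsb for the
   tail, with no alignment prologue because the alignment is known.  */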
9992 /* Expand string clear operation (bzero). Use i386 string operations when
9993 profitable. expand_movstr contains similar code. */
9995 ix86_expand_clrstr (src
, count_exp
, align_exp
)
9996 rtx src
, count_exp
, align_exp
;
9998 rtx destreg
, zeroreg
, countreg
;
9999 enum machine_mode counter_mode
;
10000 HOST_WIDE_INT align
= 0;
10001 unsigned HOST_WIDE_INT count
= 0;
10003 if (GET_CODE (align_exp
) == CONST_INT
)
10004 align
= INTVAL (align_exp
);
10006 /* This simple hack avoids all inlining code and simplifies code below. */
10007 if (!TARGET_ALIGN_STRINGOPS
)
10010 if (GET_CODE (count_exp
) == CONST_INT
)
10011 count
= INTVAL (count_exp
);
10012 /* Figure out proper mode for counter. For 32bits it is always SImode,
10013 for 64bits use SImode when possible, otherwise DImode.
10014 Set count to number of bytes copied when known at compile time. */
10015 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
10016 || x86_64_zero_extended_value (count_exp
))
10017 counter_mode
= SImode
;
10019 counter_mode
= DImode
;
10021 destreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
10023 emit_insn (gen_cld ());
10025 /* When optimizing for size emit simple rep ; movsb instruction for
10026 counts not divisible by 4. */
10028 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
10030 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
10031 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
10033 emit_insn (gen_rep_stosqi_rex64 (destreg
, countreg
, zeroreg
,
10034 destreg
, countreg
));
10036 emit_insn (gen_rep_stosqi (destreg
, countreg
, zeroreg
,
10037 destreg
, countreg
));
10039 else if (count
!= 0
10041 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
10042 || optimize_size
|| count
< (unsigned int) 64))
10044 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
10045 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
10046 if (count
& ~(size
- 1))
10048 countreg
= copy_to_mode_reg (counter_mode
,
10049 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
10050 & (TARGET_64BIT
? -1 : 0x3fffffff)));
10051 countreg
= ix86_zero_extend_to_Pmode (countreg
);
10055 emit_insn (gen_rep_stossi_rex64 (destreg
, countreg
, zeroreg
,
10056 destreg
, countreg
));
10058 emit_insn (gen_rep_stossi (destreg
, countreg
, zeroreg
,
10059 destreg
, countreg
));
10062 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg
, zeroreg
,
10063 destreg
, countreg
));
10065 if (size
== 8 && (count
& 0x04))
10066 emit_insn (gen_strsetsi (destreg
,
10067 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10069 emit_insn (gen_strsethi (destreg
,
10070 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10072 emit_insn (gen_strsetqi (destreg
,
10073 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10079 /* Compute desired alignment of the string operation. */
10080 int desired_alignment
= (TARGET_PENTIUMPRO
10081 && (count
== 0 || count
>= (unsigned int) 260)
10082 ? 8 : UNITS_PER_WORD
);
10084 /* In case we don't know anything about the alignment, default to
10085 library version, since it is usually equally fast and result in
10087 if (!TARGET_INLINE_ALL_STRINGOPS
&& align
< UNITS_PER_WORD
)
10090 if (TARGET_SINGLE_STRINGOP
)
10091 emit_insn (gen_cld ());
10093 countreg2
= gen_reg_rtx (Pmode
);
10094 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
10095 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
10097 if (count
== 0 && align
< desired_alignment
)
10099 label
= gen_label_rtx ();
10100 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
10101 LEU
, 0, counter_mode
, 1, label
);
10105 rtx label
= ix86_expand_aligntest (destreg
, 1);
10106 emit_insn (gen_strsetqi (destreg
,
10107 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10108 ix86_adjust_counter (countreg
, 1);
10109 emit_label (label
);
10110 LABEL_NUSES (label
) = 1;
10114 rtx label
= ix86_expand_aligntest (destreg
, 2);
10115 emit_insn (gen_strsethi (destreg
,
10116 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10117 ix86_adjust_counter (countreg
, 2);
10118 emit_label (label
);
10119 LABEL_NUSES (label
) = 1;
10121 if (align
<= 4 && desired_alignment
> 4)
10123 rtx label
= ix86_expand_aligntest (destreg
, 4);
10124 emit_insn (gen_strsetsi (destreg
, (TARGET_64BIT
10125 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
10127 ix86_adjust_counter (countreg
, 4);
10128 emit_label (label
);
10129 LABEL_NUSES (label
) = 1;
10132 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
10134 emit_label (label
);
10135 LABEL_NUSES (label
) = 1;
10139 if (!TARGET_SINGLE_STRINGOP
)
10140 emit_insn (gen_cld ());
10143 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
10145 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg2
, zeroreg
,
10146 destreg
, countreg2
));
10150 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
10151 emit_insn (gen_rep_stossi (destreg
, countreg2
, zeroreg
,
10152 destreg
, countreg2
));
10156 emit_label (label
);
10157 LABEL_NUSES (label
) = 1;
10160 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
10161 emit_insn (gen_strsetsi (destreg
,
10162 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10163 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
10165 rtx label
= ix86_expand_aligntest (countreg
, 2);
10166 emit_insn (gen_strsetsi (destreg
,
10167 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10168 emit_label (label
);
10169 LABEL_NUSES (label
) = 1;
10171 if (align
> 2 && count
!= 0 && (count
& 2))
10172 emit_insn (gen_strsethi (destreg
,
10173 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10174 if (align
<= 2 || count
== 0)
10176 rtx label
= ix86_expand_aligntest (countreg
, 2);
10177 emit_insn (gen_strsethi (destreg
,
10178 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10179 emit_label (label
);
10180 LABEL_NUSES (label
) = 1;
10182 if (align
> 1 && count
!= 0 && (count
& 1))
10183 emit_insn (gen_strsetqi (destreg
,
10184 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10185 if (align
<= 1 || count
== 0)
10187 rtx label
= ix86_expand_aligntest (countreg
, 1);
10188 emit_insn (gen_strsetqi (destreg
,
10189 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10190 emit_label (label
);
10191 LABEL_NUSES (label
) = 1;
/* Expand strlen.  */
int
ix86_expand_strlen (out, src, eoschar, align)
     rtx out, src, eoschar, align;
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid its
     expansion unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      if (TARGET_64BIT)
	emit_insn (gen_subdi3 (out, out, addr));
      else
	emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
					 align, scratch4, scratch3));
	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
	}
      else
	{
	  emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
				     align, scratch4, scratch3));
	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
	}
    }
  return 1;
}
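
/* Illustrative note on the arithmetic above (not from the original
   comments): the strlenqi patterns expand to repnz scasb with the count
   register preloaded with -1 (scratch4).  scas decrements the count once
   per byte examined, including the terminating zero, so afterwards the
   count holds -(n + 2) for a string of length n.  The one's complement
   gives n + 1, and the final add of -1 stores exactly n into OUT.  */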
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body. It needs the initialisations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (out, align_rtx)
     rtx out, align_rtx;
{
  int align = 0;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);

  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (out, out, const1_rtx));
	  else
	    emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);
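
  /* Worked example (for exposition only): with scratch == 0x12003456 the
     second-highest byte is zero and the test fires:
	 0x12003456 + (-0x01010101) == 0x10ff3355
	 ~0x12003456               == 0xedffcba9
	 0x10ff3355 & 0xedffcba9 & 0x80808080 == 0x00800000 != 0
     while a word with no zero byte, say 0x12343456, yields 0, so the branch
     back to align_4_label is taken and the loop continues.  */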
  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
			      gen_rtx_PLUS (Pmode, out, GEN_INT (2))));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						    reg2,
						    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
      else
	emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));

  emit_label (end_0_label);
}
void
ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
     rtx retval, fnaddr, callarg1, callarg2, pop;
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  if (TARGET_64BIT && pop)
    abort ();

  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }

  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static void
ix86_init_machine_status (p)
     struct function *p;
{
  p->machine = (struct machine_function *)
    xcalloc (1, sizeof (struct machine_function));
}

/* Mark machine specific bits of P for GC.  */

static void
ix86_mark_machine_status (p)
     struct function *p;
{
  struct machine_function *machine = p->machine;
  enum machine_mode mode;
  int n;

  if (! machine)
    return;

  for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
       mode = (enum machine_mode) ((int) mode + 1))
    for (n = 0; n < MAX_386_STACK_LOCALS; n++)
      ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
}

static void
ix86_free_machine_status (p)
     struct function *p;
{
  free (p->machine);
  p->machine = NULL;
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (mode, n)
     enum machine_mode mode;
     int n;
{
  if (n < 0 || n >= MAX_386_STACK_LOCALS)
    abort ();

  if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
    ix86_stack_locals[(int) mode][n]
      = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  return ix86_stack_locals[(int) mode][n];
}
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static rtx
ix86_tls_get_addr ()
{
  static rtx symbol;

  if (!symbol)
    {
      symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
					   ? "___tls_get_addr"
					   : "__tls_get_addr"));
      ggc_add_rtx_root (&symbol, 1);
    }

  return symbol;
}
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

int
memory_address_length (addr)
     rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* Special cases: ebp and esp need the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
	    len = 1;
	  else
	    len = 4;
	}

      /* An index requires the two-byte modrm form.  */
      if (index)
	len += 1;
    }

  return len;
}
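
/* Example (illustrative only): for an operand like 4(%ebx,%esi,2) the
   address has a base, an index and a displacement; the index forces the
   modrm+sib form (+1) and the displacement fits in a signed byte (+1), so
   2 is returned on top of the opcode/modrm bytes counted elsewhere.  A
   plain (%eax) operand yields 0 and a bare 32bit displacement yields 4.  */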
10639 /* Compute default value for "length_immediate" attribute. When SHORTFORM
10640 is set, expect that insn have 8bit immediate alternative. */
10642 ix86_attr_length_immediate_default (insn
, shortform
)
10648 extract_insn_cached (insn
);
10649 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
10650 if (CONSTANT_P (recog_data
.operand
[i
]))
10655 && GET_CODE (recog_data
.operand
[i
]) == CONST_INT
10656 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data
.operand
[i
]), 'K'))
10660 switch (get_attr_mode (insn
))
10671 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
10676 fatal_insn ("unknown insn mode", insn
);
10682 /* Compute default value for "length_address" attribute. */
10684 ix86_attr_length_address_default (insn
)
10688 extract_insn_cached (insn
);
10689 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
10690 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
10692 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
10698 /* Return the maximum number of instructions a cpu can issue. */
10705 case PROCESSOR_PENTIUM
:
10709 case PROCESSOR_PENTIUMPRO
:
10710 case PROCESSOR_PENTIUM4
:
10711 case PROCESSOR_ATHLON
:
10719 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10720 by DEP_INSN and nothing set by DEP_INSN. */
10723 ix86_flags_dependant (insn
, dep_insn
, insn_type
)
10724 rtx insn
, dep_insn
;
10725 enum attr_type insn_type
;
10729 /* Simplify the test for uninteresting insns. */
10730 if (insn_type
!= TYPE_SETCC
10731 && insn_type
!= TYPE_ICMOV
10732 && insn_type
!= TYPE_FCMOV
10733 && insn_type
!= TYPE_IBR
)
10736 if ((set
= single_set (dep_insn
)) != 0)
10738 set
= SET_DEST (set
);
10741 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
10742 && XVECLEN (PATTERN (dep_insn
), 0) == 2
10743 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
10744 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
10746 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
10747 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
10752 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
10755 /* This test is true if the dependent insn reads the flags but
10756 not any other potentially set register. */
10757 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
10760 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
10766 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10767 address with operands set by DEP_INSN. */
10770 ix86_agi_dependant (insn
, dep_insn
, insn_type
)
10771 rtx insn
, dep_insn
;
10772 enum attr_type insn_type
;
10776 if (insn_type
== TYPE_LEA
10779 addr
= PATTERN (insn
);
10780 if (GET_CODE (addr
) == SET
)
10782 else if (GET_CODE (addr
) == PARALLEL
10783 && GET_CODE (XVECEXP (addr
, 0, 0)) == SET
)
10784 addr
= XVECEXP (addr
, 0, 0);
10787 addr
= SET_SRC (addr
);
10792 extract_insn_cached (insn
);
10793 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
10794 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
10796 addr
= XEXP (recog_data
.operand
[i
], 0);
10803 return modified_in_p (addr
, dep_insn
);
10807 ix86_adjust_cost (insn
, link
, dep_insn
, cost
)
10808 rtx insn
, link
, dep_insn
;
10811 enum attr_type insn_type
, dep_insn_type
;
10812 enum attr_memory memory
, dep_memory
;
10814 int dep_insn_code_number
;
10816 /* Anti and output depenancies have zero cost on all CPUs. */
10817 if (REG_NOTE_KIND (link
) != 0)
10820 dep_insn_code_number
= recog_memoized (dep_insn
);
10822 /* If we can't recognize the insns, we can't really do anything. */
10823 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
10826 insn_type
= get_attr_type (insn
);
10827 dep_insn_type
= get_attr_type (dep_insn
);
10831 case PROCESSOR_PENTIUM
:
10832 /* Address Generation Interlock adds a cycle of latency. */
10833 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
10836 /* ??? Compares pair with jump/setcc. */
10837 if (ix86_flags_dependant (insn
, dep_insn
, insn_type
))
10840 /* Floating point stores require value to be ready one cycle ealier. */
10841 if (insn_type
== TYPE_FMOV
10842 && get_attr_memory (insn
) == MEMORY_STORE
10843 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
10847 case PROCESSOR_PENTIUMPRO
:
10848 memory
= get_attr_memory (insn
);
10849 dep_memory
= get_attr_memory (dep_insn
);
10851 /* Since we can't represent delayed latencies of load+operation,
10852 increase the cost here for non-imov insns. */
10853 if (dep_insn_type
!= TYPE_IMOV
10854 && dep_insn_type
!= TYPE_FMOV
10855 && (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
))
10858 /* INT->FP conversion is expensive. */
10859 if (get_attr_fp_int_src (dep_insn
))
10862 /* There is one cycle extra latency between an FP op and a store. */
10863 if (insn_type
== TYPE_FMOV
10864 && (set
= single_set (dep_insn
)) != NULL_RTX
10865 && (set2
= single_set (insn
)) != NULL_RTX
10866 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
10867 && GET_CODE (SET_DEST (set2
)) == MEM
)
10870 /* Show ability of reorder buffer to hide latency of load by executing
10871 in parallel with previous instruction in case
10872 previous instruction is not needed to compute the address. */
10873 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
10874 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
10876 /* Claim moves to take one cycle, as core can issue one load
10877 at time and the next load can start cycle later. */
10878 if (dep_insn_type
== TYPE_IMOV
10879 || dep_insn_type
== TYPE_FMOV
)
10887 memory
= get_attr_memory (insn
);
10888 dep_memory
= get_attr_memory (dep_insn
);
10889 /* The esp dependency is resolved before the instruction is really
10891 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
10892 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
10895 /* Since we can't represent delayed latencies of load+operation,
10896 increase the cost here for non-imov insns. */
10897 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
10898 cost
+= (dep_insn_type
!= TYPE_IMOV
) ? 2 : 1;
10900 /* INT->FP conversion is expensive. */
10901 if (get_attr_fp_int_src (dep_insn
))
10904 /* Show ability of reorder buffer to hide latency of load by executing
10905 in parallel with previous instruction in case
10906 previous instruction is not needed to compute the address. */
10907 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
10908 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
10910 /* Claim moves to take one cycle, as core can issue one load
10911 at time and the next load can start cycle later. */
10912 if (dep_insn_type
== TYPE_IMOV
10913 || dep_insn_type
== TYPE_FMOV
)
10922 case PROCESSOR_ATHLON
:
10923 memory
= get_attr_memory (insn
);
10924 dep_memory
= get_attr_memory (dep_insn
);
10926 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
10928 if (dep_insn_type
== TYPE_IMOV
|| dep_insn_type
== TYPE_FMOV
)
10933 /* Show ability of reorder buffer to hide latency of load by executing
10934 in parallel with previous instruction in case
10935 previous instruction is not needed to compute the address. */
10936 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
10937 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
10939 /* Claim moves to take one cycle, as core can issue one load
10940 at time and the next load can start cycle later. */
10941 if (dep_insn_type
== TYPE_IMOV
10942 || dep_insn_type
== TYPE_FMOV
)
10944 else if (cost
>= 3)
10959 struct ppro_sched_data
10962 int issued_this_cycle
;
10966 static enum attr_ppro_uops
10967 ix86_safe_ppro_uops (insn
)
10970 if (recog_memoized (insn
) >= 0)
10971 return get_attr_ppro_uops (insn
);
10973 return PPRO_UOPS_MANY
;
10977 ix86_dump_ppro_packet (dump
)
10980 if (ix86_sched_data
.ppro
.decode
[0])
10982 fprintf (dump
, "PPRO packet: %d",
10983 INSN_UID (ix86_sched_data
.ppro
.decode
[0]));
10984 if (ix86_sched_data
.ppro
.decode
[1])
10985 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[1]));
10986 if (ix86_sched_data
.ppro
.decode
[2])
10987 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[2]));
10988 fputc ('\n', dump
);
10992 /* We're beginning a new block. Initialize data structures as necessary. */
10995 ix86_sched_init (dump
, sched_verbose
, veclen
)
10996 FILE *dump ATTRIBUTE_UNUSED
;
10997 int sched_verbose ATTRIBUTE_UNUSED
;
10998 int veclen ATTRIBUTE_UNUSED
;
11000 memset (&ix86_sched_data
, 0, sizeof (ix86_sched_data
));
11003 /* Shift INSN to SLOT, and shift everything else down. */
11006 ix86_reorder_insn (insnp
, slot
)
11013 insnp
[0] = insnp
[1];
11014 while (++insnp
!= slot
);
11020 ix86_sched_reorder_ppro (ready
, e_ready
)
11025 enum attr_ppro_uops cur_uops
;
11026 int issued_this_cycle
;
11030 /* At this point .ppro.decode contains the state of the three
11031 decoders from last "cycle". That is, those insns that were
11032 actually independent. But here we're scheduling for the
11033 decoder, and we may find things that are decodable in the
11036 memcpy (decode
, ix86_sched_data
.ppro
.decode
, sizeof (decode
));
11037 issued_this_cycle
= 0;
11040 cur_uops
= ix86_safe_ppro_uops (*insnp
);
11042 /* If the decoders are empty, and we've a complex insn at the
11043 head of the priority queue, let it issue without complaint. */
11044 if (decode
[0] == NULL
)
11046 if (cur_uops
== PPRO_UOPS_MANY
)
11048 decode
[0] = *insnp
;
11052 /* Otherwise, search for a 2-4 uop unsn to issue. */
11053 while (cur_uops
!= PPRO_UOPS_FEW
)
11055 if (insnp
== ready
)
11057 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
11060 /* If so, move it to the head of the line. */
11061 if (cur_uops
== PPRO_UOPS_FEW
)
11062 ix86_reorder_insn (insnp
, e_ready
);
11064 /* Issue the head of the queue. */
11065 issued_this_cycle
= 1;
11066 decode
[0] = *e_ready
--;
11069 /* Look for simple insns to fill in the other two slots. */
11070 for (i
= 1; i
< 3; ++i
)
11071 if (decode
[i
] == NULL
)
11073 if (ready
> e_ready
)
11077 cur_uops
= ix86_safe_ppro_uops (*insnp
);
11078 while (cur_uops
!= PPRO_UOPS_ONE
)
11080 if (insnp
== ready
)
11082 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
11085 /* Found one. Move it to the head of the queue and issue it. */
11086 if (cur_uops
== PPRO_UOPS_ONE
)
11088 ix86_reorder_insn (insnp
, e_ready
);
11089 decode
[i
] = *e_ready
--;
11090 issued_this_cycle
++;
11094 /* ??? Didn't find one. Ideally, here we would do a lazy split
11095 of 2-uop insns, issue one and queue the other. */
11099 if (issued_this_cycle
== 0)
11100 issued_this_cycle
= 1;
11101 ix86_sched_data
.ppro
.issued_this_cycle
= issued_this_cycle
;
11104 /* We are about to being issuing insns for this clock cycle.
11105 Override the default sort algorithm to better slot instructions. */
11107 ix86_sched_reorder (dump
, sched_verbose
, ready
, n_readyp
, clock_var
)
11108 FILE *dump ATTRIBUTE_UNUSED
;
11109 int sched_verbose ATTRIBUTE_UNUSED
;
11112 int clock_var ATTRIBUTE_UNUSED
;
11114 int n_ready
= *n_readyp
;
11115 rtx
*e_ready
= ready
+ n_ready
- 1;
11117 /* Make sure to go ahead and initialize key items in
11118 ix86_sched_data if we are not going to bother trying to
11119 reorder the ready queue. */
11122 ix86_sched_data
.ppro
.issued_this_cycle
= 1;
11131 case PROCESSOR_PENTIUMPRO
:
11132 ix86_sched_reorder_ppro (ready
, e_ready
);
11137 return ix86_issue_rate ();
11140 /* We are about to issue INSN. Return the number of insns left on the
11141 ready queue that can be issued this cycle. */
11144 ix86_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
)
11148 int can_issue_more
;
11154 return can_issue_more
- 1;
11156 case PROCESSOR_PENTIUMPRO
:
11158 enum attr_ppro_uops uops
= ix86_safe_ppro_uops (insn
);
11160 if (uops
== PPRO_UOPS_MANY
)
11163 ix86_dump_ppro_packet (dump
);
11164 ix86_sched_data
.ppro
.decode
[0] = insn
;
11165 ix86_sched_data
.ppro
.decode
[1] = NULL
;
11166 ix86_sched_data
.ppro
.decode
[2] = NULL
;
11168 ix86_dump_ppro_packet (dump
);
11169 ix86_sched_data
.ppro
.decode
[0] = NULL
;
11171 else if (uops
== PPRO_UOPS_FEW
)
11174 ix86_dump_ppro_packet (dump
);
11175 ix86_sched_data
.ppro
.decode
[0] = insn
;
11176 ix86_sched_data
.ppro
.decode
[1] = NULL
;
11177 ix86_sched_data
.ppro
.decode
[2] = NULL
;
11181 for (i
= 0; i
< 3; ++i
)
11182 if (ix86_sched_data
.ppro
.decode
[i
] == NULL
)
11184 ix86_sched_data
.ppro
.decode
[i
] = insn
;
11192 ix86_dump_ppro_packet (dump
);
11193 ix86_sched_data
.ppro
.decode
[0] = NULL
;
11194 ix86_sched_data
.ppro
.decode
[1] = NULL
;
11195 ix86_sched_data
.ppro
.decode
[2] = NULL
;
11199 return --ix86_sched_data
.ppro
.issued_this_cycle
;
11204 ia32_use_dfa_pipeline_interface ()
11206 if (ix86_cpu
== PROCESSOR_PENTIUM
)
11211 /* How many alternative schedules to try. This should be as wide as the
11212 scheduling freedom in the DFA, but no wider. Making this value too
11213 large results extra work for the scheduler. */
11216 ia32_multipass_dfa_lookahead ()
11218 if (ix86_cpu
== PROCESSOR_PENTIUM
)
/* Walk through INSNS and look for MEM references whose address is DSTREG or
   SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
   appropriate.  */

void
ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
     rtx insns;
     rtx dstref, srcref, dstreg, srcreg;
{
  rtx insn;

  for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
				 dstreg, srcreg);
}

/* Subroutine of above to actually do the updating by recursively walking
   the rtx.  */

static void
ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx x;
     rtx dstref, srcref, dstreg, srcreg;
{
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);
  int i, j;

  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
    {
      if (*format_ptr == 'e')
	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
				   dstreg, srcreg);
      else if (*format_ptr == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
				     dstreg, srcreg);
    }
}
11271 /* Compute the alignment given to a constant that is being placed in memory.
11272 EXP is the constant and ALIGN is the alignment that the object would
11274 The value of this function is used instead of that alignment to align
11278 ix86_constant_alignment (exp
, align
)
11282 if (TREE_CODE (exp
) == REAL_CST
)
11284 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
11286 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
11289 else if (TREE_CODE (exp
) == STRING_CST
&& TREE_STRING_LENGTH (exp
) >= 31
11296 /* Compute the alignment for a static variable.
11297 TYPE is the data type, and ALIGN is the alignment that
11298 the object would ordinarily have. The value of this function is used
11299 instead of that alignment to align the object. */
11302 ix86_data_alignment (type
, align
)
11306 if (AGGREGATE_TYPE_P (type
)
11307 && TYPE_SIZE (type
)
11308 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11309 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 256
11310 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 256)
11313 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11314 to 16byte boundary. */
11317 if (AGGREGATE_TYPE_P (type
)
11318 && TYPE_SIZE (type
)
11319 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11320 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
11321 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
11325 if (TREE_CODE (type
) == ARRAY_TYPE
)
11327 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
11329 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
11332 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
11335 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
11337 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
11340 else if ((TREE_CODE (type
) == RECORD_TYPE
11341 || TREE_CODE (type
) == UNION_TYPE
11342 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
11343 && TYPE_FIELDS (type
))
11345 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
11347 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
11350 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
11351 || TREE_CODE (type
) == INTEGER_TYPE
)
11353 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
11355 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
11362 /* Compute the alignment for a local variable.
11363 TYPE is the data type, and ALIGN is the alignment that
11364 the object would ordinarily have. The value of this macro is used
11365 instead of that alignment to align the object. */
11368 ix86_local_alignment (type
, align
)
11372 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11373 to 16byte boundary. */
11376 if (AGGREGATE_TYPE_P (type
)
11377 && TYPE_SIZE (type
)
11378 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11379 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
11380 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
11383 if (TREE_CODE (type
) == ARRAY_TYPE
)
11385 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
11387 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
11390 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
11392 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
11394 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
11397 else if ((TREE_CODE (type
) == RECORD_TYPE
11398 || TREE_CODE (type
) == UNION_TYPE
11399 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
11400 && TYPE_FIELDS (type
))
11402 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
11404 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
11407 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
11408 || TREE_CODE (type
) == INTEGER_TYPE
)
11411 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
11413 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
x86_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
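
      /* Illustrative layout of the 10 byte ia32 trampoline built above:
	     offset 0:  b9 <cxt>    movl $cxt, %ecx    (static chain)
	     offset 5:  e9 <disp>   jmp  fnaddr
	 DISP is relative to the end of the jmp instruction, hence the
	 plus_constant (tramp, 10) in its computation.  */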
  else
    {
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but kernel does not use
	 trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump to r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }
}
#define def_builtin(MASK, NAME, TYPE, CODE)				\
do {									\
  if ((MASK) & target_flags)						\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL);	\
} while (0)

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};

/* Used for builtins that are enabled both by -msse and -msse2.  */
#define MASK_SSE1 (MASK_SSE | MASK_SSE2)
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
};
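
/* A note on the table above (explanatory, not from the original sources):
   there is no native "greater than" form of comiss/ucomiss, so the *gt/*ge
   entries reuse the LT/LE comparison codes and set the final flag field;
   the builtin expanders use that flag to swap the two operands, emitting
   a > b as b < a.  The cmpgtps/cmpgeps entries in bdesc_2arg below follow
   the same convention.  */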
static const struct builtin_description bdesc_2arg[] =
{
  { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
, 0, 0 },
11583 { MASK_MMX
, CODE_FOR_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
11584 { MASK_MMX
, CODE_FOR_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
11585 { MASK_MMX
, CODE_FOR_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
11586 { MASK_MMX
, CODE_FOR_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
11587 { MASK_MMX
, CODE_FOR_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
11588 { MASK_MMX
, CODE_FOR_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
11589 { MASK_MMX
, CODE_FOR_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
11591 { MASK_MMX
, CODE_FOR_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
11592 { MASK_MMX
, CODE_FOR_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
11593 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
11595 { MASK_MMX
, CODE_FOR_mmx_anddi3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
11596 { MASK_MMX
, CODE_FOR_mmx_nanddi3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
11597 { MASK_MMX
, CODE_FOR_mmx_iordi3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
11598 { MASK_MMX
, CODE_FOR_mmx_xordi3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
11600 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
11601 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
11603 { MASK_MMX
, CODE_FOR_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
11604 { MASK_MMX
, CODE_FOR_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
11605 { MASK_MMX
, CODE_FOR_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
11606 { MASK_MMX
, CODE_FOR_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
11607 { MASK_MMX
, CODE_FOR_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
11608 { MASK_MMX
, CODE_FOR_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
11610 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
11611 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
11612 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
11613 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
11615 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
11616 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
11617 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
11618 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
11619 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
11620 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
11623 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
11624 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
11625 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
11627 { MASK_SSE1
, CODE_FOR_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
11628 { MASK_SSE1
, CODE_FOR_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
11630 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
11631 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
11632 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
11633 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
11634 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
11635 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
11637 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
11638 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
11639 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
11640 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
11641 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
11642 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
11644 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
11645 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
11646 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
11647 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
11649 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
11650 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
11653 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
11654 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
11655 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
11656 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
11657 { MASK_SSE2
, CODE_FOR_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
11658 { MASK_SSE2
, CODE_FOR_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
11659 { MASK_SSE2
, CODE_FOR_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
11660 { MASK_SSE2
, CODE_FOR_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
11662 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
11663 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
11664 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
11665 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, 1 },
11666 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, 1 },
11667 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
11668 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, EQ
, 0 },
11669 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, LT
, 0 },
11670 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, LE
, 0 },
11671 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, LT
, 1 },
11672 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, LE
, 1 },
11673 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, UNORDERED
, 0 },
11674 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
11675 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
11676 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
11677 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD
, LT
, 1 },
11678 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD
, LE
, 1 },
11679 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
11680 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, EQ
, 0 },
11681 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, LT
, 0 },
11682 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, LE
, 0 },
11683 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD
, LT
, 1 },
11684 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD
, LE
, 1 },
11685 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, UNORDERED
, 0 },
11687 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
11688 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
11689 { MASK_SSE2
, CODE_FOR_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
11690 { MASK_SSE2
, CODE_FOR_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
11692 { MASK_SSE2
, CODE_FOR_sse2_anddf3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
11693 { MASK_SSE2
, CODE_FOR_sse2_nanddf3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
11694 { MASK_SSE2
, CODE_FOR_sse2_iordf3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
11695 { MASK_SSE2
, CODE_FOR_sse2_xordf3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
11697 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
11698 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
11699 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
11702 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
11703 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
11704 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
11705 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
11706 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
11707 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
11708 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
11709 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
11711 { MASK_MMX
, CODE_FOR_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
11712 { MASK_MMX
, CODE_FOR_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
11713 { MASK_MMX
, CODE_FOR_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
11714 { MASK_MMX
, CODE_FOR_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
11715 { MASK_MMX
, CODE_FOR_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
11716 { MASK_MMX
, CODE_FOR_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
11717 { MASK_MMX
, CODE_FOR_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
11718 { MASK_MMX
, CODE_FOR_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
11720 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
11721 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
11722 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, 0, 0 },
11723 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, 0, 0 },
11725 { MASK_SSE2
, CODE_FOR_sse2_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
11726 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
11727 { MASK_SSE2
, CODE_FOR_sse2_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
11728 { MASK_SSE2
, CODE_FOR_sse2_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
11730 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
11731 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
11733 { MASK_SSE2
, CODE_FOR_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
11734 { MASK_SSE2
, CODE_FOR_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
11735 { MASK_SSE2
, CODE_FOR_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
11736 { MASK_SSE2
, CODE_FOR_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
11737 { MASK_SSE2
, CODE_FOR_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
11738 { MASK_SSE2
, CODE_FOR_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
11740 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
11741 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
11742 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
11743 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
11745 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
11746 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
11747 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
11748 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
11749 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
11750 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
11752 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
11753 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
11754 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
11756 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
11757 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
11759 { MASK_SSE2
, CODE_FOR_ashlv8hi3_ti
, 0, IX86_BUILTIN_PSLLW128
, 0, 0 },
11760 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
11761 { MASK_SSE2
, CODE_FOR_ashlv4si3_ti
, 0, IX86_BUILTIN_PSLLD128
, 0, 0 },
11762 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
11763 { MASK_SSE2
, CODE_FOR_ashlv2di3_ti
, 0, IX86_BUILTIN_PSLLQ128
, 0, 0 },
11764 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
11766 { MASK_SSE2
, CODE_FOR_lshrv8hi3_ti
, 0, IX86_BUILTIN_PSRLW128
, 0, 0 },
11767 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
11768 { MASK_SSE2
, CODE_FOR_lshrv4si3_ti
, 0, IX86_BUILTIN_PSRLD128
, 0, 0 },
11769 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
11770 { MASK_SSE2
, CODE_FOR_lshrv2di3_ti
, 0, IX86_BUILTIN_PSRLQ128
, 0, 0 },
11771 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
11773 { MASK_SSE2
, CODE_FOR_ashrv8hi3_ti
, 0, IX86_BUILTIN_PSRAW128
, 0, 0 },
11774 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
11775 { MASK_SSE2
, CODE_FOR_ashrv4si3_ti
, 0, IX86_BUILTIN_PSRAD128
, 0, 0 },
11776 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
11778 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
11780 { MASK_SSE2
, CODE_FOR_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
11781 { MASK_SSE2
, CODE_FOR_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
11782 { MASK_SSE2
, CODE_FOR_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 }
static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
};

void
ix86_init_builtins ()
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
static void
ix86_init_mmx_sse_builtins ()
{
  const struct builtin_description *d;
  size_t i;

  tree endlink = void_list_node;
11841 tree pchar_type_node
= build_pointer_type (char_type_node
);
11842 tree pfloat_type_node
= build_pointer_type (float_type_node
);
11843 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
11844 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
11845 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
11848 tree int_ftype_v4sf_v4sf
11849 = build_function_type (integer_type_node
,
11850 tree_cons (NULL_TREE
, V4SF_type_node
,
11851 tree_cons (NULL_TREE
,
11854 tree v4si_ftype_v4sf_v4sf
11855 = build_function_type (V4SI_type_node
,
11856 tree_cons (NULL_TREE
, V4SF_type_node
,
11857 tree_cons (NULL_TREE
,
11860 /* MMX/SSE/integer conversions. */
11861 tree int_ftype_v4sf
11862 = build_function_type (integer_type_node
,
11863 tree_cons (NULL_TREE
, V4SF_type_node
,
11865 tree int_ftype_v8qi
11866 = build_function_type (integer_type_node
,
11867 tree_cons (NULL_TREE
, V8QI_type_node
,
11869 tree v4sf_ftype_v4sf_int
11870 = build_function_type (V4SF_type_node
,
11871 tree_cons (NULL_TREE
, V4SF_type_node
,
11872 tree_cons (NULL_TREE
, integer_type_node
,
11874 tree v4sf_ftype_v4sf_v2si
11875 = build_function_type (V4SF_type_node
,
11876 tree_cons (NULL_TREE
, V4SF_type_node
,
11877 tree_cons (NULL_TREE
, V2SI_type_node
,
11879 tree int_ftype_v4hi_int
11880 = build_function_type (integer_type_node
,
11881 tree_cons (NULL_TREE
, V4HI_type_node
,
11882 tree_cons (NULL_TREE
, integer_type_node
,
11884 tree v4hi_ftype_v4hi_int_int
11885 = build_function_type (V4HI_type_node
,
11886 tree_cons (NULL_TREE
, V4HI_type_node
,
11887 tree_cons (NULL_TREE
, integer_type_node
,
11888 tree_cons (NULL_TREE
,
11891 /* Miscellaneous. */
11892 tree v8qi_ftype_v4hi_v4hi
11893 = build_function_type (V8QI_type_node
,
11894 tree_cons (NULL_TREE
, V4HI_type_node
,
11895 tree_cons (NULL_TREE
, V4HI_type_node
,
11897 tree v4hi_ftype_v2si_v2si
11898 = build_function_type (V4HI_type_node
,
11899 tree_cons (NULL_TREE
, V2SI_type_node
,
11900 tree_cons (NULL_TREE
, V2SI_type_node
,
11902 tree v4sf_ftype_v4sf_v4sf_int
11903 = build_function_type (V4SF_type_node
,
11904 tree_cons (NULL_TREE
, V4SF_type_node
,
11905 tree_cons (NULL_TREE
, V4SF_type_node
,
11906 tree_cons (NULL_TREE
,
11909 tree v2si_ftype_v4hi_v4hi
11910 = build_function_type (V2SI_type_node
,
11911 tree_cons (NULL_TREE
, V4HI_type_node
,
11912 tree_cons (NULL_TREE
, V4HI_type_node
,
11914 tree v4hi_ftype_v4hi_int
11915 = build_function_type (V4HI_type_node
,
11916 tree_cons (NULL_TREE
, V4HI_type_node
,
11917 tree_cons (NULL_TREE
, integer_type_node
,
11919 tree v4hi_ftype_v4hi_di
11920 = build_function_type (V4HI_type_node
,
11921 tree_cons (NULL_TREE
, V4HI_type_node
,
11922 tree_cons (NULL_TREE
,
11923 long_long_integer_type_node
,
11925 tree v2si_ftype_v2si_di
11926 = build_function_type (V2SI_type_node
,
11927 tree_cons (NULL_TREE
, V2SI_type_node
,
11928 tree_cons (NULL_TREE
,
11929 long_long_integer_type_node
,
11931 tree void_ftype_void
11932 = build_function_type (void_type_node
, endlink
);
11933 tree void_ftype_unsigned
11934 = build_function_type (void_type_node
,
11935 tree_cons (NULL_TREE
, unsigned_type_node
,
11937 tree unsigned_ftype_void
11938 = build_function_type (unsigned_type_node
, endlink
);
11940 = build_function_type (long_long_unsigned_type_node
, endlink
);
11941 tree v4sf_ftype_void
11942 = build_function_type (V4SF_type_node
, endlink
);
11943 tree v2si_ftype_v4sf
11944 = build_function_type (V2SI_type_node
,
11945 tree_cons (NULL_TREE
, V4SF_type_node
,
11947 /* Loads/stores. */
11948 tree maskmovq_args
= tree_cons (NULL_TREE
, V8QI_type_node
,
11949 tree_cons (NULL_TREE
, V8QI_type_node
,
11950 tree_cons (NULL_TREE
,
11953 tree void_ftype_v8qi_v8qi_pchar
11954 = build_function_type (void_type_node
, maskmovq_args
);
11955 tree v4sf_ftype_pfloat
11956 = build_function_type (V4SF_type_node
,
11957 tree_cons (NULL_TREE
, pfloat_type_node
,
11959 /* @@@ the type is bogus */
11960 tree v4sf_ftype_v4sf_pv2si
11961 = build_function_type (V4SF_type_node
,
11962 tree_cons (NULL_TREE
, V4SF_type_node
,
11963 tree_cons (NULL_TREE
, pv2si_type_node
,
11965 tree void_ftype_pv2si_v4sf
11966 = build_function_type (void_type_node
,
11967 tree_cons (NULL_TREE
, pv2si_type_node
,
11968 tree_cons (NULL_TREE
, V4SF_type_node
,
11970 tree void_ftype_pfloat_v4sf
11971 = build_function_type (void_type_node
,
11972 tree_cons (NULL_TREE
, pfloat_type_node
,
11973 tree_cons (NULL_TREE
, V4SF_type_node
,
11975 tree void_ftype_pdi_di
11976 = build_function_type (void_type_node
,
11977 tree_cons (NULL_TREE
, pdi_type_node
,
11978 tree_cons (NULL_TREE
,
11979 long_long_unsigned_type_node
,
11981 tree void_ftype_pv2di_v2di
11982 = build_function_type (void_type_node
,
11983 tree_cons (NULL_TREE
, pv2di_type_node
,
11984 tree_cons (NULL_TREE
,
11987 /* Normal vector unops. */
11988 tree v4sf_ftype_v4sf
11989 = build_function_type (V4SF_type_node
,
11990 tree_cons (NULL_TREE
, V4SF_type_node
,
11993 /* Normal vector binops. */
11994 tree v4sf_ftype_v4sf_v4sf
11995 = build_function_type (V4SF_type_node
,
11996 tree_cons (NULL_TREE
, V4SF_type_node
,
11997 tree_cons (NULL_TREE
, V4SF_type_node
,
11999 tree v8qi_ftype_v8qi_v8qi
12000 = build_function_type (V8QI_type_node
,
12001 tree_cons (NULL_TREE
, V8QI_type_node
,
12002 tree_cons (NULL_TREE
, V8QI_type_node
,
12004 tree v4hi_ftype_v4hi_v4hi
12005 = build_function_type (V4HI_type_node
,
12006 tree_cons (NULL_TREE
, V4HI_type_node
,
12007 tree_cons (NULL_TREE
, V4HI_type_node
,
12009 tree v2si_ftype_v2si_v2si
12010 = build_function_type (V2SI_type_node
,
12011 tree_cons (NULL_TREE
, V2SI_type_node
,
12012 tree_cons (NULL_TREE
, V2SI_type_node
,
12014 tree di_ftype_di_di
12015 = build_function_type (long_long_unsigned_type_node
,
12016 tree_cons (NULL_TREE
, long_long_unsigned_type_node
,
12017 tree_cons (NULL_TREE
,
12018 long_long_unsigned_type_node
,
12021 tree v2si_ftype_v2sf
12022 = build_function_type (V2SI_type_node
,
12023 tree_cons (NULL_TREE
, V2SF_type_node
,
12025 tree v2sf_ftype_v2si
12026 = build_function_type (V2SF_type_node
,
12027 tree_cons (NULL_TREE
, V2SI_type_node
,
12029 tree v2si_ftype_v2si
12030 = build_function_type (V2SI_type_node
,
12031 tree_cons (NULL_TREE
, V2SI_type_node
,
12033 tree v2sf_ftype_v2sf
12034 = build_function_type (V2SF_type_node
,
12035 tree_cons (NULL_TREE
, V2SF_type_node
,
12037 tree v2sf_ftype_v2sf_v2sf
12038 = build_function_type (V2SF_type_node
,
12039 tree_cons (NULL_TREE
, V2SF_type_node
,
12040 tree_cons (NULL_TREE
,
12043 tree v2si_ftype_v2sf_v2sf
12044 = build_function_type (V2SI_type_node
,
12045 tree_cons (NULL_TREE
, V2SF_type_node
,
12046 tree_cons (NULL_TREE
,
12049 tree pint_type_node
= build_pointer_type (integer_type_node
);
12050 tree pdouble_type_node
= build_pointer_type (double_type_node
);
12051 tree int_ftype_v2df_v2df
12052 = build_function_type (integer_type_node
,
12053 tree_cons (NULL_TREE
, V2DF_type_node
,
12054 tree_cons (NULL_TREE
, V2DF_type_node
, endlink
)));
12057 = build_function_type (intTI_type_node
, endlink
);
12058 tree ti_ftype_ti_ti
12059 = build_function_type (intTI_type_node
,
12060 tree_cons (NULL_TREE
, intTI_type_node
,
12061 tree_cons (NULL_TREE
, intTI_type_node
,
12063 tree void_ftype_pvoid
12064 = build_function_type (void_type_node
,
12065 tree_cons (NULL_TREE
, ptr_type_node
, endlink
));
12067 = build_function_type (V2DI_type_node
,
12068 tree_cons (NULL_TREE
, long_long_unsigned_type_node
,
12070 tree v4sf_ftype_v4si
12071 = build_function_type (V4SF_type_node
,
12072 tree_cons (NULL_TREE
, V4SI_type_node
, endlink
));
12073 tree v4si_ftype_v4sf
12074 = build_function_type (V4SI_type_node
,
12075 tree_cons (NULL_TREE
, V4SF_type_node
, endlink
));
12076 tree v2df_ftype_v4si
12077 = build_function_type (V2DF_type_node
,
12078 tree_cons (NULL_TREE
, V4SI_type_node
, endlink
));
12079 tree v4si_ftype_v2df
12080 = build_function_type (V4SI_type_node
,
12081 tree_cons (NULL_TREE
, V2DF_type_node
, endlink
));
12082 tree v2si_ftype_v2df
12083 = build_function_type (V2SI_type_node
,
12084 tree_cons (NULL_TREE
, V2DF_type_node
, endlink
));
12085 tree v4sf_ftype_v2df
12086 = build_function_type (V4SF_type_node
,
12087 tree_cons (NULL_TREE
, V2DF_type_node
, endlink
));
12088 tree v2df_ftype_v2si
12089 = build_function_type (V2DF_type_node
,
12090 tree_cons (NULL_TREE
, V2SI_type_node
, endlink
));
12091 tree v2df_ftype_v4sf
12092 = build_function_type (V2DF_type_node
,
12093 tree_cons (NULL_TREE
, V4SF_type_node
, endlink
));
12094 tree int_ftype_v2df
12095 = build_function_type (integer_type_node
,
12096 tree_cons (NULL_TREE
, V2DF_type_node
, endlink
));
12097 tree v2df_ftype_v2df_int
12098 = build_function_type (V2DF_type_node
,
12099 tree_cons (NULL_TREE
, V2DF_type_node
,
12100 tree_cons (NULL_TREE
, integer_type_node
,
12102 tree v4sf_ftype_v4sf_v2df
12103 = build_function_type (V4SF_type_node
,
12104 tree_cons (NULL_TREE
, V4SF_type_node
,
12105 tree_cons (NULL_TREE
, V2DF_type_node
,
12107 tree v2df_ftype_v2df_v4sf
12108 = build_function_type (V2DF_type_node
,
12109 tree_cons (NULL_TREE
, V2DF_type_node
,
12110 tree_cons (NULL_TREE
, V4SF_type_node
,
12112 tree v2df_ftype_v2df_v2df_int
12113 = build_function_type (V2DF_type_node
,
12114 tree_cons (NULL_TREE
, V2DF_type_node
,
12115 tree_cons (NULL_TREE
, V2DF_type_node
,
12116 tree_cons (NULL_TREE
,
12119 tree v2df_ftype_v2df_pv2si
12120 = build_function_type (V2DF_type_node
,
12121 tree_cons (NULL_TREE
, V2DF_type_node
,
12122 tree_cons (NULL_TREE
, pv2si_type_node
,
12124 tree void_ftype_pv2si_v2df
12125 = build_function_type (void_type_node
,
12126 tree_cons (NULL_TREE
, pv2si_type_node
,
12127 tree_cons (NULL_TREE
, V2DF_type_node
,
12129 tree void_ftype_pdouble_v2df
12130 = build_function_type (void_type_node
,
12131 tree_cons (NULL_TREE
, pdouble_type_node
,
12132 tree_cons (NULL_TREE
, V2DF_type_node
,
12134 tree void_ftype_pint_int
12135 = build_function_type (void_type_node
,
12136 tree_cons (NULL_TREE
, pint_type_node
,
12137 tree_cons (NULL_TREE
, integer_type_node
,
12139 tree maskmovdqu_args
= tree_cons (NULL_TREE
, V16QI_type_node
,
12140 tree_cons (NULL_TREE
, V16QI_type_node
,
12141 tree_cons (NULL_TREE
,
12144 tree void_ftype_v16qi_v16qi_pchar
12145 = build_function_type (void_type_node
, maskmovdqu_args
);
12146 tree v2df_ftype_pdouble
12147 = build_function_type (V2DF_type_node
,
12148 tree_cons (NULL_TREE
, pdouble_type_node
,
12150 tree v2df_ftype_v2df_v2df
12151 = build_function_type (V2DF_type_node
,
12152 tree_cons (NULL_TREE
, V2DF_type_node
,
12153 tree_cons (NULL_TREE
, V2DF_type_node
,
12155 tree v16qi_ftype_v16qi_v16qi
12156 = build_function_type (V16QI_type_node
,
12157 tree_cons (NULL_TREE
, V16QI_type_node
,
12158 tree_cons (NULL_TREE
, V16QI_type_node
,
12160 tree v8hi_ftype_v8hi_v8hi
12161 = build_function_type (V8HI_type_node
,
12162 tree_cons (NULL_TREE
, V8HI_type_node
,
12163 tree_cons (NULL_TREE
, V8HI_type_node
,
12165 tree v4si_ftype_v4si_v4si
12166 = build_function_type (V4SI_type_node
,
12167 tree_cons (NULL_TREE
, V4SI_type_node
,
12168 tree_cons (NULL_TREE
, V4SI_type_node
,
12170 tree v2di_ftype_v2di_v2di
12171 = build_function_type (V2DI_type_node
,
12172 tree_cons (NULL_TREE
, V2DI_type_node
,
12173 tree_cons (NULL_TREE
, V2DI_type_node
,
12175 tree v2di_ftype_v2df_v2df
12176 = build_function_type (V2DI_type_node
,
12177 tree_cons (NULL_TREE
, V2DF_type_node
,
12178 tree_cons (NULL_TREE
, V2DF_type_node
,
12180 tree v2df_ftype_v2df
12181 = build_function_type (V2DF_type_node
,
12182 tree_cons (NULL_TREE
, V2DF_type_node
,
12184 tree v2df_ftype_double
12185 = build_function_type (V2DF_type_node
,
12186 tree_cons (NULL_TREE
, double_type_node
,
12188 tree v2df_ftype_double_double
12189 = build_function_type (V2DF_type_node
,
12190 tree_cons (NULL_TREE
, double_type_node
,
12191 tree_cons (NULL_TREE
, double_type_node
,
12193 tree int_ftype_v8hi_int
12194 = build_function_type (integer_type_node
,
12195 tree_cons (NULL_TREE
, V8HI_type_node
,
12196 tree_cons (NULL_TREE
, integer_type_node
,
12198 tree v8hi_ftype_v8hi_int_int
12199 = build_function_type (V8HI_type_node
,
12200 tree_cons (NULL_TREE
, V8HI_type_node
,
12201 tree_cons (NULL_TREE
, integer_type_node
,
12202 tree_cons (NULL_TREE
,
12205 tree v2di_ftype_v2di_int
12206 = build_function_type (V2DI_type_node
,
12207 tree_cons (NULL_TREE
, V2DI_type_node
,
12208 tree_cons (NULL_TREE
, integer_type_node
,
12210 tree v4si_ftype_v4si_int
12211 = build_function_type (V4SI_type_node
,
12212 tree_cons (NULL_TREE
, V4SI_type_node
,
12213 tree_cons (NULL_TREE
, integer_type_node
,
12215 tree v8hi_ftype_v8hi_int
12216 = build_function_type (V8HI_type_node
,
12217 tree_cons (NULL_TREE
, V8HI_type_node
,
12218 tree_cons (NULL_TREE
, integer_type_node
,
12220 tree v8hi_ftype_v8hi_v2di
12221 = build_function_type (V8HI_type_node
,
12222 tree_cons (NULL_TREE
, V8HI_type_node
,
12223 tree_cons (NULL_TREE
, V2DI_type_node
,
12225 tree v4si_ftype_v4si_v2di
12226 = build_function_type (V4SI_type_node
,
12227 tree_cons (NULL_TREE
, V4SI_type_node
,
12228 tree_cons (NULL_TREE
, V2DI_type_node
,
12230 tree v4si_ftype_v8hi_v8hi
12231 = build_function_type (V4SI_type_node
,
12232 tree_cons (NULL_TREE
, V8HI_type_node
,
12233 tree_cons (NULL_TREE
, V8HI_type_node
,
12235 tree di_ftype_v8qi_v8qi
12236 = build_function_type (long_long_unsigned_type_node
,
12237 tree_cons (NULL_TREE
, V8QI_type_node
,
12238 tree_cons (NULL_TREE
, V8QI_type_node
,
12240 tree v2di_ftype_v16qi_v16qi
12241 = build_function_type (V2DI_type_node
,
12242 tree_cons (NULL_TREE
, V16QI_type_node
,
12243 tree_cons (NULL_TREE
, V16QI_type_node
,
12245 tree int_ftype_v16qi
12246 = build_function_type (integer_type_node
,
12247 tree_cons (NULL_TREE
, V16QI_type_node
, endlink
));
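  /* Illustrative note, not part of the original sources: each *_ftype_* tree
     built above is a FUNCTION_TYPE spelling out one builtin prototype.  For
     example, v4sf_ftype_v4sf_v4sf corresponds roughly to

         v4sf __builtin_ia32_addps (v4sf, v4sf);

     and int_ftype_v4sf_v4sf to

         int __builtin_ia32_comieq (v4sf, v4sf);

     where v4sf stands for a vector of four floats.  The endlink terminator
     marks the argument lists as non-variadic.  */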
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode:
          type = v16qi_ftype_v16qi_v16qi;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi_v8hi;
          break;
        case V4SImode:
          type = v4si_ftype_v4si_v4si;
          break;
        case V2DImode:
          type = v2di_ftype_v2di_v2di;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df_v2df;
          break;
        case TImode:
          type = ti_ftype_ti_ti;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          abort ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
          || d->icode == CODE_FOR_maskncmpv4sf3
          || d->icode == CODE_FOR_vmmaskcmpv4sf3
          || d->icode == CODE_FOR_vmmaskncmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_maskcmpv2df3
          || d->icode == CODE_FOR_maskncmpv2df3
          || d->icode == CODE_FOR_vmmaskcmpv2df3
          || d->icode == CODE_FOR_vmmaskncmpv2df3)
        type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }
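  /* Illustrative note, not part of the original sources: once the loop above
     has run (with the right -m flags), each named two-operand table entry is
     callable as a builtin whose prototype was chosen from operand 1's mode,
     e.g.

         v4sf x = __builtin_ia32_addps (a, b);    -- V4SFmode, so
                                                     v4sf_ftype_v4sf_v4sf
         v4si m = __builtin_ia32_cmpeqps (a, b);  -- comparison override, so
                                                     v4si_ftype_v4sf_v4sf

     Entries with a zero name field are skipped here and registered by hand
     with more specific types below.  */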
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);

  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
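  /* Illustrative note, not part of the original sources: the hand-written
     registrations above cover builtins whose prototypes do not fit the
     simple two-vector pattern, e.g. the MMX shift counts are DImode values:

         v4hi y = __builtin_ia32_psllw (x, count);   -- count is a long long

     The <mmintrin.h>-style user intrinsics are assumed to be thin inline
     wrappers around these builtins.  */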
  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12344 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
12345 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
12346 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
12348 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
12349 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
12350 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
12351 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
12352 def_builtin (MASK_SSE1
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
12353 def_builtin (MASK_SSE1
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
12355 def_builtin (MASK_SSE1
, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf
, IX86_BUILTIN_ANDPS
);
12356 def_builtin (MASK_SSE1
, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf
, IX86_BUILTIN_ANDNPS
);
12357 def_builtin (MASK_SSE1
, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf
, IX86_BUILTIN_ORPS
);
12358 def_builtin (MASK_SSE1
, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf
, IX86_BUILTIN_XORPS
);
12360 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pextrw", int_ftype_v4hi_int
, IX86_BUILTIN_PEXTRW
);
12361 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int
, IX86_BUILTIN_PINSRW
);
12363 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
12365 def_builtin (MASK_SSE1
, "__builtin_ia32_loadaps", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADAPS
);
12366 def_builtin (MASK_SSE1
, "__builtin_ia32_loadups", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADUPS
);
12367 def_builtin (MASK_SSE1
, "__builtin_ia32_loadss", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADSS
);
12368 def_builtin (MASK_SSE1
, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREAPS
);
12369 def_builtin (MASK_SSE1
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
12370 def_builtin (MASK_SSE1
, "__builtin_ia32_storess", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORESS
);
12372 def_builtin (MASK_SSE1
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
12373 def_builtin (MASK_SSE1
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
12374 def_builtin (MASK_SSE1
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
12375 def_builtin (MASK_SSE1
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
12377 def_builtin (MASK_SSE1
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
12378 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
12379 def_builtin (MASK_SSE1
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
12380 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
12382 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
12384 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
12386 def_builtin (MASK_SSE1
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
12387 def_builtin (MASK_SSE1
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
12388 def_builtin (MASK_SSE1
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
12389 def_builtin (MASK_SSE1
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
12390 def_builtin (MASK_SSE1
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
12391 def_builtin (MASK_SSE1
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
12393 def_builtin (MASK_SSE1
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
12395 /* Original 3DNow! */
12396 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
12397 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
12398 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
12399 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
12400 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
12401 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
12402 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
12403 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
12404 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
12405 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
12406 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
12407 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
12408 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
12409 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
12410 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
12411 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
12412 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
12413 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
12414 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
12415 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
12417 /* 3DNow! extension as used in the Athlon CPU. */
12418 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
12419 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
12420 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
12421 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
12422 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
12423 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
12425 def_builtin (MASK_SSE1
, "__builtin_ia32_setzerops", v4sf_ftype_void
, IX86_BUILTIN_SSE_ZERO
);
12428 def_builtin (MASK_SSE2
, "__builtin_ia32_pextrw128", int_ftype_v8hi_int
, IX86_BUILTIN_PEXTRW128
);
12429 def_builtin (MASK_SSE2
, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int
, IX86_BUILTIN_PINSRW128
);
12431 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
12432 def_builtin (MASK_SSE2
, "__builtin_ia32_movq2dq", v2di_ftype_di
, IX86_BUILTIN_MOVQ2DQ
);
12434 def_builtin (MASK_SSE2
, "__builtin_ia32_loadapd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADAPD
);
12435 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADUPD
);
12436 def_builtin (MASK_SSE2
, "__builtin_ia32_loadsd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADSD
);
12437 def_builtin (MASK_SSE2
, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREAPD
);
12438 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
12439 def_builtin (MASK_SSE2
, "__builtin_ia32_storesd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORESD
);
12441 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADHPD
);
12442 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADLPD
);
12443 def_builtin (MASK_SSE2
, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STOREHPD
);
12444 def_builtin (MASK_SSE2
, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STORELPD
);
12446 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
12447 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
12448 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
12449 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
12450 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
12452 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
12453 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
12454 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
12455 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
12457 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
12458 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
12460 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
12462 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
12463 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
12465 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
12466 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
12467 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
12468 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
12469 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
12471 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
12473 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
12474 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
12476 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
12477 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
12478 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
12480 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
12481 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
12482 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
12484 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd1", v2df_ftype_double
, IX86_BUILTIN_SETPD1
);
12485 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd", v2df_ftype_double_double
, IX86_BUILTIN_SETPD
);
12486 def_builtin (MASK_SSE2
, "__builtin_ia32_setzeropd", ti_ftype_void
, IX86_BUILTIN_CLRPD
);
12487 def_builtin (MASK_SSE2
, "__builtin_ia32_loadpd1", v2df_ftype_pdouble
, IX86_BUILTIN_LOADPD1
);
12488 def_builtin (MASK_SSE2
, "__builtin_ia32_loadrpd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADRPD
);
12489 def_builtin (MASK_SSE2
, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREPD1
);
12490 def_builtin (MASK_SSE2
, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORERPD
);
12492 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pvoid
, IX86_BUILTIN_CLFLUSH
);
12493 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
12494 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
12496 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
12497 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
12498 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
12500 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
12501 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
12502 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
12504 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
12505 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
12507 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
12508 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
12509 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
12511 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
12512 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
12513 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
12515 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
12516 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
12518 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (x, mode)
     rtx x;
     enum machine_mode mode;
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
                              : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
                                : gen_rtx_SUBREG (V4SFmode, x, 0)));
  return x;
}
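/* Illustrative note, not part of the original sources: the fallback above
   only matters for ill-formed input.  For a call such as

       __builtin_ia32_addps (1, 2);   -- wrong argument types

   the front end may hand the expanders a const0_rtx where a vector was
   expected; substituting a cleared vector register keeps code generation
   from crashing while the error is diagnosed.  */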
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
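/* Illustrative walk-through, not part of the original sources: for a call
   such as

       v4sf r = __builtin_ia32_mulps (a, b);

   the builtin expander is assumed to find the bdesc_2arg entry and call
   ix86_expand_binop_builtin (CODE_FOR_mulv4sf3, arglist, target); the
   routine above then expands both arguments, forces them into V4SFmode
   registers where the predicates require it, and emits the single
   mulv4sf3 insn through GEN_FCN.  */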
/* In type_for_mode we restrict the ability to create TImode types
   to hosts with 64-bit H_W_I.  So we've defined the SSE logicals
   to have a V4SFmode signature.  Convert them in-place to TImode.  */

static rtx
ix86_expand_timode_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);

  op0 = gen_lowpart (TImode, op0);
  op1 = gen_lowpart (TImode, op1);
  target = gen_reg_rtx (TImode);

  if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
    op0 = copy_to_mode_reg (TImode, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
    op1 = copy_to_mode_reg (TImode, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (TImode, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);

  return gen_lowpart (V4SFmode, target);
}
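/* Illustrative note, not part of the original sources: this path is what
   backs the SSE logical builtins, e.g.

       v4sf r = __builtin_ia32_andps (a, b);

   The builtin carries a v4sf signature (see the comment above), so the
   operands are reinterpreted as TImode with gen_lowpart, the operation is
   performed in TImode, and the result is handed back as V4SFmode.  */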
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (icode, arglist)
     enum insn_code icode;
     tree arglist;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));

  if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (icode, arglist, target, do_load)
     enum insn_code icode;
     tree arglist;
     rtx target;
     int do_load;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
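
/* Illustrative note (not part of the original sources): DO_LOAD selects
   between the two ways this helper is used.  Callers such as

     return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

   pass 1 because their single argument is a pointer that must be
   dereferenced, while the bdesc_1arg fallback passes 0 because its
   argument is already the value the pattern operates on.  */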

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
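
/* Illustrative note (not part of the original sources): the vm* patterns
   used here (e.g. CODE_FOR_vmsqrtv4sf2 for IX86_BUILTIN_SQRTSS) compute the
   operation on the low element of one vector operand and copy the remaining
   elements from the other, which is why the single source value is passed
   as both op0 and op1 above.  */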

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          gen_rtx_REG (CCmode, FLAGS_REG),
                                          const0_rtx)));

  return SUBREG_REG (target);
}
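
/* Illustrative note (not part of the original sources): the comi patterns
   only set the flags register.  The boolean result visible to the caller is
   materialized by the SET emitted above, which stores the comparison of
   (reg:CC FLAGS_REG) against zero into the low byte of an SImode pseudo
   that was pre-cleared with const0_rtx, so the upper bytes are already zero
   when the full SImode value is returned.  */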

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_PEXTRW:
    case IX86_BUILTIN_PEXTRW128:
      icode = (fcode == IX86_BUILTIN_PEXTRW
               ? CODE_FOR_mmx_pextrw
               : CODE_FOR_sse2_pextrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        {
          /* @@@ better error message */
          error ("selector must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
    case IX86_BUILTIN_PINSRW128:
      icode = (fcode == IX86_BUILTIN_PINSRW
               ? CODE_FOR_mmx_pinsrw
               : CODE_FOR_sse2_pinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("selector must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
               ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
               : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_ANDPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
                                               arglist, target);
    case IX86_BUILTIN_ANDNPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
                                               arglist, target);
    case IX86_BUILTIN_ORPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
                                               arglist, target);
    case IX86_BUILTIN_XORPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
                                               arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
               : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
               : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
               : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
    case IX86_BUILTIN_STOREHPD:
    case IX86_BUILTIN_STORELPD:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
               : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
               : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
               : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
               ? CODE_FOR_sse_shufps
               : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
               : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
               : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
               : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADAPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);

    case IX86_BUILTIN_STOREAPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_LOADSD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);

    case IX86_BUILTIN_STORESD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);

    case IX86_BUILTIN_SETPD1:
      target = assign_386_stack_local (DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, DFmode, 0),
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
      emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
      return op0;

    case IX86_BUILTIN_SETPD:
      target = assign_386_stack_local (V2DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      emit_move_insn (adjust_address (target, DFmode, 0),
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, DFmode, 8),
                      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_movapd (op0, target));
      return op0;

    case IX86_BUILTIN_LOADRPD:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
                                         gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
      return target;

    case IX86_BUILTIN_LOADPD1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
                                         gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPD1:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STORERPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      icode = CODE_FOR_sse2_clflush;
      mode0 = insn_data[icode].operand[0].mode;
      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
        /* Compares are treated specially.  */
        if (d->icode == CODE_FOR_maskcmpv4sf3
            || d->icode == CODE_FOR_vmmaskcmpv4sf3
            || d->icode == CODE_FOR_maskncmpv4sf3
            || d->icode == CODE_FOR_vmmaskncmpv4sf3
            || d->icode == CODE_FOR_maskcmpv2df3
            || d->icode == CODE_FOR_vmmaskcmpv2df3
            || d->icode == CODE_FOR_maskncmpv2df3
            || d->icode == CODE_FOR_vmmaskncmpv2df3)
          return ix86_expand_sse_compare (d, arglist, target);

        return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}
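
/* Illustrative sketch (not from the original sources): a source-level use
   such as

     __v4sf a, b;
     __v4sf c = __builtin_ia32_addps (a, b);

   is assumed to carry a DECL_FUNCTION_CODE that is not handled by the
   switch above, so it falls through to the bdesc_2arg scan and is expanded
   by ix86_expand_binop_builtin with the insn code recorded in that table.  */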

/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;

  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (
                      gen_rtx_SET (VOIDmode,
                                   gen_rtx_MEM (DImode,
                                                gen_rtx_PRE_DEC (DImode,
                                                        stack_pointer_rtx)),
                                   operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_di (&operand, 1, operands, operands + 1);
            emit_insn (
                        gen_rtx_SET (VOIDmode,
                                     gen_rtx_MEM (SImode,
                                                  gen_rtx_PRE_DEC (Pmode,
                                                          stack_pointer_rtx)),
                                     operands[1]));
            emit_insn (
                        gen_rtx_SET (VOIDmode,
                                     gen_rtx_MEM (SImode,
                                                  gen_rtx_PRE_DEC (Pmode,
                                                          stack_pointer_rtx)),
                                     operands[0]));
          }
          break;
        case HImode:
          /* It is better to store HImodes as SImodes.  */
          if (!TARGET_PARTIAL_REG_STALL)
            operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (
                      gen_rtx_SET (VOIDmode,
                                   gen_rtx_MEM (GET_MODE (operand),
                                                gen_rtx_PRE_DEC (SImode,
                                                        stack_pointer_rtx)),
                                   operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
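
/* Illustrative note (not in the original sources): callers are expected to
   pair this with ix86_free_from_memory below, e.g. a post-reload splitter
   might do

     rtx mem = ix86_force_to_memory (SImode, operand);
     ... emit insns that read MEM ...
     ix86_free_from_memory (SImode);

   so that any stack space pushed here is released once the temporary is no
   longer needed.  */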

/* Free operand from the memory.  */
void
ix86_free_from_memory (mode)
     enum machine_mode mode;
{
  if (!TARGET_64BIT || !TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
        size = 2;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
         to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (x, class)
     rtx x;
     enum reg_class class;
{
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
        return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
        {
          /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
          if (MAYBE_SSE_CLASS_P (class))
            return (reg_class_subset_p (class, GENERAL_REGS)
                    ? GENERAL_REGS : FLOAT_REGS);
          else
            return class;
        }
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
        return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
        return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}

/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */
int
ix86_secondary_memory_needed (class1, class2, mode, strict)
     enum reg_class class1, class2;
     enum machine_mode mode;
     int strict;
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
        abort ();
      else
        return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
          || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
              && (mode) != SImode)
          || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
              && (mode) != SImode));
}

/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */
int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In case of copying from general_purpose_register we may emit
     multiple stores followed by single load causing memory size mismatch
     stall.  Count this as arbitrarily high cost of 20.  */
  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int add_cost = 0;
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
        add_cost = 20;
      return (MEMORY_MOVE_COST (mode, class1, 0)
              + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
    }
  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
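
/* Illustrative note (not in the original sources): when the pair of classes
   needs secondary memory (e.g. a DFmode copy between FLOAT_REGS and
   SSE_REGS), the returned cost is the store cost into memory plus the load
   cost back out, plus the arbitrary penalty of 20 when the source class
   needs more hard registers than the destination class.  Direct
   MMX/SSE <-> integer copies are instead charged
   ix86_cost->mmxsse_to_integer.  */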

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return VALID_SSE_REG_MODE (mode);
  if (MMX_REGNO_P (regno))
    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
  /* We handle both integer and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care for QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */
int
ix86_memory_move_cost (mode, class, in)
     enum machine_mode mode;
     enum reg_class class;
     int in;
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
        {
        case SFmode:
          index = 0;
          break;
        case DFmode:
          index = 1;
          break;
        case XFmode:
        case TFmode:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        case 16:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
        return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
                : ix86_cost->movzbl_load);
      else
        return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
                : ix86_cost->int_store[0] + 4);
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
        mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
              * (int) GET_MODE_SIZE (mode) / 4);
    }
}
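
/* Illustrative note (not in the original sources): in the default integer
   branch above, the cost is one 32-bit move per 4 bytes of the mode, so a
   DImode load would be charged ix86_cost->int_load[2] * 8 / 4, i.e. the
   price of two SImode-sized loads.  */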

#ifdef DO_GLOBAL_CTORS_BODY
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif

/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc ()
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
       reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
       reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}

void
x86_output_mi_thunk (file, delta, function)
     FILE *file;
     int delta;
     tree function;
{
  tree parm;
  rtx xops[3];

  if (ix86_regparm > 0)
    parm = TYPE_ARG_TYPES (TREE_TYPE (function));
  else
    parm = NULL_TREE;
  for (; parm; parm = TREE_CHAIN (parm))
    if (TREE_VALUE (parm) == void_type_node)
      break;

  xops[0] = GEN_INT (delta);
  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
      xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
      output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
      if (flag_pic)
        {
          fprintf (file, "\tjmp *");
          assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
          fprintf (file, "@GOTPCREL(%%rip)\n");
        }
      else
        {
          fprintf (file, "\tjmp ");
          assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
          fprintf (file, "\n");
        }
    }
  else
    {
      if (parm)
        xops[1] = gen_rtx_REG (SImode, 0);
      else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
        xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
      else
        xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
      output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);

      if (flag_pic)
        {
          xops[0] = pic_offset_table_rtx;
          xops[1] = gen_label_rtx ();
          xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");

          if (ix86_regparm > 2)
            abort ();
          output_asm_insn ("push{l}\t%0", xops);
          output_asm_insn ("call\t%P1", xops);
          ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
          output_asm_insn ("pop{l}\t%0", xops);
          output_asm_insn
            ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
          xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
          output_asm_insn
            ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
          asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
          asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
        }
      else
        {
          fprintf (file, "\tjmp ");
          assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
          fprintf (file, "\n");
        }
    }
}