1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
51 /* Processor costs (relative to an add) */
53 struct processor_costs size_cost
= { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers in SF/DF/XFmode
(was mislabeled "loading integer registers") */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
88 /* Processor costs (relative to an add) */
90 struct processor_costs i386_cost
= { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {8, 8, 8}, /* cost of storing fp registers in SF/DF/XFmode
(was mislabeled "loading integer registers";
int loads above are {2, 4, 2}) */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
127 struct processor_costs i486_cost
= { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {8, 8, 8}, /* cost of storing fp registers in SF/DF/XFmode
(was mislabeled "loading integer registers") */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
164 struct processor_costs pentium_cost
= {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184 {4, 4, 6}, /* cost of storing fp registers in SF/DF/XFmode
(was mislabeled "loading integer registers") */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
201 struct processor_costs pentiumpro_cost
= {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of storing fp registers in SF/DF/XFmode
(was mislabeled "loading integer registers") */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
238 struct processor_costs k6_cost
= {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {4, 4, 4}, /* cost of storing fp registers in SF/DF/XFmode
(was mislabeled "loading integer registers") */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
275 struct processor_costs athlon_cost
= {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {4, 4, 16}, /* cost of storing fp registers in SF/DF/XFmode
(was mislabeled "loading integer registers") */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
312 struct processor_costs pentium4_cost
= {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of storing fp registers in SF/DF/XFmode
(was mislabeled "loading integer registers") */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
/* Cost table currently in effect for rtx_cost and friends; statically
initialized to the Pentium table (presumably re-pointed when -mcpu
is processed — confirm in override_options). */
348 const struct processor_costs
*ix86_cost
= &pentium_cost
;
350 /* Processor feature/optimization bitmasks. */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
/* Each constant below is a bitmask of the m_* processor bits above;
a set bit means the named feature/tuning heuristic is enabled when
tuning for that processor. ~MASK enables it everywhere except MASK. */
359 const int x86_use_leave
= m_386
| m_K6
| m_ATHLON
;
360 const int x86_push_memory
= m_386
| m_K6
| m_ATHLON
| m_PENT4
;
361 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
362 const int x86_movx
= m_ATHLON
| m_PPRO
| m_PENT4
/* m_386 | m_K6 */;
363 const int x86_double_with_add
= ~m_386
;
364 const int x86_use_bit_test
= m_386
;
365 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON
| m_K6
;
/* NOTE(review): cmove mask omits m_K6 — K6 lacks CMOV; 3dnow_a is the
Athlon-only extended 3DNow! set. */
366 const int x86_cmove
= m_PPRO
| m_ATHLON
| m_PENT4
;
367 const int x86_3dnow_a
= m_ATHLON
;
368 const int x86_deep_branch
= m_PPRO
| m_K6
| m_ATHLON
| m_PENT4
;
369 const int x86_branch_hints
= m_PENT4
;
370 const int x86_use_sahf
= m_PPRO
| m_K6
| m_PENT4
;
371 const int x86_partial_reg_stall
= m_PPRO
;
372 const int x86_use_loop
= m_K6
;
373 const int x86_use_fiop
= ~(m_PPRO
| m_ATHLON
| m_PENT
);
374 const int x86_use_mov0
= m_K6
;
375 const int x86_use_cltd
= ~(m_PENT
| m_K6
);
376 const int x86_read_modify_write
= ~m_PENT
;
377 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
378 const int x86_split_long_moves
= m_PPRO
;
379 const int x86_promote_QImode
= m_K6
| m_PENT
| m_386
| m_486
| m_ATHLON
;
380 const int x86_fast_prefix
= ~(m_PENT
| m_486
| m_386
);
381 const int x86_single_stringop
= m_386
| m_PENT4
;
/* ~(0): QImode arithmetic is fine on all processors;
promote_qi_regs = 0: never forced on any. */
382 const int x86_qimode_math
= ~(0);
383 const int x86_promote_qi_regs
= 0;
384 const int x86_himode_math
= ~(m_PPRO
);
385 const int x86_promote_hi_regs
= m_PPRO
;
386 const int x86_sub_esp_4
= m_ATHLON
| m_PPRO
| m_PENT4
;
387 const int x86_sub_esp_8
= m_ATHLON
| m_PPRO
| m_386
| m_486
| m_PENT4
;
388 const int x86_add_esp_4
= m_ATHLON
| m_K6
| m_PENT4
;
389 const int x86_add_esp_8
= m_ATHLON
| m_PPRO
| m_K6
| m_386
| m_486
| m_PENT4
;
390 const int x86_integer_DFmode_moves
= ~(m_ATHLON
| m_PENT4
| m_PPRO
);
391 const int x86_partial_reg_dependency
= m_ATHLON
| m_PENT4
;
392 const int x86_memory_mismatch_stall
= m_ATHLON
| m_PENT4
;
393 const int x86_accumulate_outgoing_args
= m_ATHLON
| m_PENT4
| m_PPRO
;
394 const int x86_prologue_using_move
= m_ATHLON
| m_PENT4
| m_PPRO
;
395 const int x86_epilogue_using_move
= m_ATHLON
| m_PENT4
| m_PPRO
;
396 const int x86_decompose_lea
= m_PENT4
;
397 const int x86_shift1
= ~m_486
;
398 const int x86_arch_always_fancy_math_387
= m_PENT
| m_PPRO
| m_ATHLON
| m_PENT4
;
400 /* In case the average insn count for single function invocation is
401 lower than this constant, emit fast (but longer) prologue and
403 #define FAST_PROLOGUE_INSN_COUNT 30
405 /* Set by prologue expander and used by epilogue expander to determine
407 static int use_fast_prologue_epilogue
;
409 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively.
The initializer macros come from the target header (i386.h). */
410 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
411 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
412 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
414 /* Array of the smallest class containing reg number REGNO, indexed by
415 REGNO. Used by REGNO_REG_CLASS in i386.h. */
417 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
/* ax, dx, cx, bx — each in its own single-register class */
420 AREG
, DREG
, CREG
, BREG
,
/* si, di, then presumably bp, sp — confirm against register order */
422 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
/* FP stack: st(0), st(1), then the remaining stack slots */
424 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
425 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
428 /* flags, fpsr, dirflag, frame */
429 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
/* SSE registers */
430 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
/* MMX registers */
432 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
/* NOTE(review): remaining entries presumably cover the x86-64 extended
integer (r8-r15) and SSE (xmm8-xmm15) registers, matching the
"extended" rows of the dbx maps below — confirm. */
434 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
435 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
436 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
440 /* The "default" register map used in 32bit mode.
Maps gcc register number -> debugger (dbx/stabs) register number;
-1 marks registers with no debug-format equivalent. */
442 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
444 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
445 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
446 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
447 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
448 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
449 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
450 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* gcc register numbers of the six integer argument registers in
64-bit mode, in psABI order: RDI, RSI, RDX, RCX, R8, R9. */
453 static int const x86_64_int_parameter_registers
[6] =
455 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
456 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
/* gcc register numbers of the integer value-return registers.
Register 1 is RDX (see the parameter array above); the original
annotation wrongly said RDI here. */
459 static int const x86_64_int_return_registers
[4] =
461 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
464 /* The "default" register map used in 64bit mode.
gcc register number -> debugger register number; -1 = no mapping. */
465 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
467 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
468 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
469 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
470 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
471 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
472 8,9,10,11,12,13,14,15, /* extended integer registers */
473 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
476 /* Define the register numbers to be used in Dwarf debugging information.
477 The SVR4 reference port C compiler uses the following register numbers
478 in its Dwarf output code:
479 0 for %eax (gcc regno = 0)
480 1 for %ecx (gcc regno = 2)
481 2 for %edx (gcc regno = 1)
482 3 for %ebx (gcc regno = 3)
483 4 for %esp (gcc regno = 7)
484 5 for %ebp (gcc regno = 6)
485 6 for %esi (gcc regno = 4)
486 7 for %edi (gcc regno = 5)
487 The following three DWARF register numbers are never generated by
488 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
489 believes these numbers have these meanings.
490 8 for %eip (no gcc equivalent)
491 9 for %eflags (gcc regno = 17)
492 10 for %trapno (no gcc equivalent)
493 It is not at all clear how we should number the FP stack registers
494 for the x86 architecture. If the version of SDB on x86/svr4 were
495 a bit less brain dead with respect to floating-point then we would
496 have a precedent to follow with respect to DWARF register numbers
497 for x86 FP registers, but the SDB on x86/svr4 is so completely
498 broken with respect to FP registers that it is hardly worth thinking
499 of it as something to strive for compatibility with.
500 The version of x86/svr4 SDB I have at the moment does (partially)
501 seem to believe that DWARF register number 11 is associated with
502 the x86 register %st(0), but that's about all. Higher DWARF
503 register numbers don't seem to be associated with anything in
504 particular, and even for DWARF regno 11, SDB only seems to under-
505 stand that it should say that a variable lives in %st(0) (when
506 asked via an `=' command) if we said it was in DWARF regno 11,
507 but SDB still prints garbage when asked for the value of the
508 variable in question (via a `/' command).
509 (Also note that the labels SDB prints for various FP stack regs
510 when doing an `x' command are all wrong.)
511 Note that these problems generally don't affect the native SVR4
512 C compiler because it doesn't allow the use of -O with -g and
513 because when it is *not* optimizing, it allocates a memory
514 location for each floating-point variable, and the memory
515 location is what gets described in the DWARF AT_location
516 attribute for the variable in question.
517 Regardless of the severe mental illness of the x86/svr4 SDB, we
518 do something sensible here and we use the following DWARF
519 register numbers. Note that these are all stack-top-relative
521 11 for %st(0) (gcc regno = 8)
522 12 for %st(1) (gcc regno = 9)
523 13 for %st(2) (gcc regno = 10)
524 14 for %st(3) (gcc regno = 11)
525 15 for %st(4) (gcc regno = 12)
526 16 for %st(5) (gcc regno = 13)
527 17 for %st(6) (gcc regno = 14)
528 18 for %st(7) (gcc regno = 15)
/* gcc register number -> SVR4 DWARF register number, per the numbering
documented in the long comment above; -1 = no mapping. */
530 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
532 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
533 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
534 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
535 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
536 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
537 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
538 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
541 /* Test and compare insns in i386.md store the information needed to
542 generate branch and scc insns here. */
544 rtx ix86_compare_op0
= NULL_RTX
; /* first operand of the pending comparison */
545 rtx ix86_compare_op1
= NULL_RTX
; /* second operand of the pending comparison */
547 /* The encoding characters for the four TLS models present in ELF.
Index 0 is a placeholder (space); G/L/i/l select a model. */
549 static char const tls_model_chars
[] = " GLil";
551 #define MAX_386_STACK_LOCALS 3
552 /* Size of the register save area. */
553 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
555 /* Define the structure for the machine field in struct function. */
556 struct machine_function
GTY(())
/* Cached per-function stack slots, one per (mode, slot number). */
558 rtx stack_locals
[(int) MAX_MACHINE_MODE
][MAX_386_STACK_LOCALS
];
559 const char *some_ld_name
;
/* (sic: "varrargs" — misspelled identifier, mirrored by the accessor
macro below; renaming would touch other files.) */
560 int save_varrargs_registers
;
561 int accesses_prev_frame
;
564 #define ix86_stack_locals (cfun->machine->stack_locals)
565 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
567 /* Structure describing stack frame layout.
568 Stack grows downward:
574 saved frame pointer if frame_pointer_needed
575 <- HARD_FRAME_POINTER
581 > to_allocate <- FRAME_POINTER
/* Bytes reserved for outgoing call arguments. */
593 int outgoing_arguments_size
;
/* Total bytes the prologue must allocate. */
596 HOST_WIDE_INT to_allocate
;
597 /* The offsets relative to ARG_POINTER. */
598 HOST_WIDE_INT frame_pointer_offset
;
599 HOST_WIDE_INT hard_frame_pointer_offset
;
600 HOST_WIDE_INT stack_pointer_offset
;
603 /* Used to enable/disable debugging features. */
604 const char *ix86_debug_arg_string
, *ix86_debug_addr_string
;
605 /* Code model option as passed by user. */
606 const char *ix86_cmodel_string
;
/* Parsed form of the code model option. */
608 enum cmodel ix86_cmodel
;
/* Asm dialect option string and its parsed value (AT&T by default). */
610 const char *ix86_asm_string
;
611 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
/* TLS dialect option string and its parsed value (GNU by default). */
613 const char *ix86_tls_dialect_string
;
614 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
616 /* Which unit we are generating floating point math for. */
617 enum fpmath_unit ix86_fpmath
;
619 /* Which cpu are we scheduling for. */
620 enum processor_type ix86_cpu
;
621 /* Which instruction set architecture to use. */
622 enum processor_type ix86_arch
;
624 /* Strings to hold which cpu and instruction set architecture to use. */
625 const char *ix86_cpu_string
; /* for -mcpu=<xxx> */
626 const char *ix86_arch_string
; /* for -march=<xxx> */
627 const char *ix86_fpmath_string
; /* for -mfpmath=<xxx> */
629 /* # of registers to use to pass arguments. */
630 const char *ix86_regparm_string
;
632 /* true if sse prefetch instruction is not NOOP. */
633 int x86_prefetch_sse
;
635 /* ix86_regparm_string as a number */
638 /* Alignment to use for loops and jumps: */
640 /* Power of two alignment for loops. */
641 const char *ix86_align_loops_string
;
643 /* Power of two alignment for non-loop jumps. */
644 const char *ix86_align_jumps_string
;
646 /* Power of two alignment for stack boundary in bytes. */
647 const char *ix86_preferred_stack_boundary_string
;
649 /* Preferred alignment for stack boundary in bits. */
650 int ix86_preferred_stack_boundary
;
652 /* Values 1-5: see jump.c */
653 int ix86_branch_cost
;
654 const char *ix86_branch_cost_string
; /* -mbranch-cost= as given by user */
656 /* Power of two alignment for functions. */
657 const char *ix86_align_funcs_string
;
659 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
660 static char internal_label_prefix
[16];
661 static int internal_label_prefix_len
; /* strlen of the prefix above */
663 static int local_symbolic_operand
PARAMS ((rtx
, enum machine_mode
));
664 static int tls_symbolic_operand_1
PARAMS ((rtx
, enum tls_model
));
665 static void output_pic_addr_const
PARAMS ((FILE *, rtx
, int));
666 static void put_condition_code
PARAMS ((enum rtx_code
, enum machine_mode
,
668 static const char *get_some_local_dynamic_name
PARAMS ((void));
669 static int get_some_local_dynamic_name_1
PARAMS ((rtx
*, void *));
670 static rtx maybe_get_pool_constant
PARAMS ((rtx
));
671 static rtx ix86_expand_int_compare
PARAMS ((enum rtx_code
, rtx
, rtx
));
672 static enum rtx_code ix86_prepare_fp_compare_args
PARAMS ((enum rtx_code
,
674 static rtx get_thread_pointer
PARAMS ((void));
675 static void get_pc_thunk_name
PARAMS ((char [32], unsigned int));
676 static rtx gen_push
PARAMS ((rtx
));
677 static int memory_address_length
PARAMS ((rtx addr
));
678 static int ix86_flags_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
679 static int ix86_agi_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
680 static enum attr_ppro_uops ix86_safe_ppro_uops
PARAMS ((rtx
));
681 static void ix86_dump_ppro_packet
PARAMS ((FILE *));
682 static void ix86_reorder_insn
PARAMS ((rtx
*, rtx
*));
683 static struct machine_function
* ix86_init_machine_status
PARAMS ((void));
684 static int ix86_split_to_parts
PARAMS ((rtx
, rtx
*, enum machine_mode
));
685 static int ix86_nsaved_regs
PARAMS ((void));
686 static void ix86_emit_save_regs
PARAMS ((void));
687 static void ix86_emit_save_regs_using_mov
PARAMS ((rtx
, HOST_WIDE_INT
));
688 static void ix86_emit_restore_regs_using_mov
PARAMS ((rtx
, int, int));
689 static void ix86_output_function_epilogue
PARAMS ((FILE *, HOST_WIDE_INT
));
690 static void ix86_set_move_mem_attrs_1
PARAMS ((rtx
, rtx
, rtx
, rtx
, rtx
));
691 static void ix86_sched_reorder_ppro
PARAMS ((rtx
*, rtx
*));
692 static HOST_WIDE_INT ix86_GOT_alias_set
PARAMS ((void));
693 static void ix86_adjust_counter
PARAMS ((rtx
, HOST_WIDE_INT
));
694 static rtx ix86_expand_aligntest
PARAMS ((rtx
, int));
695 static void ix86_expand_strlensi_unroll_1
PARAMS ((rtx
, rtx
));
696 static int ix86_issue_rate
PARAMS ((void));
697 static int ix86_adjust_cost
PARAMS ((rtx
, rtx
, rtx
, int));
698 static void ix86_sched_init
PARAMS ((FILE *, int, int));
699 static int ix86_sched_reorder
PARAMS ((FILE *, int, rtx
*, int *, int));
700 static int ix86_variable_issue
PARAMS ((FILE *, int, rtx
, int));
701 static int ia32_use_dfa_pipeline_interface
PARAMS ((void));
702 static int ia32_multipass_dfa_lookahead
PARAMS ((void));
703 static void ix86_init_mmx_sse_builtins
PARAMS ((void));
707 rtx base
, index
, disp
;
711 static int ix86_decompose_address
PARAMS ((rtx
, struct ix86_address
*));
713 static void ix86_encode_section_info
PARAMS ((tree
, int)) ATTRIBUTE_UNUSED
;
714 static const char *ix86_strip_name_encoding
PARAMS ((const char *))
717 struct builtin_description
;
718 static rtx ix86_expand_sse_comi
PARAMS ((const struct builtin_description
*,
720 static rtx ix86_expand_sse_compare
PARAMS ((const struct builtin_description
*,
722 static rtx ix86_expand_unop1_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
723 static rtx ix86_expand_unop_builtin
PARAMS ((enum insn_code
, tree
, rtx
, int));
724 static rtx ix86_expand_binop_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
725 static rtx ix86_expand_timode_binop_builtin
PARAMS ((enum insn_code
,
727 static rtx ix86_expand_store_builtin
PARAMS ((enum insn_code
, tree
));
728 static rtx safe_vector_operand
PARAMS ((rtx
, enum machine_mode
));
729 static enum rtx_code ix86_fp_compare_code_to_integer
PARAMS ((enum rtx_code
));
730 static void ix86_fp_comparison_codes
PARAMS ((enum rtx_code code
,
734 static rtx ix86_expand_fp_compare
PARAMS ((enum rtx_code
, rtx
, rtx
, rtx
,
736 static int ix86_fp_comparison_arithmetics_cost
PARAMS ((enum rtx_code code
));
737 static int ix86_fp_comparison_fcomi_cost
PARAMS ((enum rtx_code code
));
738 static int ix86_fp_comparison_sahf_cost
PARAMS ((enum rtx_code code
));
739 static int ix86_fp_comparison_cost
PARAMS ((enum rtx_code code
));
740 static unsigned int ix86_select_alt_pic_regnum
PARAMS ((void));
741 static int ix86_save_reg
PARAMS ((unsigned int, int));
742 static void ix86_compute_frame_layout
PARAMS ((struct ix86_frame
*));
743 static int ix86_comp_type_attributes
PARAMS ((tree
, tree
));
744 const struct attribute_spec ix86_attribute_table
[];
745 static tree ix86_handle_cdecl_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
746 static tree ix86_handle_regparm_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
747 static int ix86_value_regno
PARAMS ((enum machine_mode
));
749 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
750 static void ix86_svr3_asm_out_constructor
PARAMS ((rtx
, int));
753 /* Register class used for passing given 64bit part of the argument.
754 These represent classes as documented by the PS ABI, with the exception
755 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
756 use SF or DFmode move instead of DImode to avoid reformatting penalties.
758 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
759 whenever possible (upper half does contain padding).
761 enum x86_64_reg_class
764 X86_64_INTEGER_CLASS
,
765 X86_64_INTEGERSI_CLASS
,
/* Debug/dump names for the enum x86_64_reg_class values above. */
774 static const char * const x86_64_reg_class_name
[] =
775 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
/* Maximum number of 64-bit chunks one argument can be split into
(presumably bounds the class arrays passed to classify_argument). */
777 #define MAX_CLASSES 4
778 static int classify_argument
PARAMS ((enum machine_mode
, tree
,
779 enum x86_64_reg_class
[MAX_CLASSES
],
781 static int examine_argument
PARAMS ((enum machine_mode
, tree
, int, int *,
783 static rtx construct_container
PARAMS ((enum machine_mode
, tree
, int, int, int,
785 static enum x86_64_reg_class merge_classes
PARAMS ((enum x86_64_reg_class
,
786 enum x86_64_reg_class
));
788 /* Initialize the GCC target structure. */
789 #undef TARGET_ATTRIBUTE_TABLE
790 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
791 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
792 # undef TARGET_MERGE_DECL_ATTRIBUTES
793 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
796 #undef TARGET_COMP_TYPE_ATTRIBUTES
797 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
799 #undef TARGET_INIT_BUILTINS
800 #define TARGET_INIT_BUILTINS ix86_init_builtins
802 #undef TARGET_EXPAND_BUILTIN
803 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
805 #undef TARGET_ASM_FUNCTION_EPILOGUE
806 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
808 #undef TARGET_ASM_OPEN_PAREN
809 #define TARGET_ASM_OPEN_PAREN ""
810 #undef TARGET_ASM_CLOSE_PAREN
811 #define TARGET_ASM_CLOSE_PAREN ""
813 #undef TARGET_ASM_ALIGNED_HI_OP
814 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
815 #undef TARGET_ASM_ALIGNED_SI_OP
816 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
818 #undef TARGET_ASM_ALIGNED_DI_OP
819 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
822 #undef TARGET_ASM_UNALIGNED_HI_OP
823 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
824 #undef TARGET_ASM_UNALIGNED_SI_OP
825 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
826 #undef TARGET_ASM_UNALIGNED_DI_OP
827 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
829 #undef TARGET_SCHED_ADJUST_COST
830 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
831 #undef TARGET_SCHED_ISSUE_RATE
832 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
833 #undef TARGET_SCHED_VARIABLE_ISSUE
834 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
835 #undef TARGET_SCHED_INIT
836 #define TARGET_SCHED_INIT ix86_sched_init
837 #undef TARGET_SCHED_REORDER
838 #define TARGET_SCHED_REORDER ix86_sched_reorder
839 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
840 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
841 ia32_use_dfa_pipeline_interface
842 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
843 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
844 ia32_multipass_dfa_lookahead
847 #undef TARGET_HAVE_TLS
848 #define TARGET_HAVE_TLS true
/* The target hook vector; TARGET_INITIALIZER picks up the TARGET_*
macro overrides defined above. */
851 struct gcc_target targetm
= TARGET_INITIALIZER
;
853 /* Sometimes certain combinations of command options do not make
854 sense on a particular target machine. You can define a macro
855 `OVERRIDE_OPTIONS' to take account of this. This macro, if
856 defined, is executed once just after all the command options have
859 Don't use this macro to turn on various extra optimizations for
860 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
866 /* Comes from final.c -- no real reason to change it. */
867 #define MAX_CODE_ALIGN 16
871 const struct processor_costs
*cost
; /* Processor costs */
872 const int target_enable
; /* Target flags to enable. */
873 const int target_disable
; /* Target flags to disable. */
874 const int align_loop
; /* Default alignments. */
875 const int align_loop_max_skip
;
876 const int align_jump
;
877 const int align_jump_max_skip
;
878 const int align_func
;
879 const int branch_cost
;
881 const processor_target_table
[PROCESSOR_max
] =
883 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4, 1},
884 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16, 1},
885 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16, 1},
886 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16, 1},
887 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32, 1},
888 {&athlon_cost
, 0, 0, 16, 7, 64, 7, 16, 1},
889 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0, 1}
892 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
895 const char *const name
; /* processor name or nickname. */
896 const enum processor_type processor
;
902 PTA_PREFETCH_SSE
= 8,
907 const processor_alias_table
[] =
909 {"i386", PROCESSOR_I386
, 0},
910 {"i486", PROCESSOR_I486
, 0},
911 {"i586", PROCESSOR_PENTIUM
, 0},
912 {"pentium", PROCESSOR_PENTIUM
, 0},
913 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
914 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
915 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
916 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
917 {"i686", PROCESSOR_PENTIUMPRO
, 0},
918 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
919 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
920 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
921 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
|
922 PTA_MMX
| PTA_PREFETCH_SSE
},
923 {"k6", PROCESSOR_K6
, PTA_MMX
},
924 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
925 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
926 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
928 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
929 | PTA_3DNOW
| PTA_3DNOW_A
},
930 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
931 | PTA_3DNOW_A
| PTA_SSE
},
932 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
933 | PTA_3DNOW_A
| PTA_SSE
},
934 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
935 | PTA_3DNOW_A
| PTA_SSE
},
938 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
940 #ifdef SUBTARGET_OVERRIDE_OPTIONS
941 SUBTARGET_OVERRIDE_OPTIONS
;
944 if (!ix86_cpu_string
&& ix86_arch_string
)
945 ix86_cpu_string
= ix86_arch_string
;
946 if (!ix86_cpu_string
)
947 ix86_cpu_string
= cpu_names
[TARGET_CPU_DEFAULT
];
948 if (!ix86_arch_string
)
949 ix86_arch_string
= TARGET_64BIT
? "athlon-4" : "i386";
951 if (ix86_cmodel_string
!= 0)
953 if (!strcmp (ix86_cmodel_string
, "small"))
954 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
956 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
957 else if (!strcmp (ix86_cmodel_string
, "32"))
959 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
960 ix86_cmodel
= CM_KERNEL
;
961 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
962 ix86_cmodel
= CM_MEDIUM
;
963 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
964 ix86_cmodel
= CM_LARGE
;
966 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
972 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
974 if (ix86_asm_string
!= 0)
976 if (!strcmp (ix86_asm_string
, "intel"))
977 ix86_asm_dialect
= ASM_INTEL
;
978 else if (!strcmp (ix86_asm_string
, "att"))
979 ix86_asm_dialect
= ASM_ATT
;
981 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
983 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
984 error ("code model `%s' not supported in the %s bit mode",
985 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
986 if (ix86_cmodel
== CM_LARGE
)
987 sorry ("code model `large' not supported yet");
988 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
989 sorry ("%i-bit mode not compiled in",
990 (target_flags
& MASK_64BIT
) ? 64 : 32);
992 for (i
= 0; i
< pta_size
; i
++)
993 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
995 ix86_arch
= processor_alias_table
[i
].processor
;
996 /* Default cpu tuning to the architecture. */
997 ix86_cpu
= ix86_arch
;
998 if (processor_alias_table
[i
].flags
& PTA_MMX
999 && !(target_flags
& MASK_MMX_SET
))
1000 target_flags
|= MASK_MMX
;
1001 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1002 && !(target_flags
& MASK_3DNOW_SET
))
1003 target_flags
|= MASK_3DNOW
;
1004 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1005 && !(target_flags
& MASK_3DNOW_A_SET
))
1006 target_flags
|= MASK_3DNOW_A
;
1007 if (processor_alias_table
[i
].flags
& PTA_SSE
1008 && !(target_flags
& MASK_SSE_SET
))
1009 target_flags
|= MASK_SSE
;
1010 if (processor_alias_table
[i
].flags
& PTA_SSE2
1011 && !(target_flags
& MASK_SSE2_SET
))
1012 target_flags
|= MASK_SSE2
;
1013 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1014 x86_prefetch_sse
= true;
1019 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1021 for (i
= 0; i
< pta_size
; i
++)
1022 if (! strcmp (ix86_cpu_string
, processor_alias_table
[i
].name
))
1024 ix86_cpu
= processor_alias_table
[i
].processor
;
1027 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1028 x86_prefetch_sse
= true;
1030 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string
);
1033 ix86_cost
= &size_cost
;
1035 ix86_cost
= processor_target_table
[ix86_cpu
].cost
;
1036 target_flags
|= processor_target_table
[ix86_cpu
].target_enable
;
1037 target_flags
&= ~processor_target_table
[ix86_cpu
].target_disable
;
1039 /* Arrange to set up i386_stack_locals for all functions. */
1040 init_machine_status
= ix86_init_machine_status
;
1042 /* Validate -mregparm= value. */
1043 if (ix86_regparm_string
)
1045 i
= atoi (ix86_regparm_string
);
1046 if (i
< 0 || i
> REGPARM_MAX
)
1047 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1053 ix86_regparm
= REGPARM_MAX
;
1055 /* If the user has provided any of the -malign-* options,
1056 warn and use that value only if -falign-* is not set.
1057 Remove this code in GCC 3.2 or later. */
1058 if (ix86_align_loops_string
)
1060 warning ("-malign-loops is obsolete, use -falign-loops");
1061 if (align_loops
== 0)
1063 i
= atoi (ix86_align_loops_string
);
1064 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1065 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1067 align_loops
= 1 << i
;
1071 if (ix86_align_jumps_string
)
1073 warning ("-malign-jumps is obsolete, use -falign-jumps");
1074 if (align_jumps
== 0)
1076 i
= atoi (ix86_align_jumps_string
);
1077 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1078 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1080 align_jumps
= 1 << i
;
1084 if (ix86_align_funcs_string
)
1086 warning ("-malign-functions is obsolete, use -falign-functions");
1087 if (align_functions
== 0)
1089 i
= atoi (ix86_align_funcs_string
);
1090 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1091 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1093 align_functions
= 1 << i
;
1097 /* Default align_* from the processor table. */
1098 if (align_loops
== 0)
1100 align_loops
= processor_target_table
[ix86_cpu
].align_loop
;
1101 align_loops_max_skip
= processor_target_table
[ix86_cpu
].align_loop_max_skip
;
1103 if (align_jumps
== 0)
1105 align_jumps
= processor_target_table
[ix86_cpu
].align_jump
;
1106 align_jumps_max_skip
= processor_target_table
[ix86_cpu
].align_jump_max_skip
;
1108 if (align_functions
== 0)
1110 align_functions
= processor_target_table
[ix86_cpu
].align_func
;
1113 /* Validate -mpreferred-stack-boundary= value, or provide default.
1114 The default of 128 bits is for Pentium III's SSE __m128, but we
1115 don't want additional code to keep the stack aligned when
1116 optimizing for code size. */
1117 ix86_preferred_stack_boundary
= (optimize_size
1118 ? TARGET_64BIT
? 64 : 32
1120 if (ix86_preferred_stack_boundary_string
)
1122 i
= atoi (ix86_preferred_stack_boundary_string
);
1123 if (i
< (TARGET_64BIT
? 3 : 2) || i
> 12)
1124 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1125 TARGET_64BIT
? 3 : 2);
1127 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
1130 /* Validate -mbranch-cost= value, or provide default. */
1131 ix86_branch_cost
= processor_target_table
[ix86_cpu
].branch_cost
;
1132 if (ix86_branch_cost_string
)
1134 i
= atoi (ix86_branch_cost_string
);
1136 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1138 ix86_branch_cost
= i
;
1141 if (ix86_tls_dialect_string
)
1143 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1144 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1145 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1146 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1148 error ("bad value (%s) for -mtls-dialect= switch",
1149 ix86_tls_dialect_string
);
1153 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
1155 /* Keep nonleaf frame pointers. */
1156 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1157 flag_omit_frame_pointer
= 1;
1159 /* If we're doing fast math, we don't care about comparison order
1160 wrt NaNs. This lets us use a shorter comparison sequence. */
1161 if (flag_unsafe_math_optimizations
)
1162 target_flags
&= ~MASK_IEEE_FP
;
1164 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1165 since the insns won't need emulation. */
1166 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1167 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1171 if (TARGET_ALIGN_DOUBLE
)
1172 error ("-malign-double makes no sense in the 64bit mode");
1174 error ("-mrtd calling convention not supported in the 64bit mode");
1175 /* Enable by default the SSE and MMX builtins. */
1176 target_flags
|= (MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
);
1177 ix86_fpmath
= FPMATH_SSE
;
1180 ix86_fpmath
= FPMATH_387
;
1182 if (ix86_fpmath_string
!= 0)
1184 if (! strcmp (ix86_fpmath_string
, "387"))
1185 ix86_fpmath
= FPMATH_387
;
1186 else if (! strcmp (ix86_fpmath_string
, "sse"))
1190 warning ("SSE instruction set disabled, using 387 arithmetics");
1191 ix86_fpmath
= FPMATH_387
;
1194 ix86_fpmath
= FPMATH_SSE
;
1196 else if (! strcmp (ix86_fpmath_string
, "387,sse")
1197 || ! strcmp (ix86_fpmath_string
, "sse,387"))
1201 warning ("SSE instruction set disabled, using 387 arithmetics");
1202 ix86_fpmath
= FPMATH_387
;
1204 else if (!TARGET_80387
)
1206 warning ("387 instruction set disabled, using SSE arithmetics");
1207 ix86_fpmath
= FPMATH_SSE
;
1210 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
1213 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
1216 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1220 target_flags
|= MASK_MMX
;
1221 x86_prefetch_sse
= true;
1224 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1227 target_flags
|= MASK_MMX
;
1228 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
1229 extensions it adds. */
1230 if (x86_3dnow_a
& (1 << ix86_arch
))
1231 target_flags
|= MASK_3DNOW_A
;
1233 if ((x86_accumulate_outgoing_args
& CPUMASK
)
1234 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS_SET
)
1236 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1238 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1241 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1242 p
= strchr (internal_label_prefix
, 'X');
1243 internal_label_prefix_len
= p
- internal_label_prefix
;
1249 optimization_options (level
, size
)
1251 int size ATTRIBUTE_UNUSED
;
1253 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1254 make the problem with not enough registers even worse. */
1255 #ifdef INSN_SCHEDULING
1257 flag_schedule_insns
= 0;
1259 if (TARGET_64BIT
&& optimize
>= 1)
1260 flag_omit_frame_pointer
= 1;
1263 flag_pcc_struct_return
= 0;
1264 flag_asynchronous_unwind_tables
= 1;
1267 flag_omit_frame_pointer
= 0;
1270 /* Table of valid machine attributes. */
1271 const struct attribute_spec ix86_attribute_table
[] =
1273 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1274 /* Stdcall attribute says callee is responsible for popping arguments
1275 if they are not variable. */
1276 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1277 /* Cdecl attribute says the callee is a normal C declaration */
1278 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1279 /* Regparm attribute specifies how many integer arguments are to be
1280 passed in registers. */
1281 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute
},
1282 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1283 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1284 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1285 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
1287 { NULL
, 0, 0, false, false, false, NULL
}
1290 /* Handle a "cdecl" or "stdcall" attribute;
1291 arguments as in struct attribute_spec.handler. */
1293 ix86_handle_cdecl_attribute (node
, name
, args
, flags
, no_add_attrs
)
1296 tree args ATTRIBUTE_UNUSED
;
1297 int flags ATTRIBUTE_UNUSED
;
1300 if (TREE_CODE (*node
) != FUNCTION_TYPE
1301 && TREE_CODE (*node
) != METHOD_TYPE
1302 && TREE_CODE (*node
) != FIELD_DECL
1303 && TREE_CODE (*node
) != TYPE_DECL
)
1305 warning ("`%s' attribute only applies to functions",
1306 IDENTIFIER_POINTER (name
));
1307 *no_add_attrs
= true;
1312 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
1313 *no_add_attrs
= true;
1319 /* Handle a "regparm" attribute;
1320 arguments as in struct attribute_spec.handler. */
1322 ix86_handle_regparm_attribute (node
, name
, args
, flags
, no_add_attrs
)
1326 int flags ATTRIBUTE_UNUSED
;
1329 if (TREE_CODE (*node
) != FUNCTION_TYPE
1330 && TREE_CODE (*node
) != METHOD_TYPE
1331 && TREE_CODE (*node
) != FIELD_DECL
1332 && TREE_CODE (*node
) != TYPE_DECL
)
1334 warning ("`%s' attribute only applies to functions",
1335 IDENTIFIER_POINTER (name
));
1336 *no_add_attrs
= true;
1342 cst
= TREE_VALUE (args
);
1343 if (TREE_CODE (cst
) != INTEGER_CST
)
1345 warning ("`%s' attribute requires an integer constant argument",
1346 IDENTIFIER_POINTER (name
));
1347 *no_add_attrs
= true;
1349 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
1351 warning ("argument to `%s' attribute larger than %d",
1352 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
1353 *no_add_attrs
= true;
1360 /* Return 0 if the attributes for two types are incompatible, 1 if they
1361 are compatible, and 2 if they are nearly compatible (which causes a
1362 warning to be generated). */
1365 ix86_comp_type_attributes (type1
, type2
)
1369 /* Check for mismatch of non-default calling convention. */
1370 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
1372 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
1375 /* Check for mismatched return types (cdecl vs stdcall). */
1376 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
1377 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
1382 /* Value is the number of bytes of arguments automatically
1383 popped when returning from a subroutine call.
1384 FUNDECL is the declaration node of the function (as a tree),
1385 FUNTYPE is the data type of the function (as a tree),
1386 or for a library call it is an identifier node for the subroutine name.
1387 SIZE is the number of bytes of arguments passed on the stack.
1389 On the 80386, the RTD insn may be used to pop them if the number
1390 of args is fixed, but if the number is variable then the caller
1391 must pop them all. RTD can't be used for library calls now
1392 because the library is compiled with the Unix compiler.
1393 Use of RTD is a selectable option, since it is incompatible with
1394 standard Unix calling sequences. If the option is not selected,
1395 the caller must always pop the args.
1397 The attribute stdcall is equivalent to RTD on a per module basis. */
1400 ix86_return_pops_args (fundecl
, funtype
, size
)
1405 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
1407 /* Cdecl functions override -mrtd, and never pop the stack. */
1408 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
1410 /* Stdcall functions will pop the stack if not variable args. */
1411 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
)))
1415 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1416 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
1417 == void_type_node
)))
1421 /* Lose any fake structure return argument if it is passed on the stack. */
1422 if (aggregate_value_p (TREE_TYPE (funtype
))
1425 int nregs
= ix86_regparm
;
1429 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype
));
1432 nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1436 return GET_MODE_SIZE (Pmode
);
1442 /* Argument support functions. */
1444 /* Return true when register may be used to pass function parameters. */
1446 ix86_function_arg_regno_p (regno
)
1451 return (regno
< REGPARM_MAX
1452 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
1453 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
1455 /* RAX is used as hidden argument to va_arg functions. */
1458 for (i
= 0; i
< REGPARM_MAX
; i
++)
1459 if (regno
== x86_64_int_parameter_registers
[i
])
1464 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1465 for a call to a function whose data type is FNTYPE.
1466 For a library call, FNTYPE is 0. */
1469 init_cumulative_args (cum
, fntype
, libname
)
1470 CUMULATIVE_ARGS
*cum
; /* Argument info to initialize */
1471 tree fntype
; /* tree ptr for function decl */
1472 rtx libname
; /* SYMBOL_REF of library name or 0 */
1474 static CUMULATIVE_ARGS zero_cum
;
1475 tree param
, next_param
;
1477 if (TARGET_DEBUG_ARG
)
1479 fprintf (stderr
, "\ninit_cumulative_args (");
1481 fprintf (stderr
, "fntype code = %s, ret code = %s",
1482 tree_code_name
[(int) TREE_CODE (fntype
)],
1483 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
1485 fprintf (stderr
, "no fntype");
1488 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
1493 /* Set up the number of registers to use for passing arguments. */
1494 cum
->nregs
= ix86_regparm
;
1495 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1496 if (fntype
&& !TARGET_64BIT
)
1498 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype
));
1501 cum
->nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1503 cum
->maybe_vaarg
= false;
1505 /* Determine if this function has variable arguments. This is
1506 indicated by the last argument being 'void_type_mode' if there
1507 are no variable arguments. If there are variable arguments, then
1508 we won't pass anything in registers */
1512 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
1513 param
!= 0; param
= next_param
)
1515 next_param
= TREE_CHAIN (param
);
1516 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
1520 cum
->maybe_vaarg
= true;
1524 if ((!fntype
&& !libname
)
1525 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
1526 cum
->maybe_vaarg
= 1;
1528 if (TARGET_DEBUG_ARG
)
1529 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
1534 /* x86-64 register passing impleemntation. See x86-64 ABI for details. Goal
1535 of this code is to classify each 8bytes of incoming argument by the register
1536 class and assign registers accordingly. */
1538 /* Return the union class of CLASS1 and CLASS2.
1539 See the x86-64 PS ABI for details. */
1541 static enum x86_64_reg_class
1542 merge_classes (class1
, class2
)
1543 enum x86_64_reg_class class1
, class2
;
1545 /* Rule #1: If both classes are equal, this is the resulting class. */
1546 if (class1
== class2
)
1549 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1551 if (class1
== X86_64_NO_CLASS
)
1553 if (class2
== X86_64_NO_CLASS
)
1556 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1557 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
1558 return X86_64_MEMORY_CLASS
;
1560 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1561 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
1562 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
1563 return X86_64_INTEGERSI_CLASS
;
1564 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
1565 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
1566 return X86_64_INTEGER_CLASS
;
1568 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1569 if (class1
== X86_64_X87_CLASS
|| class1
== X86_64_X87UP_CLASS
1570 || class2
== X86_64_X87_CLASS
|| class2
== X86_64_X87UP_CLASS
)
1571 return X86_64_MEMORY_CLASS
;
1573 /* Rule #6: Otherwise class SSE is used. */
1574 return X86_64_SSE_CLASS
;
1577 /* Classify the argument of type TYPE and mode MODE.
1578 CLASSES will be filled by the register class used to pass each word
1579 of the operand. The number of words is returned. In case the parameter
1580 should be passed in memory, 0 is returned. As a special case for zero
1581 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1583 BIT_OFFSET is used internally for handling records and specifies offset
1584 of the offset in bits modulo 256 to avoid overflow cases.
1586 See the x86-64 PS ABI for details.
1590 classify_argument (mode
, type
, classes
, bit_offset
)
1591 enum machine_mode mode
;
1593 enum x86_64_reg_class classes
[MAX_CLASSES
];
1597 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1598 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1600 /* Variable sized entities are always passed/returned in memory. */
1604 if (type
&& AGGREGATE_TYPE_P (type
))
1608 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
1610 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1614 for (i
= 0; i
< words
; i
++)
1615 classes
[i
] = X86_64_NO_CLASS
;
1617 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1618 signalize memory class, so handle it as special case. */
1621 classes
[0] = X86_64_NO_CLASS
;
1625 /* Classify each field of record and merge classes. */
1626 if (TREE_CODE (type
) == RECORD_TYPE
)
1628 /* For classes first merge in the field of the subclasses. */
1629 if (TYPE_BINFO (type
) != NULL
&& TYPE_BINFO_BASETYPES (type
) != NULL
)
1631 tree bases
= TYPE_BINFO_BASETYPES (type
);
1632 int n_bases
= TREE_VEC_LENGTH (bases
);
1635 for (i
= 0; i
< n_bases
; ++i
)
1637 tree binfo
= TREE_VEC_ELT (bases
, i
);
1639 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
1640 tree type
= BINFO_TYPE (binfo
);
1642 num
= classify_argument (TYPE_MODE (type
),
1644 (offset
+ bit_offset
) % 256);
1647 for (i
= 0; i
< num
; i
++)
1649 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
1651 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1655 /* And now merge the fields of structure. */
1656 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1658 if (TREE_CODE (field
) == FIELD_DECL
)
1662 /* Bitfields are always classified as integer. Handle them
1663 early, since later code would consider them to be
1664 misaligned integers. */
1665 if (DECL_BIT_FIELD (field
))
1667 for (i
= int_bit_position (field
) / 8 / 8;
1668 i
< (int_bit_position (field
)
1669 + tree_low_cst (DECL_SIZE (field
), 0)
1672 merge_classes (X86_64_INTEGER_CLASS
,
1677 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1678 TREE_TYPE (field
), subclasses
,
1679 (int_bit_position (field
)
1680 + bit_offset
) % 256);
1683 for (i
= 0; i
< num
; i
++)
1686 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
1688 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1694 /* Arrays are handled as small records. */
1695 else if (TREE_CODE (type
) == ARRAY_TYPE
)
1698 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
1699 TREE_TYPE (type
), subclasses
, bit_offset
);
1703 /* The partial classes are now full classes. */
1704 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
1705 subclasses
[0] = X86_64_SSE_CLASS
;
1706 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
1707 subclasses
[0] = X86_64_INTEGER_CLASS
;
1709 for (i
= 0; i
< words
; i
++)
1710 classes
[i
] = subclasses
[i
% num
];
1712 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1713 else if (TREE_CODE (type
) == UNION_TYPE
1714 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
1716 /* For classes first merge in the field of the subclasses. */
1717 if (TYPE_BINFO (type
) != NULL
&& TYPE_BINFO_BASETYPES (type
) != NULL
)
1719 tree bases
= TYPE_BINFO_BASETYPES (type
);
1720 int n_bases
= TREE_VEC_LENGTH (bases
);
1723 for (i
= 0; i
< n_bases
; ++i
)
1725 tree binfo
= TREE_VEC_ELT (bases
, i
);
1727 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
1728 tree type
= BINFO_TYPE (binfo
);
1730 num
= classify_argument (TYPE_MODE (type
),
1732 (offset
+ (bit_offset
% 64)) % 256);
1735 for (i
= 0; i
< num
; i
++)
1737 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
1739 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1743 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1745 if (TREE_CODE (field
) == FIELD_DECL
)
1748 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1749 TREE_TYPE (field
), subclasses
,
1753 for (i
= 0; i
< num
; i
++)
1754 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
1761 /* Final merger cleanup. */
1762 for (i
= 0; i
< words
; i
++)
1764 /* If one class is MEMORY, everything should be passed in
1766 if (classes
[i
] == X86_64_MEMORY_CLASS
)
1769 /* The X86_64_SSEUP_CLASS should be always preceded by
1770 X86_64_SSE_CLASS. */
1771 if (classes
[i
] == X86_64_SSEUP_CLASS
1772 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
1773 classes
[i
] = X86_64_SSE_CLASS
;
1775 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1776 if (classes
[i
] == X86_64_X87UP_CLASS
1777 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
1778 classes
[i
] = X86_64_SSE_CLASS
;
1783 /* Compute alignment needed. We align all types to natural boundaries with
1784 exception of XFmode that is aligned to 64bits. */
1785 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
1787 int mode_alignment
= GET_MODE_BITSIZE (mode
);
1790 mode_alignment
= 128;
1791 else if (mode
== XCmode
)
1792 mode_alignment
= 256;
1793 /* Misaligned fields are always returned in memory. */
1794 if (bit_offset
% mode_alignment
)
1798 /* Classification of atomic types. */
1808 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
1809 classes
[0] = X86_64_INTEGERSI_CLASS
;
1811 classes
[0] = X86_64_INTEGER_CLASS
;
1815 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1818 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1819 classes
[2] = classes
[3] = X86_64_INTEGER_CLASS
;
1822 if (!(bit_offset
% 64))
1823 classes
[0] = X86_64_SSESF_CLASS
;
1825 classes
[0] = X86_64_SSE_CLASS
;
1828 classes
[0] = X86_64_SSEDF_CLASS
;
1831 classes
[0] = X86_64_X87_CLASS
;
1832 classes
[1] = X86_64_X87UP_CLASS
;
1835 classes
[0] = X86_64_X87_CLASS
;
1836 classes
[1] = X86_64_X87UP_CLASS
;
1837 classes
[2] = X86_64_X87_CLASS
;
1838 classes
[3] = X86_64_X87UP_CLASS
;
1841 classes
[0] = X86_64_SSEDF_CLASS
;
1842 classes
[1] = X86_64_SSEDF_CLASS
;
1845 classes
[0] = X86_64_SSE_CLASS
;
1853 classes
[0] = X86_64_SSE_CLASS
;
1854 classes
[1] = X86_64_SSEUP_CLASS
;
1860 classes
[0] = X86_64_SSE_CLASS
;
1870 /* Examine the argument and return set number of register required in each
1871 class. Return 0 iff parameter should be passed in memory. */
1873 examine_argument (mode
, type
, in_return
, int_nregs
, sse_nregs
)
1874 enum machine_mode mode
;
1876 int *int_nregs
, *sse_nregs
;
1879 enum x86_64_reg_class
class[MAX_CLASSES
];
1880 int n
= classify_argument (mode
, type
, class, 0);
1886 for (n
--; n
>= 0; n
--)
1889 case X86_64_INTEGER_CLASS
:
1890 case X86_64_INTEGERSI_CLASS
:
1893 case X86_64_SSE_CLASS
:
1894 case X86_64_SSESF_CLASS
:
1895 case X86_64_SSEDF_CLASS
:
1898 case X86_64_NO_CLASS
:
1899 case X86_64_SSEUP_CLASS
:
1901 case X86_64_X87_CLASS
:
1902 case X86_64_X87UP_CLASS
:
1906 case X86_64_MEMORY_CLASS
:
1911 /* Construct container for the argument used by GCC interface. See
1912 FUNCTION_ARG for the detailed description. */
1914 construct_container (mode
, type
, in_return
, nintregs
, nsseregs
, intreg
, sse_regno
)
1915 enum machine_mode mode
;
1918 int nintregs
, nsseregs
;
1922 enum machine_mode tmpmode
;
1924 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1925 enum x86_64_reg_class
class[MAX_CLASSES
];
1929 int needed_sseregs
, needed_intregs
;
1930 rtx exp
[MAX_CLASSES
];
1933 n
= classify_argument (mode
, type
, class, 0);
1934 if (TARGET_DEBUG_ARG
)
1937 fprintf (stderr
, "Memory class\n");
1940 fprintf (stderr
, "Classes:");
1941 for (i
= 0; i
< n
; i
++)
1943 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
1945 fprintf (stderr
, "\n");
1950 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
, &needed_sseregs
))
1952 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
1955 /* First construct simple cases. Avoid SCmode, since we want to use
1956 single register to pass this type. */
1957 if (n
== 1 && mode
!= SCmode
)
1960 case X86_64_INTEGER_CLASS
:
1961 case X86_64_INTEGERSI_CLASS
:
1962 return gen_rtx_REG (mode
, intreg
[0]);
1963 case X86_64_SSE_CLASS
:
1964 case X86_64_SSESF_CLASS
:
1965 case X86_64_SSEDF_CLASS
:
1966 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
1967 case X86_64_X87_CLASS
:
1968 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
1969 case X86_64_NO_CLASS
:
1970 /* Zero sized array, struct or class. */
1975 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
)
1976 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
1978 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
1979 return gen_rtx_REG (TFmode
, FIRST_STACK_REG
);
1980 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
1981 && class[1] == X86_64_INTEGER_CLASS
1982 && (mode
== CDImode
|| mode
== TImode
)
1983 && intreg
[0] + 1 == intreg
[1])
1984 return gen_rtx_REG (mode
, intreg
[0]);
1986 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
1987 && class[2] == X86_64_X87_CLASS
&& class[3] == X86_64_X87UP_CLASS
)
1988 return gen_rtx_REG (TCmode
, FIRST_STACK_REG
);
1990 /* Otherwise figure out the entries of the PARALLEL. */
1991 for (i
= 0; i
< n
; i
++)
1995 case X86_64_NO_CLASS
:
1997 case X86_64_INTEGER_CLASS
:
1998 case X86_64_INTEGERSI_CLASS
:
1999 /* Merge TImodes on aligned occassions here too. */
2000 if (i
* 8 + 8 > bytes
)
2001 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
2002 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
2006 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2007 if (tmpmode
== BLKmode
)
2009 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2010 gen_rtx_REG (tmpmode
, *intreg
),
2014 case X86_64_SSESF_CLASS
:
2015 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2016 gen_rtx_REG (SFmode
,
2017 SSE_REGNO (sse_regno
)),
2021 case X86_64_SSEDF_CLASS
:
2022 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2023 gen_rtx_REG (DFmode
,
2024 SSE_REGNO (sse_regno
)),
2028 case X86_64_SSE_CLASS
:
2029 if (i
< n
&& class[i
+ 1] == X86_64_SSEUP_CLASS
)
2030 tmpmode
= TImode
, i
++;
2033 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2034 gen_rtx_REG (tmpmode
,
2035 SSE_REGNO (sse_regno
)),
2043 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2044 for (i
= 0; i
< nexps
; i
++)
2045 XVECEXP (ret
, 0, i
) = exp
[i
];
2049 /* Update the data in CUM to advance over an argument
2050 of mode MODE and data type TYPE.
2051 (TYPE is null for libcalls where that information may not be available.) */
2054 function_arg_advance (cum
, mode
, type
, named
)
2055 CUMULATIVE_ARGS
*cum
; /* current arg information */
2056 enum machine_mode mode
; /* current arg mode */
2057 tree type
; /* type of the argument or 0 if lib support */
2058 int named
; /* whether or not the argument was named */
2061 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2062 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2064 if (TARGET_DEBUG_ARG
)
2066 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2067 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2070 int int_nregs
, sse_nregs
;
2071 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
2072 cum
->words
+= words
;
2073 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2075 cum
->nregs
-= int_nregs
;
2076 cum
->sse_nregs
-= sse_nregs
;
2077 cum
->regno
+= int_nregs
;
2078 cum
->sse_regno
+= sse_nregs
;
2081 cum
->words
+= words
;
2085 if (TARGET_SSE
&& mode
== TImode
)
2087 cum
->sse_words
+= words
;
2088 cum
->sse_nregs
-= 1;
2089 cum
->sse_regno
+= 1;
2090 if (cum
->sse_nregs
<= 0)
2098 cum
->words
+= words
;
2099 cum
->nregs
-= words
;
2100 cum
->regno
+= words
;
2102 if (cum
->nregs
<= 0)
2112 /* Define where to put the arguments to a function.
2113 Value is zero to push the argument on the stack,
2114 or a hard register in which to store the argument.
2116 MODE is the argument's machine mode.
2117 TYPE is the data type of the argument (as a tree).
2118 This is null for libcalls where that information may
2120 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2121 the preceding args and about the function being called.
2122 NAMED is nonzero if this argument is a named parameter
2123 (otherwise it is an extra parameter matching an ellipsis). */
2126 function_arg (cum
, mode
, type
, named
)
2127 CUMULATIVE_ARGS
*cum
; /* current arg information */
2128 enum machine_mode mode
; /* current arg mode */
2129 tree type
; /* type of the argument or 0 if lib support */
2130 int named
; /* != 0 for normal args, == 0 for ... args */
2134 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2135 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2137 /* Handle an hidden AL argument containing number of registers for varargs
2138 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2140 if (mode
== VOIDmode
)
2143 return GEN_INT (cum
->maybe_vaarg
2144 ? (cum
->sse_nregs
< 0
2152 ret
= construct_container (mode
, type
, 0, cum
->nregs
, cum
->sse_nregs
,
2153 &x86_64_int_parameter_registers
[cum
->regno
],
2158 /* For now, pass fp/complex values on the stack. */
2167 if (words
<= cum
->nregs
)
2168 ret
= gen_rtx_REG (mode
, cum
->regno
);
2172 ret
= gen_rtx_REG (mode
, cum
->sse_regno
);
2176 if (TARGET_DEBUG_ARG
)
2179 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2180 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2183 print_simple_rtl (stderr
, ret
);
2185 fprintf (stderr
, ", stack");
2187 fprintf (stderr
, " )\n");
2193 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2197 ix86_function_arg_boundary (mode
, type
)
2198 enum machine_mode mode
;
2203 return PARM_BOUNDARY
;
2205 align
= TYPE_ALIGN (type
);
2207 align
= GET_MODE_ALIGNMENT (mode
);
2208 if (align
< PARM_BOUNDARY
)
2209 align
= PARM_BOUNDARY
;
2215 /* Return true if N is a possible register number of function value. */
2217 ix86_function_value_regno_p (regno
)
2222 return ((regno
) == 0
2223 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
2224 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
2226 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
2227 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
2228 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
2231 /* Define how to find the value returned by a function.
2232 VALTYPE is the data type of the value (as a tree).
2233 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2234 otherwise, FUNC is 0. */
2236 ix86_function_value (valtype
)
2241 rtx ret
= construct_container (TYPE_MODE (valtype
), valtype
, 1,
2242 REGPARM_MAX
, SSE_REGPARM_MAX
,
2243 x86_64_int_return_registers
, 0);
2244 /* For zero sized structures, construct_continer return NULL, but we need
2245 to keep rest of compiler happy by returning meaningfull value. */
2247 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
2251 return gen_rtx_REG (TYPE_MODE (valtype
),
2252 ix86_value_regno (TYPE_MODE (valtype
)));
2255 /* Return false iff type is returned in memory. */
2257 ix86_return_in_memory (type
)
2260 int needed_intregs
, needed_sseregs
;
2263 return !examine_argument (TYPE_MODE (type
), type
, 1,
2264 &needed_intregs
, &needed_sseregs
);
2268 if (TYPE_MODE (type
) == BLKmode
2269 || (VECTOR_MODE_P (TYPE_MODE (type
))
2270 && int_size_in_bytes (type
) == 8)
2271 || (int_size_in_bytes (type
) > 12 && TYPE_MODE (type
) != TImode
2272 && TYPE_MODE (type
) != TFmode
2273 && !VECTOR_MODE_P (TYPE_MODE (type
))))
2279 /* Define how to find the value returned by a library function
2280 assuming the value has mode MODE. */
2282 ix86_libcall_value (mode
)
2283 enum machine_mode mode
;
2293 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
2296 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
2298 return gen_rtx_REG (mode
, 0);
2302 return gen_rtx_REG (mode
, ix86_value_regno (mode
));
2305 /* Given a mode, return the register to use for a return value. */
2308 ix86_value_regno (mode
)
2309 enum machine_mode mode
;
2311 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
&& TARGET_FLOAT_RETURNS_IN_80387
)
2312 return FIRST_FLOAT_REG
;
2313 if (mode
== TImode
|| VECTOR_MODE_P (mode
))
2314 return FIRST_SSE_REG
;
2318 /* Create the va_list data type. */
2321 ix86_build_va_list ()
2323 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
2325 /* For i386 we use plain pointer to argument area. */
2327 return build_pointer_type (char_type_node
);
2329 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
2330 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
2332 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
2333 unsigned_type_node
);
2334 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
2335 unsigned_type_node
);
2336 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
2338 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
2341 DECL_FIELD_CONTEXT (f_gpr
) = record
;
2342 DECL_FIELD_CONTEXT (f_fpr
) = record
;
2343 DECL_FIELD_CONTEXT (f_ovf
) = record
;
2344 DECL_FIELD_CONTEXT (f_sav
) = record
;
2346 TREE_CHAIN (record
) = type_decl
;
2347 TYPE_NAME (record
) = type_decl
;
2348 TYPE_FIELDS (record
) = f_gpr
;
2349 TREE_CHAIN (f_gpr
) = f_fpr
;
2350 TREE_CHAIN (f_fpr
) = f_ovf
;
2351 TREE_CHAIN (f_ovf
) = f_sav
;
2353 layout_type (record
);
2355 /* The correct type is an array type of one element. */
2356 return build_array_type (record
, build_index_type (size_zero_node
));
2359 /* Perform any needed actions needed for a function that is receiving a
2360 variable number of arguments.
2364 MODE and TYPE are the mode and type of the current parameter.
2366 PRETEND_SIZE is a variable that should be set to the amount of stack
2367 that must be pushed by the prolog to pretend that our caller pushed
2370 Normally, this macro will push all remaining incoming registers on the
2371 stack and set PRETEND_SIZE to the length of the registers pushed. */
2374 ix86_setup_incoming_varargs (cum
, mode
, type
, pretend_size
, no_rtl
)
2375 CUMULATIVE_ARGS
*cum
;
2376 enum machine_mode mode
;
2378 int *pretend_size ATTRIBUTE_UNUSED
;
2382 CUMULATIVE_ARGS next_cum
;
2383 rtx save_area
= NULL_RTX
, mem
;
2396 /* Indicate to allocate space on the stack for varargs save area. */
2397 ix86_save_varrargs_registers
= 1;
2399 fntype
= TREE_TYPE (current_function_decl
);
2400 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
2401 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
2402 != void_type_node
));
2404 /* For varargs, we do not want to skip the dummy va_dcl argument.
2405 For stdargs, we do want to skip the last named argument. */
2408 function_arg_advance (&next_cum
, mode
, type
, 1);
2411 save_area
= frame_pointer_rtx
;
2413 set
= get_varargs_alias_set ();
2415 for (i
= next_cum
.regno
; i
< ix86_regparm
; i
++)
2417 mem
= gen_rtx_MEM (Pmode
,
2418 plus_constant (save_area
, i
* UNITS_PER_WORD
));
2419 set_mem_alias_set (mem
, set
);
2420 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
2421 x86_64_int_parameter_registers
[i
]));
2424 if (next_cum
.sse_nregs
)
2426 /* Now emit code to save SSE registers. The AX parameter contains number
2427 of SSE parameter regsiters used to call this function. We use
2428 sse_prologue_save insn template that produces computed jump across
2429 SSE saves. We need some preparation work to get this working. */
2431 label
= gen_label_rtx ();
2432 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
2434 /* Compute address to jump to :
2435 label - 5*eax + nnamed_sse_arguments*5 */
2436 tmp_reg
= gen_reg_rtx (Pmode
);
2437 nsse_reg
= gen_reg_rtx (Pmode
);
2438 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
2439 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2440 gen_rtx_MULT (Pmode
, nsse_reg
,
2442 if (next_cum
.sse_regno
)
2445 gen_rtx_CONST (DImode
,
2446 gen_rtx_PLUS (DImode
,
2448 GEN_INT (next_cum
.sse_regno
* 4))));
2450 emit_move_insn (nsse_reg
, label_ref
);
2451 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
2453 /* Compute address of memory block we save into. We always use pointer
2454 pointing 127 bytes after first byte to store - this is needed to keep
2455 instruction size limited by 4 bytes. */
2456 tmp_reg
= gen_reg_rtx (Pmode
);
2457 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2458 plus_constant (save_area
,
2459 8 * REGPARM_MAX
+ 127)));
2460 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
2461 set_mem_alias_set (mem
, set
);
2462 set_mem_align (mem
, BITS_PER_WORD
);
2464 /* And finally do the dirty job! */
2465 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
2466 GEN_INT (next_cum
.sse_regno
), label
));
2471 /* Implement va_start. */
2474 ix86_va_start (valist
, nextarg
)
2478 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
2479 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2480 tree gpr
, fpr
, ovf
, sav
, t
;
2482 /* Only 64bit target needs something special. */
2485 std_expand_builtin_va_start (valist
, nextarg
);
2489 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2490 f_fpr
= TREE_CHAIN (f_gpr
);
2491 f_ovf
= TREE_CHAIN (f_fpr
);
2492 f_sav
= TREE_CHAIN (f_ovf
);
2494 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2495 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2496 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2497 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2498 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2500 /* Count number of gp and fp argument registers used. */
2501 words
= current_function_args_info
.words
;
2502 n_gpr
= current_function_args_info
.regno
;
2503 n_fpr
= current_function_args_info
.sse_regno
;
2505 if (TARGET_DEBUG_ARG
)
2506 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2507 (int) words
, (int) n_gpr
, (int) n_fpr
);
2509 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
2510 build_int_2 (n_gpr
* 8, 0));
2511 TREE_SIDE_EFFECTS (t
) = 1;
2512 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2514 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
2515 build_int_2 (n_fpr
* 16 + 8*REGPARM_MAX
, 0));
2516 TREE_SIDE_EFFECTS (t
) = 1;
2517 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2519 /* Find the overflow area. */
2520 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
2522 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
2523 build_int_2 (words
* UNITS_PER_WORD
, 0));
2524 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2525 TREE_SIDE_EFFECTS (t
) = 1;
2526 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2528 /* Find the register save area.
2529 Prologue of the function save it right above stack frame. */
2530 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
2531 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
2532 TREE_SIDE_EFFECTS (t
) = 1;
2533 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2536 /* Implement va_arg. */
2538 ix86_va_arg (valist
, type
)
2541 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
2542 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2543 tree gpr
, fpr
, ovf
, sav
, t
;
2545 rtx lab_false
, lab_over
= NULL_RTX
;
2549 /* Only 64bit target needs something special. */
2552 return std_expand_builtin_va_arg (valist
, type
);
2555 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2556 f_fpr
= TREE_CHAIN (f_gpr
);
2557 f_ovf
= TREE_CHAIN (f_fpr
);
2558 f_sav
= TREE_CHAIN (f_ovf
);
2560 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2561 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2562 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2563 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2564 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2566 size
= int_size_in_bytes (type
);
2567 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2569 container
= construct_container (TYPE_MODE (type
), type
, 0,
2570 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
2572 * Pull the value out of the saved registers ...
2575 addr_rtx
= gen_reg_rtx (Pmode
);
2579 rtx int_addr_rtx
, sse_addr_rtx
;
2580 int needed_intregs
, needed_sseregs
;
2583 lab_over
= gen_label_rtx ();
2584 lab_false
= gen_label_rtx ();
2586 examine_argument (TYPE_MODE (type
), type
, 0,
2587 &needed_intregs
, &needed_sseregs
);
2590 need_temp
= ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
2591 || TYPE_ALIGN (type
) > 128);
2593 /* In case we are passing structure, verify that it is consetuctive block
2594 on the register save area. If not we need to do moves. */
2595 if (!need_temp
&& !REG_P (container
))
2597 /* Verify that all registers are strictly consetuctive */
2598 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
2602 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2604 rtx slot
= XVECEXP (container
, 0, i
);
2605 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
2606 || INTVAL (XEXP (slot
, 1)) != i
* 16)
2614 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2616 rtx slot
= XVECEXP (container
, 0, i
);
2617 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
2618 || INTVAL (XEXP (slot
, 1)) != i
* 8)
2625 int_addr_rtx
= addr_rtx
;
2626 sse_addr_rtx
= addr_rtx
;
2630 int_addr_rtx
= gen_reg_rtx (Pmode
);
2631 sse_addr_rtx
= gen_reg_rtx (Pmode
);
2633 /* First ensure that we fit completely in registers. */
2636 emit_cmp_and_jump_insns (expand_expr
2637 (gpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2638 GEN_INT ((REGPARM_MAX
- needed_intregs
+
2639 1) * 8), GE
, const1_rtx
, SImode
,
2644 emit_cmp_and_jump_insns (expand_expr
2645 (fpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2646 GEN_INT ((SSE_REGPARM_MAX
-
2647 needed_sseregs
+ 1) * 16 +
2648 REGPARM_MAX
* 8), GE
, const1_rtx
,
2649 SImode
, 1, lab_false
);
2652 /* Compute index to start of area used for integer regs. */
2655 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, gpr
);
2656 r
= expand_expr (t
, int_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2657 if (r
!= int_addr_rtx
)
2658 emit_move_insn (int_addr_rtx
, r
);
2662 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, fpr
);
2663 r
= expand_expr (t
, sse_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2664 if (r
!= sse_addr_rtx
)
2665 emit_move_insn (sse_addr_rtx
, r
);
2672 /* Never use the memory itself, as it has the alias set. */
2673 addr_rtx
= XEXP (assign_temp (type
, 0, 1, 0), 0);
2674 mem
= gen_rtx_MEM (BLKmode
, addr_rtx
);
2675 set_mem_alias_set (mem
, get_varargs_alias_set ());
2676 set_mem_align (mem
, BITS_PER_UNIT
);
2678 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
2680 rtx slot
= XVECEXP (container
, 0, i
);
2681 rtx reg
= XEXP (slot
, 0);
2682 enum machine_mode mode
= GET_MODE (reg
);
2688 if (SSE_REGNO_P (REGNO (reg
)))
2690 src_addr
= sse_addr_rtx
;
2691 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
2695 src_addr
= int_addr_rtx
;
2696 src_offset
= REGNO (reg
) * 8;
2698 src_mem
= gen_rtx_MEM (mode
, src_addr
);
2699 set_mem_alias_set (src_mem
, get_varargs_alias_set ());
2700 src_mem
= adjust_address (src_mem
, mode
, src_offset
);
2701 dest_mem
= adjust_address (mem
, mode
, INTVAL (XEXP (slot
, 1)));
2702 emit_move_insn (dest_mem
, src_mem
);
2709 build (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
2710 build_int_2 (needed_intregs
* 8, 0));
2711 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
2712 TREE_SIDE_EFFECTS (t
) = 1;
2713 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2718 build (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
2719 build_int_2 (needed_sseregs
* 16, 0));
2720 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
2721 TREE_SIDE_EFFECTS (t
) = 1;
2722 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2725 emit_jump_insn (gen_jump (lab_over
));
2727 emit_label (lab_false
);
2730 /* ... otherwise out of the overflow area. */
2732 /* Care for on-stack alignment if needed. */
2733 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
2737 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
2738 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
, build_int_2 (align
- 1, 0));
2739 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
, build_int_2 (-align
, -1));
2743 r
= expand_expr (t
, addr_rtx
, Pmode
, EXPAND_NORMAL
);
2745 emit_move_insn (addr_rtx
, r
);
2748 build (PLUS_EXPR
, TREE_TYPE (t
), t
,
2749 build_int_2 (rsize
* UNITS_PER_WORD
, 0));
2750 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2751 TREE_SIDE_EFFECTS (t
) = 1;
2752 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2755 emit_label (lab_over
);
2760 /* Return nonzero if OP is either a i387 or SSE fp register. */
2762 any_fp_register_operand (op
, mode
)
2764 enum machine_mode mode ATTRIBUTE_UNUSED
;
2766 return ANY_FP_REG_P (op
);
2769 /* Return nonzero if OP is an i387 fp register. */
2771 fp_register_operand (op
, mode
)
2773 enum machine_mode mode ATTRIBUTE_UNUSED
;
2775 return FP_REG_P (op
);
2778 /* Return nonzero if OP is a non-fp register_operand. */
2780 register_and_not_any_fp_reg_operand (op
, mode
)
2782 enum machine_mode mode
;
2784 return register_operand (op
, mode
) && !ANY_FP_REG_P (op
);
2787 /* Return nonzero of OP is a register operand other than an
2788 i387 fp register. */
2790 register_and_not_fp_reg_operand (op
, mode
)
2792 enum machine_mode mode
;
2794 return register_operand (op
, mode
) && !FP_REG_P (op
);
2797 /* Return nonzero if OP is general operand representable on x86_64. */
2800 x86_64_general_operand (op
, mode
)
2802 enum machine_mode mode
;
2805 return general_operand (op
, mode
);
2806 if (nonimmediate_operand (op
, mode
))
2808 return x86_64_sign_extended_value (op
);
2811 /* Return nonzero if OP is general operand representable on x86_64
2812 as either sign extended or zero extended constant. */
2815 x86_64_szext_general_operand (op
, mode
)
2817 enum machine_mode mode
;
2820 return general_operand (op
, mode
);
2821 if (nonimmediate_operand (op
, mode
))
2823 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
2826 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2829 x86_64_nonmemory_operand (op
, mode
)
2831 enum machine_mode mode
;
2834 return nonmemory_operand (op
, mode
);
2835 if (register_operand (op
, mode
))
2837 return x86_64_sign_extended_value (op
);
2840 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2843 x86_64_movabs_operand (op
, mode
)
2845 enum machine_mode mode
;
2847 if (!TARGET_64BIT
|| !flag_pic
)
2848 return nonmemory_operand (op
, mode
);
2849 if (register_operand (op
, mode
) || x86_64_sign_extended_value (op
))
2851 if (CONSTANT_P (op
) && !symbolic_reference_mentioned_p (op
))
2856 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2859 x86_64_szext_nonmemory_operand (op
, mode
)
2861 enum machine_mode mode
;
2864 return nonmemory_operand (op
, mode
);
2865 if (register_operand (op
, mode
))
2867 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
2870 /* Return nonzero if OP is immediate operand representable on x86_64. */
2873 x86_64_immediate_operand (op
, mode
)
2875 enum machine_mode mode
;
2878 return immediate_operand (op
, mode
);
2879 return x86_64_sign_extended_value (op
);
2882 /* Return nonzero if OP is immediate operand representable on x86_64. */
2885 x86_64_zext_immediate_operand (op
, mode
)
2887 enum machine_mode mode ATTRIBUTE_UNUSED
;
2889 return x86_64_zero_extended_value (op
);
2892 /* Return nonzero if OP is (const_int 1), else return zero. */
2895 const_int_1_operand (op
, mode
)
2897 enum machine_mode mode ATTRIBUTE_UNUSED
;
2899 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) == 1);
2902 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2903 for shift & compare patterns, as shifting by 0 does not change flags),
2904 else return zero. */
2907 const_int_1_31_operand (op
, mode
)
2909 enum machine_mode mode ATTRIBUTE_UNUSED
;
2911 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 1 && INTVAL (op
) <= 31);
2914 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2915 reference and a constant. */
2918 symbolic_operand (op
, mode
)
2920 enum machine_mode mode ATTRIBUTE_UNUSED
;
2922 switch (GET_CODE (op
))
2930 if (GET_CODE (op
) == SYMBOL_REF
2931 || GET_CODE (op
) == LABEL_REF
2932 || (GET_CODE (op
) == UNSPEC
2933 && (XINT (op
, 1) == UNSPEC_GOT
2934 || XINT (op
, 1) == UNSPEC_GOTOFF
2935 || XINT (op
, 1) == UNSPEC_GOTPCREL
)))
2937 if (GET_CODE (op
) != PLUS
2938 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
2942 if (GET_CODE (op
) == SYMBOL_REF
2943 || GET_CODE (op
) == LABEL_REF
)
2945 /* Only @GOTOFF gets offsets. */
2946 if (GET_CODE (op
) != UNSPEC
2947 || XINT (op
, 1) != UNSPEC_GOTOFF
)
2950 op
= XVECEXP (op
, 0, 0);
2951 if (GET_CODE (op
) == SYMBOL_REF
2952 || GET_CODE (op
) == LABEL_REF
)
2961 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2964 pic_symbolic_operand (op
, mode
)
2966 enum machine_mode mode ATTRIBUTE_UNUSED
;
2968 if (GET_CODE (op
) != CONST
)
2973 if (GET_CODE (XEXP (op
, 0)) == UNSPEC
)
2978 if (GET_CODE (op
) == UNSPEC
)
2980 if (GET_CODE (op
) != PLUS
2981 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
2984 if (GET_CODE (op
) == UNSPEC
)
2990 /* Return true if OP is a symbolic operand that resolves locally. */
2993 local_symbolic_operand (op
, mode
)
2995 enum machine_mode mode ATTRIBUTE_UNUSED
;
2997 if (GET_CODE (op
) == LABEL_REF
)
3000 if (GET_CODE (op
) == CONST
3001 && GET_CODE (XEXP (op
, 0)) == PLUS
3002 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3003 op
= XEXP (XEXP (op
, 0), 0);
3005 if (GET_CODE (op
) != SYMBOL_REF
)
3008 /* These we've been told are local by varasm and encode_section_info
3010 if (CONSTANT_POOL_ADDRESS_P (op
) || SYMBOL_REF_FLAG (op
))
3013 /* There is, however, a not insubstantial body of code in the rest of
3014 the compiler that assumes it can just stick the results of
3015 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3016 /* ??? This is a hack. Should update the body of the compiler to
3017 always create a DECL an invoke targetm.encode_section_info. */
3018 if (strncmp (XSTR (op
, 0), internal_label_prefix
,
3019 internal_label_prefix_len
) == 0)
3025 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3028 tls_symbolic_operand (op
, mode
)
3030 enum machine_mode mode ATTRIBUTE_UNUSED
;
3032 const char *symbol_str
;
3034 if (GET_CODE (op
) != SYMBOL_REF
)
3036 symbol_str
= XSTR (op
, 0);
3038 if (symbol_str
[0] != '%')
3040 return strchr (tls_model_chars
, symbol_str
[1]) - tls_model_chars
;
3044 tls_symbolic_operand_1 (op
, kind
)
3046 enum tls_model kind
;
3048 const char *symbol_str
;
3050 if (GET_CODE (op
) != SYMBOL_REF
)
3052 symbol_str
= XSTR (op
, 0);
3054 return symbol_str
[0] == '%' && symbol_str
[1] == tls_model_chars
[kind
];
3058 global_dynamic_symbolic_operand (op
, mode
)
3060 enum machine_mode mode ATTRIBUTE_UNUSED
;
3062 return tls_symbolic_operand_1 (op
, TLS_MODEL_GLOBAL_DYNAMIC
);
3066 local_dynamic_symbolic_operand (op
, mode
)
3068 enum machine_mode mode ATTRIBUTE_UNUSED
;
3070 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_DYNAMIC
);
3074 initial_exec_symbolic_operand (op
, mode
)
3076 enum machine_mode mode ATTRIBUTE_UNUSED
;
3078 return tls_symbolic_operand_1 (op
, TLS_MODEL_INITIAL_EXEC
);
3082 local_exec_symbolic_operand (op
, mode
)
3084 enum machine_mode mode ATTRIBUTE_UNUSED
;
3086 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_EXEC
);
3089 /* Test for a valid operand for a call instruction. Don't allow the
3090 arg pointer register or virtual regs since they may decay into
3091 reg + const, which the patterns can't handle. */
3094 call_insn_operand (op
, mode
)
3096 enum machine_mode mode ATTRIBUTE_UNUSED
;
3098 /* Disallow indirect through a virtual register. This leads to
3099 compiler aborts when trying to eliminate them. */
3100 if (GET_CODE (op
) == REG
3101 && (op
== arg_pointer_rtx
3102 || op
== frame_pointer_rtx
3103 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
3104 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
3107 /* Disallow `call 1234'. Due to varying assembler lameness this
3108 gets either rejected or translated to `call .+1234'. */
3109 if (GET_CODE (op
) == CONST_INT
)
3112 /* Explicitly allow SYMBOL_REF even if pic. */
3113 if (GET_CODE (op
) == SYMBOL_REF
)
3116 /* Otherwise we can allow any general_operand in the address. */
3117 return general_operand (op
, Pmode
);
3121 constant_call_address_operand (op
, mode
)
3123 enum machine_mode mode ATTRIBUTE_UNUSED
;
3125 if (GET_CODE (op
) == CONST
3126 && GET_CODE (XEXP (op
, 0)) == PLUS
3127 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3128 op
= XEXP (XEXP (op
, 0), 0);
3129 return GET_CODE (op
) == SYMBOL_REF
;
3132 /* Match exactly zero and one. */
3135 const0_operand (op
, mode
)
3137 enum machine_mode mode
;
3139 return op
== CONST0_RTX (mode
);
3143 const1_operand (op
, mode
)
3145 enum machine_mode mode ATTRIBUTE_UNUSED
;
3147 return op
== const1_rtx
;
3150 /* Match 2, 4, or 8. Used for leal multiplicands. */
3153 const248_operand (op
, mode
)
3155 enum machine_mode mode ATTRIBUTE_UNUSED
;
3157 return (GET_CODE (op
) == CONST_INT
3158 && (INTVAL (op
) == 2 || INTVAL (op
) == 4 || INTVAL (op
) == 8));
3161 /* True if this is a constant appropriate for an increment or decremenmt. */
3164 incdec_operand (op
, mode
)
3166 enum machine_mode mode ATTRIBUTE_UNUSED
;
3168 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3169 registers, since carry flag is not set. */
3170 if (TARGET_PENTIUM4
&& !optimize_size
)
3172 return op
== const1_rtx
|| op
== constm1_rtx
;
3175 /* Return nonzero if OP is acceptable as operand of DImode shift
3179 shiftdi_operand (op
, mode
)
3181 enum machine_mode mode ATTRIBUTE_UNUSED
;
3184 return nonimmediate_operand (op
, mode
);
3186 return register_operand (op
, mode
);
3189 /* Return false if this is the stack pointer, or any other fake
3190 register eliminable to the stack pointer. Otherwise, this is
3193 This is used to prevent esp from being used as an index reg.
3194 Which would only happen in pathological cases. */
3197 reg_no_sp_operand (op
, mode
)
3199 enum machine_mode mode
;
3202 if (GET_CODE (t
) == SUBREG
)
3204 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
)
3207 return register_operand (op
, mode
);
3211 mmx_reg_operand (op
, mode
)
3213 enum machine_mode mode ATTRIBUTE_UNUSED
;
3215 return MMX_REG_P (op
);
3218 /* Return false if this is any eliminable register. Otherwise
3222 general_no_elim_operand (op
, mode
)
3224 enum machine_mode mode
;
3227 if (GET_CODE (t
) == SUBREG
)
3229 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3230 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3231 || t
== virtual_stack_dynamic_rtx
)
3234 && REGNO (t
) >= FIRST_VIRTUAL_REGISTER
3235 && REGNO (t
) <= LAST_VIRTUAL_REGISTER
)
3238 return general_operand (op
, mode
);
3241 /* Return false if this is any eliminable register. Otherwise
3242 register_operand or const_int. */
3245 nonmemory_no_elim_operand (op
, mode
)
3247 enum machine_mode mode
;
3250 if (GET_CODE (t
) == SUBREG
)
3252 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3253 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3254 || t
== virtual_stack_dynamic_rtx
)
3257 return GET_CODE (op
) == CONST_INT
|| register_operand (op
, mode
);
3260 /* Return false if this is any eliminable register or stack register,
3261 otherwise work like register_operand. */
3264 index_register_operand (op
, mode
)
3266 enum machine_mode mode
;
3269 if (GET_CODE (t
) == SUBREG
)
3273 if (t
== arg_pointer_rtx
3274 || t
== frame_pointer_rtx
3275 || t
== virtual_incoming_args_rtx
3276 || t
== virtual_stack_vars_rtx
3277 || t
== virtual_stack_dynamic_rtx
3278 || REGNO (t
) == STACK_POINTER_REGNUM
)
3281 return general_operand (op
, mode
);
3284 /* Return true if op is a Q_REGS class register. */
3287 q_regs_operand (op
, mode
)
3289 enum machine_mode mode
;
3291 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3293 if (GET_CODE (op
) == SUBREG
)
3294 op
= SUBREG_REG (op
);
3295 return ANY_QI_REG_P (op
);
3298 /* Return true if op is a NON_Q_REGS class register. */
3301 non_q_regs_operand (op
, mode
)
3303 enum machine_mode mode
;
3305 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3307 if (GET_CODE (op
) == SUBREG
)
3308 op
= SUBREG_REG (op
);
3309 return NON_QI_REG_P (op
);
3312 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3315 sse_comparison_operator (op
, mode
)
3317 enum machine_mode mode ATTRIBUTE_UNUSED
;
3319 enum rtx_code code
= GET_CODE (op
);
3322 /* Operations supported directly. */
3332 /* These are equivalent to ones above in non-IEEE comparisons. */
3339 return !TARGET_IEEE_FP
;
3344 /* Return 1 if OP is a valid comparison operator in valid mode. */
3346 ix86_comparison_operator (op
, mode
)
3348 enum machine_mode mode
;
3350 enum machine_mode inmode
;
3351 enum rtx_code code
= GET_CODE (op
);
3352 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3354 if (GET_RTX_CLASS (code
) != '<')
3356 inmode
= GET_MODE (XEXP (op
, 0));
3358 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3360 enum rtx_code second_code
, bypass_code
;
3361 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3362 return (bypass_code
== NIL
&& second_code
== NIL
);
3369 if (inmode
== CCmode
|| inmode
== CCGCmode
3370 || inmode
== CCGOCmode
|| inmode
== CCNOmode
)
3373 case LTU
: case GTU
: case LEU
: case ORDERED
: case UNORDERED
: case GEU
:
3374 if (inmode
== CCmode
)
3378 if (inmode
== CCmode
|| inmode
== CCGCmode
|| inmode
== CCNOmode
)
3386 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3389 fcmov_comparison_operator (op
, mode
)
3391 enum machine_mode mode
;
3393 enum machine_mode inmode
;
3394 enum rtx_code code
= GET_CODE (op
);
3395 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3397 if (GET_RTX_CLASS (code
) != '<')
3399 inmode
= GET_MODE (XEXP (op
, 0));
3400 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3402 enum rtx_code second_code
, bypass_code
;
3403 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3404 if (bypass_code
!= NIL
|| second_code
!= NIL
)
3406 code
= ix86_fp_compare_code_to_integer (code
);
3408 /* i387 supports just limited amount of conditional codes. */
3411 case LTU
: case GTU
: case LEU
: case GEU
:
3412 if (inmode
== CCmode
|| inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3415 case ORDERED
: case UNORDERED
:
3423 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3426 promotable_binary_operator (op
, mode
)
3428 enum machine_mode mode ATTRIBUTE_UNUSED
;
3430 switch (GET_CODE (op
))
3433 /* Modern CPUs have same latency for HImode and SImode multiply,
3434 but 386 and 486 do HImode multiply faster. */
3435 return ix86_cpu
> PROCESSOR_I486
;
3447 /* Nearly general operand, but accept any const_double, since we wish
3448 to be able to drop them into memory rather than have them get pulled
3452 cmp_fp_expander_operand (op
, mode
)
3454 enum machine_mode mode
;
3456 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3458 if (GET_CODE (op
) == CONST_DOUBLE
)
3460 return general_operand (op
, mode
);
3463 /* Match an SI or HImode register for a zero_extract. */
3466 ext_register_operand (op
, mode
)
3468 enum machine_mode mode ATTRIBUTE_UNUSED
;
3471 if ((!TARGET_64BIT
|| GET_MODE (op
) != DImode
)
3472 && GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
3475 if (!register_operand (op
, VOIDmode
))
3478 /* Be curefull to accept only registers having upper parts. */
3479 regno
= REG_P (op
) ? REGNO (op
) : REGNO (SUBREG_REG (op
));
3480 return (regno
> LAST_VIRTUAL_REGISTER
|| regno
< 4);
3483 /* Return 1 if this is a valid binary floating-point operation.
3484 OP is the expression matched, and MODE is its mode. */
3487 binary_fp_operator (op
, mode
)
3489 enum machine_mode mode
;
3491 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3494 switch (GET_CODE (op
))
3500 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
3508 mult_operator (op
, mode
)
3510 enum machine_mode mode ATTRIBUTE_UNUSED
;
3512 return GET_CODE (op
) == MULT
;
3516 div_operator (op
, mode
)
3518 enum machine_mode mode ATTRIBUTE_UNUSED
;
3520 return GET_CODE (op
) == DIV
;
3524 arith_or_logical_operator (op
, mode
)
3526 enum machine_mode mode
;
3528 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
3529 && (GET_RTX_CLASS (GET_CODE (op
)) == 'c'
3530 || GET_RTX_CLASS (GET_CODE (op
)) == '2'));
3533 /* Returns 1 if OP is memory operand with a displacement. */
3536 memory_displacement_operand (op
, mode
)
3538 enum machine_mode mode
;
3540 struct ix86_address parts
;
3542 if (! memory_operand (op
, mode
))
3545 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
3548 return parts
.disp
!= NULL_RTX
;
3551 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3552 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3554 ??? It seems likely that this will only work because cmpsi is an
3555 expander, and no actual insns use this. */
3558 cmpsi_operand (op
, mode
)
3560 enum machine_mode mode
;
3562 if (nonimmediate_operand (op
, mode
))
3565 if (GET_CODE (op
) == AND
3566 && GET_MODE (op
) == SImode
3567 && GET_CODE (XEXP (op
, 0)) == ZERO_EXTRACT
3568 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
3569 && GET_CODE (XEXP (XEXP (op
, 0), 2)) == CONST_INT
3570 && INTVAL (XEXP (XEXP (op
, 0), 1)) == 8
3571 && INTVAL (XEXP (XEXP (op
, 0), 2)) == 8
3572 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
3578 /* Returns 1 if OP is memory operand that can not be represented by the
3582 long_memory_operand (op
, mode
)
3584 enum machine_mode mode
;
3586 if (! memory_operand (op
, mode
))
3589 return memory_address_length (op
) != 0;
3592 /* Return nonzero if the rtx is known aligned. */
3595 aligned_operand (op
, mode
)
3597 enum machine_mode mode
;
3599 struct ix86_address parts
;
3601 if (!general_operand (op
, mode
))
3604 /* Registers and immediate operands are always "aligned". */
3605 if (GET_CODE (op
) != MEM
)
3608 /* Don't even try to do any aligned optimizations with volatiles. */
3609 if (MEM_VOLATILE_P (op
))
3614 /* Pushes and pops are only valid on the stack pointer. */
3615 if (GET_CODE (op
) == PRE_DEC
3616 || GET_CODE (op
) == POST_INC
)
3619 /* Decode the address. */
3620 if (! ix86_decompose_address (op
, &parts
))
3623 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
3624 parts
.base
= SUBREG_REG (parts
.base
);
3625 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
3626 parts
.index
= SUBREG_REG (parts
.index
);
3628 /* Look for some component that isn't known to be aligned. */
3632 && REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 32)
3637 if (REGNO_POINTER_ALIGN (REGNO (parts
.base
)) < 32)
3642 if (GET_CODE (parts
.disp
) != CONST_INT
3643 || (INTVAL (parts
.disp
) & 3) != 0)
3647 /* Didn't find one -- this must be an aligned address. */
3651 /* Return true if the constant is something that can be loaded with
3652 a special instruction. Only handle 0.0 and 1.0; others are less
3656 standard_80387_constant_p (x
)
3659 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
3661 /* Note that on the 80387, other constants, such as pi, that we should support
3662 too. On some machines, these are much slower to load as standard constant,
3663 than to load from doubles in memory. */
3664 if (x
== CONST0_RTX (GET_MODE (x
)))
3666 if (x
== CONST1_RTX (GET_MODE (x
)))
3671 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3674 standard_sse_constant_p (x
)
3677 if (GET_CODE (x
) != CONST_DOUBLE
)
3679 return (x
== CONST0_RTX (GET_MODE (x
)));
3682 /* Returns 1 if OP contains a symbol reference */
3685 symbolic_reference_mentioned_p (op
)
3688 register const char *fmt
;
3691 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
3694 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
3695 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
3701 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
3702 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
3706 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
3713 /* Return 1 if it is appropriate to emit `ret' instructions in the
3714 body of a function. Do this only if the epilogue is simple, needing a
3715 couple of insns. Prior to reloading, we can't tell how many registers
3716 must be saved, so return 0 then. Return 0 if there is no frame
3717 marker to de-allocate.
3719 If NON_SAVING_SETJMP is defined and true, then it is not possible
3720 for the epilogue to be simple, so return 0. This is a special case
3721 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3722 until final, but jump_optimize may need to know sooner if a
3726 ix86_can_use_return_insn_p ()
3728 struct ix86_frame frame
;
3730 #ifdef NON_SAVING_SETJMP
3731 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
3735 if (! reload_completed
|| frame_pointer_needed
)
3738 /* Don't allow more than 32 pop, since that's all we can do
3739 with one instruction. */
3740 if (current_function_pops_args
3741 && current_function_args_size
>= 32768)
3744 ix86_compute_frame_layout (&frame
);
3745 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
3748 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3750 x86_64_sign_extended_value (value
)
3753 switch (GET_CODE (value
))
3755 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3756 to be at least 32 and this all acceptable constants are
3757 represented as CONST_INT. */
3759 if (HOST_BITS_PER_WIDE_INT
== 32)
3763 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (value
), DImode
);
3764 return trunc_int_for_mode (val
, SImode
) == val
;
3768 /* For certain code models, the symbolic references are known to fit. */
3770 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_KERNEL
;
3772 /* For certain code models, the code is near as well. */
3774 return ix86_cmodel
!= CM_LARGE
&& ix86_cmodel
!= CM_SMALL_PIC
;
3776 /* We also may accept the offsetted memory references in certain special
3779 if (GET_CODE (XEXP (value
, 0)) == UNSPEC
3780 && XINT (XEXP (value
, 0), 1) == UNSPEC_GOTPCREL
)
3782 else if (GET_CODE (XEXP (value
, 0)) == PLUS
)
3784 rtx op1
= XEXP (XEXP (value
, 0), 0);
3785 rtx op2
= XEXP (XEXP (value
, 0), 1);
3786 HOST_WIDE_INT offset
;
3788 if (ix86_cmodel
== CM_LARGE
)
3790 if (GET_CODE (op2
) != CONST_INT
)
3792 offset
= trunc_int_for_mode (INTVAL (op2
), DImode
);
3793 switch (GET_CODE (op1
))
3796 /* For CM_SMALL assume that latest object is 1MB before
3797 end of 31bits boundary. We may also accept pretty
3798 large negative constants knowing that all objects are
3799 in the positive half of address space. */
3800 if (ix86_cmodel
== CM_SMALL
3801 && offset
< 1024*1024*1024
3802 && trunc_int_for_mode (offset
, SImode
) == offset
)
3804 /* For CM_KERNEL we know that all object resist in the
3805 negative half of 32bits address space. We may not
3806 accept negative offsets, since they may be just off
3807 and we may accept pretty large positive ones. */
3808 if (ix86_cmodel
== CM_KERNEL
3810 && trunc_int_for_mode (offset
, SImode
) == offset
)
3814 /* These conditions are similar to SYMBOL_REF ones, just the
3815 constraints for code models differ. */
3816 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
3817 && offset
< 1024*1024*1024
3818 && trunc_int_for_mode (offset
, SImode
) == offset
)
3820 if (ix86_cmodel
== CM_KERNEL
3822 && trunc_int_for_mode (offset
, SImode
) == offset
)
3835 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3837 x86_64_zero_extended_value (value
)
3840 switch (GET_CODE (value
))
3843 if (HOST_BITS_PER_WIDE_INT
== 32)
3844 return (GET_MODE (value
) == VOIDmode
3845 && !CONST_DOUBLE_HIGH (value
));
3849 if (HOST_BITS_PER_WIDE_INT
== 32)
3850 return INTVAL (value
) >= 0;
3852 return !(INTVAL (value
) & ~(HOST_WIDE_INT
) 0xffffffff);
3855 /* For certain code models, the symbolic references are known to fit. */
3857 return ix86_cmodel
== CM_SMALL
;
3859 /* For certain code models, the code is near as well. */
3861 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
;
3863 /* We also may accept the offsetted memory references in certain special
3866 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
3868 rtx op1
= XEXP (XEXP (value
, 0), 0);
3869 rtx op2
= XEXP (XEXP (value
, 0), 1);
3871 if (ix86_cmodel
== CM_LARGE
)
3873 switch (GET_CODE (op1
))
3877 /* For small code model we may accept pretty large positive
3878 offsets, since one bit is available for free. Negative
3879 offsets are limited by the size of NULL pointer area
3880 specified by the ABI. */
3881 if (ix86_cmodel
== CM_SMALL
3882 && GET_CODE (op2
) == CONST_INT
3883 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
3884 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
3887 /* ??? For the kernel, we may accept adjustment of
3888 -0x10000000, since we know that it will just convert
3889 negative address space to positive, but perhaps this
3890 is not worthwhile. */
3893 /* These conditions are similar to SYMBOL_REF ones, just the
3894 constraints for code models differ. */
3895 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
3896 && GET_CODE (op2
) == CONST_INT
3897 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
3898 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
3912 /* Value should be nonzero if functions must have frame pointers.
3913 Zero means the frame pointer need not be set up (and parms may
3914 be accessed via the stack pointer) in functions that seem suitable. */
3917 ix86_frame_pointer_required ()
3919 /* If we accessed previous frames, then the generated code expects
3920 to be able to access the saved ebp value in our frame. */
3921 if (cfun
->machine
->accesses_prev_frame
)
3924 /* Several x86 os'es need a frame pointer for other reasons,
3925 usually pertaining to setjmp. */
3926 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
3929 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3930 the frame pointer by default. Turn it back on now if we've not
3931 got a leaf function. */
3932 if (TARGET_OMIT_LEAF_FRAME_POINTER
3933 && (!current_function_is_leaf
|| current_function_profile
))
3939 /* Record that the current function accesses previous call frames. */
3942 ix86_setup_frame_addresses ()
3944 cfun
->machine
->accesses_prev_frame
= 1;
3947 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3948 # define USE_HIDDEN_LINKONCE 1
3950 # define USE_HIDDEN_LINKONCE 0
3953 static int pic_labels_used
;
3955 /* Fills in the label name that should be used for a pc thunk for
3956 the given register. */
3959 get_pc_thunk_name (name
, regno
)
3963 if (USE_HIDDEN_LINKONCE
)
3964 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
3966 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
3970 /* This function generates code for -fpic that loads %ebx with
3971 the return address of the caller and then returns. */
3974 ix86_asm_file_end (file
)
3980 for (regno
= 0; regno
< 8; ++regno
)
3984 if (! ((pic_labels_used
>> regno
) & 1))
3987 get_pc_thunk_name (name
, regno
);
3989 if (USE_HIDDEN_LINKONCE
)
3993 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
3995 TREE_PUBLIC (decl
) = 1;
3996 TREE_STATIC (decl
) = 1;
3997 DECL_ONE_ONLY (decl
) = 1;
3999 (*targetm
.asm_out
.unique_section
) (decl
, 0);
4000 named_section (decl
, NULL
, 0);
4002 (*targetm
.asm_out
.globalize_label
) (file
, name
);
4003 fputs ("\t.hidden\t", file
);
4004 assemble_name (file
, name
);
4006 ASM_DECLARE_FUNCTION_NAME (file
, name
, decl
);
4011 ASM_OUTPUT_LABEL (file
, name
);
4014 xops
[0] = gen_rtx_REG (SImode
, regno
);
4015 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
4016 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
4017 output_asm_insn ("ret", xops
);
4021 /* Emit code for the SET_GOT patterns. */
4024 output_set_got (dest
)
4030 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
4032 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
4034 xops
[2] = gen_rtx_LABEL_REF (Pmode
, gen_label_rtx ());
4037 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
4039 output_asm_insn ("call\t%a2", xops
);
4042 /* Output the "canonical" label name ("Lxx$pb") here too. This
4043 is what will be referred to by the Mach-O PIC subsystem. */
4044 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
4046 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, "L",
4047 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
4050 output_asm_insn ("pop{l}\t%0", xops
);
4055 get_pc_thunk_name (name
, REGNO (dest
));
4056 pic_labels_used
|= 1 << REGNO (dest
);
4058 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
4059 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
4060 output_asm_insn ("call\t%X2", xops
);
4063 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
4064 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
4065 else if (!TARGET_MACHO
)
4066 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops
);
4071 /* Generate an "push" pattern for input ARG. */
4077 return gen_rtx_SET (VOIDmode
,
4079 gen_rtx_PRE_DEC (Pmode
,
4080 stack_pointer_rtx
)),
4084 /* Return >= 0 if there is an unused call-clobbered register available
4085 for the entire function. */
4088 ix86_select_alt_pic_regnum ()
4090 if (current_function_is_leaf
&& !current_function_profile
)
4093 for (i
= 2; i
>= 0; --i
)
4094 if (!regs_ever_live
[i
])
4098 return INVALID_REGNUM
;
4101 /* Return 1 if we need to save REGNO. */
4103 ix86_save_reg (regno
, maybe_eh_return
)
4105 int maybe_eh_return
;
4107 if (pic_offset_table_rtx
4108 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
4109 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4110 || current_function_profile
4111 || current_function_calls_eh_return
))
4113 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
4118 if (current_function_calls_eh_return
&& maybe_eh_return
)
4123 unsigned test
= EH_RETURN_DATA_REGNO (i
);
4124 if (test
== INVALID_REGNUM
)
4131 return (regs_ever_live
[regno
]
4132 && !call_used_regs
[regno
]
4133 && !fixed_regs
[regno
]
4134 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
4137 /* Return number of registers to be saved on the stack. */
4145 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4146 if (ix86_save_reg (regno
, true))
4151 /* Return the offset between two registers, one to be eliminated, and the other
4152 its replacement, at the start of a routine. */
4155 ix86_initial_elimination_offset (from
, to
)
4159 struct ix86_frame frame
;
4160 ix86_compute_frame_layout (&frame
);
4162 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4163 return frame
.hard_frame_pointer_offset
;
4164 else if (from
== FRAME_POINTER_REGNUM
4165 && to
== HARD_FRAME_POINTER_REGNUM
)
4166 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
4169 if (to
!= STACK_POINTER_REGNUM
)
4171 else if (from
== ARG_POINTER_REGNUM
)
4172 return frame
.stack_pointer_offset
;
4173 else if (from
!= FRAME_POINTER_REGNUM
)
4176 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
4180 /* Fill structure ix86_frame about frame of currently computed function. */
4183 ix86_compute_frame_layout (frame
)
4184 struct ix86_frame
*frame
;
4186 HOST_WIDE_INT total_size
;
4187 int stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
4189 int preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
4190 HOST_WIDE_INT size
= get_frame_size ();
4192 frame
->nregs
= ix86_nsaved_regs ();
4195 /* Skip return address and saved base pointer. */
4196 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
4198 frame
->hard_frame_pointer_offset
= offset
;
4200 /* Do some sanity checking of stack_alignment_needed and
4201 preferred_alignment, since i386 port is the only using those features
4202 that may break easily. */
4204 if (size
&& !stack_alignment_needed
)
4206 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4208 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4210 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4213 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4214 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
4216 /* Register save area */
4217 offset
+= frame
->nregs
* UNITS_PER_WORD
;
4220 if (ix86_save_varrargs_registers
)
4222 offset
+= X86_64_VARARGS_SIZE
;
4223 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
4226 frame
->va_arg_size
= 0;
4228 /* Align start of frame for local function. */
4229 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
4230 & -stack_alignment_needed
) - offset
;
4232 offset
+= frame
->padding1
;
4234 /* Frame pointer points here. */
4235 frame
->frame_pointer_offset
= offset
;
4239 /* Add outgoing arguments area. Can be skipped if we eliminated
4240 all the function calls as dead code. */
4241 if (ACCUMULATE_OUTGOING_ARGS
&& !current_function_is_leaf
)
4243 offset
+= current_function_outgoing_args_size
;
4244 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
4247 frame
->outgoing_arguments_size
= 0;
4249 /* Align stack boundary. Only needed if we're calling another function
4251 if (!current_function_is_leaf
|| current_function_calls_alloca
)
4252 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
4253 & -preferred_alignment
) - offset
;
4255 frame
->padding2
= 0;
4257 offset
+= frame
->padding2
;
4259 /* We've reached end of stack frame. */
4260 frame
->stack_pointer_offset
= offset
;
4262 /* Size prologue needs to allocate. */
4263 frame
->to_allocate
=
4264 (size
+ frame
->padding1
+ frame
->padding2
4265 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
4267 if (TARGET_64BIT
&& TARGET_RED_ZONE
&& current_function_sp_is_unchanging
4268 && current_function_is_leaf
)
4270 frame
->red_zone_size
= frame
->to_allocate
;
4271 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
4272 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
4275 frame
->red_zone_size
= 0;
4276 frame
->to_allocate
-= frame
->red_zone_size
;
4277 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
4279 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
4280 fprintf (stderr
, "size: %i\n", size
);
4281 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
4282 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
4283 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
4284 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
4285 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
4286 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
4287 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
4288 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
4289 frame
->hard_frame_pointer_offset
);
4290 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
4294 /* Emit code to save registers in the prologue. */
4297 ix86_emit_save_regs ()
4302 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4303 if (ix86_save_reg (regno
, true))
4305 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
4306 RTX_FRAME_RELATED_P (insn
) = 1;
4310 /* Emit code to save registers using MOV insns. First register
4311 is restored from POINTER + OFFSET. */
4313 ix86_emit_save_regs_using_mov (pointer
, offset
)
4315 HOST_WIDE_INT offset
;
4320 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4321 if (ix86_save_reg (regno
, true))
4323 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
4325 gen_rtx_REG (Pmode
, regno
));
4326 RTX_FRAME_RELATED_P (insn
) = 1;
4327 offset
+= UNITS_PER_WORD
;
4331 /* Expand the prologue into a bunch of separate insns. */
4334 ix86_expand_prologue ()
4338 struct ix86_frame frame
;
4340 HOST_WIDE_INT allocate
;
4344 use_fast_prologue_epilogue
4345 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT
);
4346 if (TARGET_PROLOGUE_USING_MOVE
)
4347 use_mov
= use_fast_prologue_epilogue
;
4349 ix86_compute_frame_layout (&frame
);
4351 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4352 slower on all targets. Also sdb doesn't like it. */
4354 if (frame_pointer_needed
)
4356 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
4357 RTX_FRAME_RELATED_P (insn
) = 1;
4359 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
4360 RTX_FRAME_RELATED_P (insn
) = 1;
4363 allocate
= frame
.to_allocate
;
4364 /* In case we are dealing only with single register and empty frame,
4365 push is equivalent of the mov+add sequence. */
4366 if (allocate
== 0 && frame
.nregs
<= 1)
4370 ix86_emit_save_regs ();
4372 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
4376 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
4378 insn
= emit_insn (gen_pro_epilogue_adjust_stack
4379 (stack_pointer_rtx
, stack_pointer_rtx
,
4380 GEN_INT (-allocate
)));
4381 RTX_FRAME_RELATED_P (insn
) = 1;
4385 /* ??? Is this only valid for Win32? */
4392 arg0
= gen_rtx_REG (SImode
, 0);
4393 emit_move_insn (arg0
, GEN_INT (allocate
));
4395 sym
= gen_rtx_MEM (FUNCTION_MODE
,
4396 gen_rtx_SYMBOL_REF (Pmode
, "_alloca"));
4397 insn
= emit_call_insn (gen_call (sym
, const0_rtx
, constm1_rtx
));
4399 CALL_INSN_FUNCTION_USAGE (insn
)
4400 = gen_rtx_EXPR_LIST (VOIDmode
, gen_rtx_USE (VOIDmode
, arg0
),
4401 CALL_INSN_FUNCTION_USAGE (insn
));
4405 if (!frame_pointer_needed
|| !frame
.to_allocate
)
4406 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
4408 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
4409 -frame
.nregs
* UNITS_PER_WORD
);
4412 #ifdef SUBTARGET_PROLOGUE
4416 pic_reg_used
= false;
4417 if (pic_offset_table_rtx
4418 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4419 || current_function_profile
))
4421 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
4423 if (alt_pic_reg_used
!= INVALID_REGNUM
)
4424 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
4426 pic_reg_used
= true;
4431 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
4433 /* Even with accurate pre-reload life analysis, we can wind up
4434 deleting all references to the pic register after reload.
4435 Consider if cross-jumping unifies two sides of a branch
4436 controled by a comparison vs the only read from a global.
4437 In which case, allow the set_got to be deleted, though we're
4438 too late to do anything about the ebx save in the prologue. */
4439 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
4442 /* Prevent function calls from be scheduled before the call to mcount.
4443 In the pic_reg_used case, make sure that the got load isn't deleted. */
4444 if (current_function_profile
)
4445 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
4448 /* Emit code to restore saved registers using MOV insns. First register
4449 is restored from POINTER + OFFSET. */
4451 ix86_emit_restore_regs_using_mov (pointer
, offset
, maybe_eh_return
)
4454 int maybe_eh_return
;
4458 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4459 if (ix86_save_reg (regno
, maybe_eh_return
))
4461 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
4462 adjust_address (gen_rtx_MEM (Pmode
, pointer
),
4464 offset
+= UNITS_PER_WORD
;
4468 /* Restore function stack, frame, and registers. */
4471 ix86_expand_epilogue (style
)
4475 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
4476 struct ix86_frame frame
;
4477 HOST_WIDE_INT offset
;
4479 ix86_compute_frame_layout (&frame
);
4481 /* Calculate start of saved registers relative to ebp. Special care
4482 must be taken for the normal return case of a function using
4483 eh_return: the eax and edx registers are marked as saved, but not
4484 restored along this path. */
4485 offset
= frame
.nregs
;
4486 if (current_function_calls_eh_return
&& style
!= 2)
4488 offset
*= -UNITS_PER_WORD
;
4490 /* If we're only restoring one register and sp is not valid then
4491 using a move instruction to restore the register since it's
4492 less work than reloading sp and popping the register.
4494 The default code result in stack adjustment using add/lea instruction,
4495 while this code results in LEAVE instruction (or discrete equivalent),
4496 so it is profitable in some other cases as well. Especially when there
4497 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4498 and there is exactly one register to pop. This heruistic may need some
4499 tuning in future. */
4500 if ((!sp_valid
&& frame
.nregs
<= 1)
4501 || (TARGET_EPILOGUE_USING_MOVE
4502 && use_fast_prologue_epilogue
4503 && (frame
.nregs
> 1 || frame
.to_allocate
))
4504 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
4505 || (frame_pointer_needed
&& TARGET_USE_LEAVE
4506 && use_fast_prologue_epilogue
&& frame
.nregs
== 1)
4507 || current_function_calls_eh_return
)
4509 /* Restore registers. We can use ebp or esp to address the memory
4510 locations. If both are available, default to ebp, since offsets
4511 are known to be small. Only exception is esp pointing directly to the
4512 end of block of saved registers, where we may simplify addressing
4515 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
4516 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
4517 frame
.to_allocate
, style
== 2);
4519 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
4520 offset
, style
== 2);
4522 /* eh_return epilogues need %ecx added to the stack pointer. */
4525 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
4527 if (frame_pointer_needed
)
4529 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
4530 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
4531 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
4533 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
4534 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
4536 emit_insn (gen_pro_epilogue_adjust_stack
4537 (stack_pointer_rtx
, sa
, const0_rtx
));
4541 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
4542 tmp
= plus_constant (tmp
, (frame
.to_allocate
4543 + frame
.nregs
* UNITS_PER_WORD
));
4544 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
4547 else if (!frame_pointer_needed
)
4548 emit_insn (gen_pro_epilogue_adjust_stack
4549 (stack_pointer_rtx
, stack_pointer_rtx
,
4550 GEN_INT (frame
.to_allocate
4551 + frame
.nregs
* UNITS_PER_WORD
)));
4552 /* If not an i386, mov & pop is faster than "leave". */
4553 else if (TARGET_USE_LEAVE
|| optimize_size
|| !use_fast_prologue_epilogue
)
4554 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4557 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
4558 hard_frame_pointer_rtx
,
4561 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4563 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4568 /* First step is to deallocate the stack frame so that we can
4569 pop the registers. */
4572 if (!frame_pointer_needed
)
4574 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
4575 hard_frame_pointer_rtx
,
4578 else if (frame
.to_allocate
)
4579 emit_insn (gen_pro_epilogue_adjust_stack
4580 (stack_pointer_rtx
, stack_pointer_rtx
,
4581 GEN_INT (frame
.to_allocate
)));
4583 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4584 if (ix86_save_reg (regno
, false))
4587 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
4589 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
4591 if (frame_pointer_needed
)
4593 /* Leave results in shorter dependency chains on CPUs that are
4594 able to grok it fast. */
4595 if (TARGET_USE_LEAVE
)
4596 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4597 else if (TARGET_64BIT
)
4598 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4600 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4604 /* Sibcall epilogues don't want a return instruction. */
4608 if (current_function_pops_args
&& current_function_args_size
)
4610 rtx popc
= GEN_INT (current_function_pops_args
);
4612 /* i386 can only pop 64K bytes. If asked to pop more, pop
4613 return address, do explicit add, and jump indirectly to the
4616 if (current_function_pops_args
>= 65536)
4618 rtx ecx
= gen_rtx_REG (SImode
, 2);
4620 /* There are is no "pascal" calling convention in 64bit ABI. */
4624 emit_insn (gen_popsi1 (ecx
));
4625 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
4626 emit_jump_insn (gen_return_indirect_internal (ecx
));
4629 emit_jump_insn (gen_return_pop_internal (popc
));
4632 emit_jump_insn (gen_return_internal ());
4635 /* Reset from the function's potential modifications. */
4638 ix86_output_function_epilogue (file
, size
)
4639 FILE *file ATTRIBUTE_UNUSED
;
4640 HOST_WIDE_INT size ATTRIBUTE_UNUSED
;
4642 if (pic_offset_table_rtx
)
4643 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
4646 /* Extract the parts of an RTL expression that is a valid memory address
4647 for an instruction. Return 0 if the structure of the address is
4648 grossly off. Return -1 if the address contains ASHIFT, so it is not
4649 strictly valid, but still used for computing length of lea instruction.
4653 ix86_decompose_address (addr
, out
)
4655 struct ix86_address
*out
;
4657 rtx base
= NULL_RTX
;
4658 rtx index
= NULL_RTX
;
4659 rtx disp
= NULL_RTX
;
4660 HOST_WIDE_INT scale
= 1;
4661 rtx scale_rtx
= NULL_RTX
;
4664 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
4666 else if (GET_CODE (addr
) == PLUS
)
4668 rtx op0
= XEXP (addr
, 0);
4669 rtx op1
= XEXP (addr
, 1);
4670 enum rtx_code code0
= GET_CODE (op0
);
4671 enum rtx_code code1
= GET_CODE (op1
);
4673 if (code0
== REG
|| code0
== SUBREG
)
4675 if (code1
== REG
|| code1
== SUBREG
)
4676 index
= op0
, base
= op1
; /* index + base */
4678 base
= op0
, disp
= op1
; /* base + displacement */
4680 else if (code0
== MULT
)
4682 index
= XEXP (op0
, 0);
4683 scale_rtx
= XEXP (op0
, 1);
4684 if (code1
== REG
|| code1
== SUBREG
)
4685 base
= op1
; /* index*scale + base */
4687 disp
= op1
; /* index*scale + disp */
4689 else if (code0
== PLUS
&& GET_CODE (XEXP (op0
, 0)) == MULT
)
4691 index
= XEXP (XEXP (op0
, 0), 0); /* index*scale + base + disp */
4692 scale_rtx
= XEXP (XEXP (op0
, 0), 1);
4693 base
= XEXP (op0
, 1);
4696 else if (code0
== PLUS
)
4698 index
= XEXP (op0
, 0); /* index + base + disp */
4699 base
= XEXP (op0
, 1);
4705 else if (GET_CODE (addr
) == MULT
)
4707 index
= XEXP (addr
, 0); /* index*scale */
4708 scale_rtx
= XEXP (addr
, 1);
4710 else if (GET_CODE (addr
) == ASHIFT
)
4714 /* We're called for lea too, which implements ashift on occasion. */
4715 index
= XEXP (addr
, 0);
4716 tmp
= XEXP (addr
, 1);
4717 if (GET_CODE (tmp
) != CONST_INT
)
4719 scale
= INTVAL (tmp
);
4720 if ((unsigned HOST_WIDE_INT
) scale
> 3)
4726 disp
= addr
; /* displacement */
4728 /* Extract the integral value of scale. */
4731 if (GET_CODE (scale_rtx
) != CONST_INT
)
4733 scale
= INTVAL (scale_rtx
);
4736 /* Allow arg pointer and stack pointer as index if there is not scaling */
4737 if (base
&& index
&& scale
== 1
4738 && (index
== arg_pointer_rtx
|| index
== frame_pointer_rtx
4739 || index
== stack_pointer_rtx
))
4746 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4747 if ((base
== hard_frame_pointer_rtx
4748 || base
== frame_pointer_rtx
4749 || base
== arg_pointer_rtx
) && !disp
)
4752 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4753 Avoid this by transforming to [%esi+0]. */
4754 if (ix86_cpu
== PROCESSOR_K6
&& !optimize_size
4755 && base
&& !index
&& !disp
4757 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
4760 /* Special case: encode reg+reg instead of reg*2. */
4761 if (!base
&& index
&& scale
&& scale
== 2)
4762 base
= index
, scale
= 1;
4764 /* Special case: scaling cannot be encoded without base or displacement. */
4765 if (!base
&& !disp
&& index
&& scale
!= 1)
4776 /* Return cost of the memory address x.
4777 For i386, it is better to use a complex address than let gcc copy
4778 the address into a reg and make a new pseudo. But not if the address
4779 requires to two regs - that would mean more pseudos with longer
4782 ix86_address_cost (x
)
4785 struct ix86_address parts
;
4788 if (!ix86_decompose_address (x
, &parts
))
4791 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
4792 parts
.base
= SUBREG_REG (parts
.base
);
4793 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
4794 parts
.index
= SUBREG_REG (parts
.index
);
4796 /* More complex memory references are better. */
4797 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
4800 /* Attempt to minimize number of registers in the address. */
4802 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
4804 && (!REG_P (parts
.index
)
4805 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
4809 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
4811 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
4812 && parts
.base
!= parts
.index
)
4815 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4816 since it's predecode logic can't detect the length of instructions
4817 and it degenerates to vector decoded. Increase cost of such
4818 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4819 to split such addresses or even refuse such addresses at all.
4821 Following addressing modes are affected:
4826 The first and last case may be avoidable by explicitly coding the zero in
4827 memory address, but I don't have AMD-K6 machine handy to check this
4831 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4832 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4833 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
4839 /* If X is a machine specific address (i.e. a symbol or label being
4840 referenced as a displacement from the GOT implemented using an
4841 UNSPEC), then return the base term. Otherwise return X. */
4844 ix86_find_base_term (x
)
4851 if (GET_CODE (x
) != CONST
)
4854 if (GET_CODE (term
) == PLUS
4855 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
4856 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
4857 term
= XEXP (term
, 0);
4858 if (GET_CODE (term
) != UNSPEC
4859 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
4862 term
= XVECEXP (term
, 0, 0);
4864 if (GET_CODE (term
) != SYMBOL_REF
4865 && GET_CODE (term
) != LABEL_REF
)
4871 if (GET_CODE (x
) != PLUS
4872 || XEXP (x
, 0) != pic_offset_table_rtx
4873 || GET_CODE (XEXP (x
, 1)) != CONST
)
4876 term
= XEXP (XEXP (x
, 1), 0);
4878 if (GET_CODE (term
) == PLUS
&& GET_CODE (XEXP (term
, 1)) == CONST_INT
)
4879 term
= XEXP (term
, 0);
4881 if (GET_CODE (term
) != UNSPEC
4882 || XINT (term
, 1) != UNSPEC_GOTOFF
)
4885 term
= XVECEXP (term
, 0, 0);
4887 if (GET_CODE (term
) != SYMBOL_REF
4888 && GET_CODE (term
) != LABEL_REF
)
4894 /* Determine if a given RTX is a valid constant. We already know this
4895 satisfies CONSTANT_P. */
4898 legitimate_constant_p (x
)
4903 switch (GET_CODE (x
))
4906 /* TLS symbols are not constant. */
4907 if (tls_symbolic_operand (x
, Pmode
))
4912 inner
= XEXP (x
, 0);
4914 /* Offsets of TLS symbols are never valid.
4915 Discourage CSE from creating them. */
4916 if (GET_CODE (inner
) == PLUS
4917 && tls_symbolic_operand (XEXP (inner
, 0), Pmode
))
4920 /* Only some unspecs are valid as "constants". */
4921 if (GET_CODE (inner
) == UNSPEC
)
4922 switch (XINT (inner
, 1))
4925 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
4935 /* Otherwise we handle everything else in the move patterns. */
4939 /* Determine if a given RTX is a valid constant address. */
4942 constant_address_p (x
)
4945 switch (GET_CODE (x
))
4952 return TARGET_64BIT
;
4955 /* For Mach-O, really believe the CONST. */
4958 /* Otherwise fall through. */
4960 return !flag_pic
&& legitimate_constant_p (x
);
4967 /* Nonzero if the constant value X is a legitimate general operand
4968 when generating PIC code. It is given that flag_pic is on and
4969 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4972 legitimate_pic_operand_p (x
)
4977 switch (GET_CODE (x
))
4980 inner
= XEXP (x
, 0);
4982 /* Only some unspecs are valid as "constants". */
4983 if (GET_CODE (inner
) == UNSPEC
)
4984 switch (XINT (inner
, 1))
4987 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
4995 return legitimate_pic_address_disp_p (x
);
5002 /* Determine if a given CONST RTX is a valid memory displacement
5006 legitimate_pic_address_disp_p (disp
)
5011 /* In 64bit mode we can allow direct addresses of symbols and labels
5012 when they are not dynamic symbols. */
5016 if (GET_CODE (disp
) == CONST
)
5018 /* ??? Handle PIC code models */
5019 if (GET_CODE (x
) == PLUS
5020 && (GET_CODE (XEXP (x
, 1)) == CONST_INT
5021 && ix86_cmodel
== CM_SMALL_PIC
5022 && INTVAL (XEXP (x
, 1)) < 1024*1024*1024
5023 && INTVAL (XEXP (x
, 1)) > -1024*1024*1024))
5025 if (local_symbolic_operand (x
, Pmode
))
5028 if (GET_CODE (disp
) != CONST
)
5030 disp
= XEXP (disp
, 0);
5034 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5035 of GOT tables. We should not need these anyway. */
5036 if (GET_CODE (disp
) != UNSPEC
5037 || XINT (disp
, 1) != UNSPEC_GOTPCREL
)
5040 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
5041 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
5047 if (GET_CODE (disp
) == PLUS
)
5049 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
5051 disp
= XEXP (disp
, 0);
5055 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5056 if (TARGET_MACHO
&& GET_CODE (disp
) == MINUS
)
5058 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
5059 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
5060 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
5062 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
5063 if (strstr (sym_name
, "$pb") != 0)
5068 if (GET_CODE (disp
) != UNSPEC
)
5071 switch (XINT (disp
, 1))
5076 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
5078 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5079 case UNSPEC_GOTTPOFF
:
5080 case UNSPEC_GOTNTPOFF
:
5081 case UNSPEC_INDNTPOFF
:
5084 return initial_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5086 return local_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5088 return local_dynamic_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5094 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5095 memory address for an instruction. The MODE argument is the machine mode
5096 for the MEM expression that wants to use this address.
5098 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5099 convert common non-canonical forms to canonical form so that they will
5103 legitimate_address_p (mode
, addr
, strict
)
5104 enum machine_mode mode
;
5108 struct ix86_address parts
;
5109 rtx base
, index
, disp
;
5110 HOST_WIDE_INT scale
;
5111 const char *reason
= NULL
;
5112 rtx reason_rtx
= NULL_RTX
;
5114 if (TARGET_DEBUG_ADDR
)
5117 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5118 GET_MODE_NAME (mode
), strict
);
5122 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_TP
)
5124 if (TARGET_DEBUG_ADDR
)
5125 fprintf (stderr
, "Success.\n");
5129 if (ix86_decompose_address (addr
, &parts
) <= 0)
5131 reason
= "decomposition failed";
5136 index
= parts
.index
;
5138 scale
= parts
.scale
;
5140 /* Validate base register.
5142 Don't allow SUBREG's here, it can lead to spill failures when the base
5143 is one word out of a two word structure, which is represented internally
5151 if (GET_CODE (base
) == SUBREG
)
5152 reg
= SUBREG_REG (base
);
5156 if (GET_CODE (reg
) != REG
)
5158 reason
= "base is not a register";
5162 if (GET_MODE (base
) != Pmode
)
5164 reason
= "base is not in Pmode";
5168 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
5169 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
5171 reason
= "base is not valid";
5176 /* Validate index register.
5178 Don't allow SUBREG's here, it can lead to spill failures when the index
5179 is one word out of a two word structure, which is represented internally
5187 if (GET_CODE (index
) == SUBREG
)
5188 reg
= SUBREG_REG (index
);
5192 if (GET_CODE (reg
) != REG
)
5194 reason
= "index is not a register";
5198 if (GET_MODE (index
) != Pmode
)
5200 reason
= "index is not in Pmode";
5204 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
5205 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
5207 reason
= "index is not valid";
5212 /* Validate scale factor. */
5215 reason_rtx
= GEN_INT (scale
);
5218 reason
= "scale without index";
5222 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
5224 reason
= "scale is not a valid multiplier";
5229 /* Validate displacement. */
5236 if (!x86_64_sign_extended_value (disp
))
5238 reason
= "displacement is out of range";
5244 if (GET_CODE (disp
) == CONST_DOUBLE
)
5246 reason
= "displacement is a const_double";
5251 if (GET_CODE (disp
) == CONST
5252 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
5253 switch (XINT (XEXP (disp
, 0), 1))
5257 case UNSPEC_GOTPCREL
:
5260 goto is_legitimate_pic
;
5262 case UNSPEC_GOTTPOFF
:
5263 case UNSPEC_GOTNTPOFF
:
5264 case UNSPEC_INDNTPOFF
:
5270 reason
= "invalid address unspec";
5274 else if (flag_pic
&& (SYMBOLIC_CONST (disp
)
5276 && !machopic_operand_p (disp
)
5281 if (TARGET_64BIT
&& (index
|| base
))
5283 reason
= "non-constant pic memory reference";
5286 if (! legitimate_pic_address_disp_p (disp
))
5288 reason
= "displacement is an invalid pic construct";
5292 /* This code used to verify that a symbolic pic displacement
5293 includes the pic_offset_table_rtx register.
5295 While this is good idea, unfortunately these constructs may
5296 be created by "adds using lea" optimization for incorrect
5305 This code is nonsensical, but results in addressing
5306 GOT table with pic_offset_table_rtx base. We can't
5307 just refuse it easily, since it gets matched by
5308 "addsi3" pattern, that later gets split to lea in the
5309 case output register differs from input. While this
5310 can be handled by separate addsi pattern for this case
5311 that never results in lea, this seems to be easier and
5312 correct fix for crash to disable this test. */
5314 else if (!CONSTANT_ADDRESS_P (disp
))
5316 reason
= "displacement is not constant";
5321 /* Everything looks valid. */
5322 if (TARGET_DEBUG_ADDR
)
5323 fprintf (stderr
, "Success.\n");
5327 if (TARGET_DEBUG_ADDR
)
5329 fprintf (stderr
, "Error: %s\n", reason
);
5330 debug_rtx (reason_rtx
);
5335 /* Return an unique alias set for the GOT. */
5337 static HOST_WIDE_INT
5338 ix86_GOT_alias_set ()
5340 static HOST_WIDE_INT set
= -1;
5342 set
= new_alias_set ();
5346 /* Return a legitimate reference for ORIG (an address) using the
5347 register REG. If REG is 0, a new pseudo is generated.
5349 There are two types of references that must be handled:
5351 1. Global data references must load the address from the GOT, via
5352 the PIC reg. An insn is emitted to do this load, and the reg is
5355 2. Static data references, constant pool addresses, and code labels
5356 compute the address as an offset from the GOT, whose base is in
5357 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5358 differentiate them from global data objects. The returned
5359 address is the PIC reg + an unspec constant.
5361 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5362 reg also appears in the address. */
5365 legitimize_pic_address (orig
, reg
)
5375 reg
= gen_reg_rtx (Pmode
);
5376 /* Use the generic Mach-O PIC machinery. */
5377 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
5380 if (local_symbolic_operand (addr
, Pmode
))
5382 /* In 64bit mode we can address such objects directly. */
5387 /* This symbol may be referenced via a displacement from the PIC
5388 base address (@GOTOFF). */
5390 if (reload_in_progress
)
5391 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5392 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
5393 new = gen_rtx_CONST (Pmode
, new);
5394 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5398 emit_move_insn (reg
, new);
5403 else if (GET_CODE (addr
) == SYMBOL_REF
)
5407 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
5408 new = gen_rtx_CONST (Pmode
, new);
5409 new = gen_rtx_MEM (Pmode
, new);
5410 RTX_UNCHANGING_P (new) = 1;
5411 set_mem_alias_set (new, ix86_GOT_alias_set ());
5414 reg
= gen_reg_rtx (Pmode
);
5415 /* Use directly gen_movsi, otherwise the address is loaded
5416 into register for CSE. We don't want to CSE this addresses,
5417 instead we CSE addresses from the GOT table, so skip this. */
5418 emit_insn (gen_movsi (reg
, new));
5423 /* This symbol must be referenced via a load from the
5424 Global Offset Table (@GOT). */
5426 if (reload_in_progress
)
5427 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5428 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
5429 new = gen_rtx_CONST (Pmode
, new);
5430 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5431 new = gen_rtx_MEM (Pmode
, new);
5432 RTX_UNCHANGING_P (new) = 1;
5433 set_mem_alias_set (new, ix86_GOT_alias_set ());
5436 reg
= gen_reg_rtx (Pmode
);
5437 emit_move_insn (reg
, new);
5443 if (GET_CODE (addr
) == CONST
)
5445 addr
= XEXP (addr
, 0);
5447 /* We must match stuff we generate before. Assume the only
5448 unspecs that can get here are ours. Not that we could do
5449 anything with them anyway... */
5450 if (GET_CODE (addr
) == UNSPEC
5451 || (GET_CODE (addr
) == PLUS
5452 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
5454 if (GET_CODE (addr
) != PLUS
)
5457 if (GET_CODE (addr
) == PLUS
)
5459 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
5461 /* Check first to see if this is a constant offset from a @GOTOFF
5462 symbol reference. */
5463 if (local_symbolic_operand (op0
, Pmode
)
5464 && GET_CODE (op1
) == CONST_INT
)
5468 if (reload_in_progress
)
5469 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5470 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
5472 new = gen_rtx_PLUS (Pmode
, new, op1
);
5473 new = gen_rtx_CONST (Pmode
, new);
5474 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5478 emit_move_insn (reg
, new);
5484 /* ??? We need to limit offsets here. */
5489 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
5490 new = legitimize_pic_address (XEXP (addr
, 1),
5491 base
== reg
? NULL_RTX
: reg
);
5493 if (GET_CODE (new) == CONST_INT
)
5494 new = plus_constant (base
, INTVAL (new));
5497 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
5499 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
5500 new = XEXP (new, 1);
5502 new = gen_rtx_PLUS (Pmode
, base
, new);
5511 ix86_encode_section_info (decl
, first
)
5513 int first ATTRIBUTE_UNUSED
;
5515 bool local_p
= (*targetm
.binds_local_p
) (decl
);
5518 rtl
= DECL_P (decl
) ? DECL_RTL (decl
) : TREE_CST_RTL (decl
);
5519 if (GET_CODE (rtl
) != MEM
)
5521 symbol
= XEXP (rtl
, 0);
5522 if (GET_CODE (symbol
) != SYMBOL_REF
)
5525 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5526 symbol so that we may access it directly in the GOT. */
5529 SYMBOL_REF_FLAG (symbol
) = local_p
;
5531 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5532 "local dynamic", "initial exec" or "local exec" TLS models
5535 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL (decl
))
5537 const char *symbol_str
;
5540 enum tls_model kind
;
5545 kind
= TLS_MODEL_LOCAL_EXEC
;
5547 kind
= TLS_MODEL_INITIAL_EXEC
;
5549 /* Local dynamic is inefficient when we're not combining the
5550 parts of the address. */
5551 else if (optimize
&& local_p
)
5552 kind
= TLS_MODEL_LOCAL_DYNAMIC
;
5554 kind
= TLS_MODEL_GLOBAL_DYNAMIC
;
5555 if (kind
< flag_tls_default
)
5556 kind
= flag_tls_default
;
5558 symbol_str
= XSTR (symbol
, 0);
5560 if (symbol_str
[0] == '%')
5562 if (symbol_str
[1] == tls_model_chars
[kind
])
5566 len
= strlen (symbol_str
) + 1;
5567 newstr
= alloca (len
+ 2);
5570 newstr
[1] = tls_model_chars
[kind
];
5571 memcpy (newstr
+ 2, symbol_str
, len
);
5573 XSTR (symbol
, 0) = ggc_alloc_string (newstr
, len
+ 2 - 1);
5577 /* Undo the above when printing symbol names. */
5580 ix86_strip_name_encoding (str
)
5590 /* Load the thread pointer into a register. */
5593 get_thread_pointer ()
5597 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
5598 tp
= gen_rtx_MEM (Pmode
, tp
);
5599 RTX_UNCHANGING_P (tp
) = 1;
5600 set_mem_alias_set (tp
, ix86_GOT_alias_set ());
5601 tp
= force_reg (Pmode
, tp
);
5606 /* Try machine-dependent ways of modifying an illegitimate address
5607 to be legitimate. If we find one, return the new, valid address.
5608 This macro is used in only one place: `memory_address' in explow.c.
5610 OLDX is the address as it was before break_out_memory_refs was called.
5611 In some cases it is useful to look at this to decide what needs to be done.
5613 MODE and WIN are passed so that this macro can use
5614 GO_IF_LEGITIMATE_ADDRESS.
5616 It is always safe for this macro to do nothing. It exists to recognize
5617 opportunities to optimize the output.
5619 For the 80386, we handle X+REG by loading X into a register R and
5620 using R+REG. R will go in a general reg and indexing will be used.
5621 However, if REG is a broken-out memory address or multiplication,
5622 nothing needs to be done because REG can certainly go in a general reg.
5624 When -fpic is used, special handling is needed for symbolic references.
5625 See comments by legitimize_pic_address in i386.c for details. */
5628 legitimize_address (x
, oldx
, mode
)
5630 register rtx oldx ATTRIBUTE_UNUSED
;
5631 enum machine_mode mode
;
5636 if (TARGET_DEBUG_ADDR
)
5638 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5639 GET_MODE_NAME (mode
));
5643 log
= tls_symbolic_operand (x
, mode
);
5646 rtx dest
, base
, off
, pic
;
5650 case TLS_MODEL_GLOBAL_DYNAMIC
:
5651 dest
= gen_reg_rtx (Pmode
);
5652 emit_insn (gen_tls_global_dynamic (dest
, x
));
5655 case TLS_MODEL_LOCAL_DYNAMIC
:
5656 base
= gen_reg_rtx (Pmode
);
5657 emit_insn (gen_tls_local_dynamic_base (base
));
5659 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
5660 off
= gen_rtx_CONST (Pmode
, off
);
5662 return gen_rtx_PLUS (Pmode
, base
, off
);
5664 case TLS_MODEL_INITIAL_EXEC
:
5667 if (reload_in_progress
)
5668 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5669 pic
= pic_offset_table_rtx
;
5671 else if (!TARGET_GNU_TLS
)
5673 pic
= gen_reg_rtx (Pmode
);
5674 emit_insn (gen_set_got (pic
));
5679 base
= get_thread_pointer ();
5681 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
5684 : flag_pic
? UNSPEC_GOTNTPOFF
5685 : UNSPEC_INDNTPOFF
);
5686 off
= gen_rtx_CONST (Pmode
, off
);
5687 if (flag_pic
|| !TARGET_GNU_TLS
)
5688 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
5689 off
= gen_rtx_MEM (Pmode
, off
);
5690 RTX_UNCHANGING_P (off
) = 1;
5691 set_mem_alias_set (off
, ix86_GOT_alias_set ());
5692 dest
= gen_reg_rtx (Pmode
);
5696 emit_move_insn (dest
, off
);
5697 return gen_rtx_PLUS (Pmode
, base
, dest
);
5700 emit_insn (gen_subsi3 (dest
, base
, off
));
5703 case TLS_MODEL_LOCAL_EXEC
:
5704 base
= get_thread_pointer ();
5706 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
5707 TARGET_GNU_TLS
? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
5708 off
= gen_rtx_CONST (Pmode
, off
);
5711 return gen_rtx_PLUS (Pmode
, base
, off
);
5714 dest
= gen_reg_rtx (Pmode
);
5715 emit_insn (gen_subsi3 (dest
, base
, off
));
5726 if (flag_pic
&& SYMBOLIC_CONST (x
))
5727 return legitimize_pic_address (x
, 0);
5729 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5730 if (GET_CODE (x
) == ASHIFT
5731 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5732 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
5735 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
5736 GEN_INT (1 << log
));
5739 if (GET_CODE (x
) == PLUS
)
5741 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5743 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
5744 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
5745 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
5748 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
5749 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
5750 GEN_INT (1 << log
));
5753 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
5754 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
5755 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
5758 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
5759 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
5760 GEN_INT (1 << log
));
5763 /* Put multiply first if it isn't already. */
5764 if (GET_CODE (XEXP (x
, 1)) == MULT
)
5766 rtx tmp
= XEXP (x
, 0);
5767 XEXP (x
, 0) = XEXP (x
, 1);
5772 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5773 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5774 created by virtual register instantiation, register elimination, and
5775 similar optimizations. */
5776 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
5779 x
= gen_rtx_PLUS (Pmode
,
5780 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
5781 XEXP (XEXP (x
, 1), 0)),
5782 XEXP (XEXP (x
, 1), 1));
5786 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5787 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5788 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
5789 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5790 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
5791 && CONSTANT_P (XEXP (x
, 1)))
5794 rtx other
= NULL_RTX
;
5796 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5798 constant
= XEXP (x
, 1);
5799 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5801 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
5803 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5804 other
= XEXP (x
, 1);
5812 x
= gen_rtx_PLUS (Pmode
,
5813 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
5814 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
5815 plus_constant (other
, INTVAL (constant
)));
5819 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
5822 if (GET_CODE (XEXP (x
, 0)) == MULT
)
5825 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
5828 if (GET_CODE (XEXP (x
, 1)) == MULT
)
5831 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
5835 && GET_CODE (XEXP (x
, 1)) == REG
5836 && GET_CODE (XEXP (x
, 0)) == REG
)
5839 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
5842 x
= legitimize_pic_address (x
, 0);
5845 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
5848 if (GET_CODE (XEXP (x
, 0)) == REG
)
5850 register rtx temp
= gen_reg_rtx (Pmode
);
5851 register rtx val
= force_operand (XEXP (x
, 1), temp
);
5853 emit_move_insn (temp
, val
);
5859 else if (GET_CODE (XEXP (x
, 1)) == REG
)
5861 register rtx temp
= gen_reg_rtx (Pmode
);
5862 register rtx val
= force_operand (XEXP (x
, 0), temp
);
5864 emit_move_insn (temp
, val
);
5874 /* Print an integer constant expression in assembler syntax. Addition
5875 and subtraction are the only arithmetic that may appear in these
5876 expressions. FILE is the stdio stream to write to, X is the rtx, and
5877 CODE is the operand print code from the output string. */
5880 output_pic_addr_const (file
, x
, code
)
5887 switch (GET_CODE (x
))
5897 assemble_name (file
, XSTR (x
, 0));
5898 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_FLAG (x
))
5899 fputs ("@PLT", file
);
5906 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
5907 assemble_name (asm_out_file
, buf
);
5911 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
5915 /* This used to output parentheses around the expression,
5916 but that does not work on the 386 (either ATT or BSD assembler). */
5917 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5921 if (GET_MODE (x
) == VOIDmode
)
5923 /* We can use %d if the number is <32 bits and positive. */
5924 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
5925 fprintf (file
, "0x%lx%08lx",
5926 (unsigned long) CONST_DOUBLE_HIGH (x
),
5927 (unsigned long) CONST_DOUBLE_LOW (x
));
5929 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
5932 /* We can't handle floating point constants;
5933 PRINT_OPERAND must handle them. */
5934 output_operand_lossage ("floating constant misused");
5938 /* Some assemblers need integer constants to appear first. */
5939 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
5941 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5943 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5945 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5947 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5949 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5957 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
5958 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5960 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5962 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
5966 if (XVECLEN (x
, 0) != 1)
5968 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
5969 switch (XINT (x
, 1))
5972 fputs ("@GOT", file
);
5975 fputs ("@GOTOFF", file
);
5977 case UNSPEC_GOTPCREL
:
5978 fputs ("@GOTPCREL(%rip)", file
);
5980 case UNSPEC_GOTTPOFF
:
5981 /* FIXME: This might be @TPOFF in Sun ld too. */
5982 fputs ("@GOTTPOFF", file
);
5985 fputs ("@TPOFF", file
);
5988 fputs ("@NTPOFF", file
);
5991 fputs ("@DTPOFF", file
);
5993 case UNSPEC_GOTNTPOFF
:
5994 fputs ("@GOTNTPOFF", file
);
5996 case UNSPEC_INDNTPOFF
:
5997 fputs ("@INDNTPOFF", file
);
6000 output_operand_lossage ("invalid UNSPEC as operand");
6006 output_operand_lossage ("invalid expression as operand");
6010 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6011 We need to handle our special PIC relocations. */
6014 i386_dwarf_output_addr_const (file
, x
)
6019 fprintf (file
, "%s", TARGET_64BIT
? ASM_QUAD
: ASM_LONG
);
6023 fprintf (file
, "%s", ASM_LONG
);
6026 output_pic_addr_const (file
, x
, '\0');
6028 output_addr_const (file
, x
);
6032 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6033 We need to emit DTP-relative relocations. */
6036 i386_output_dwarf_dtprel (file
, size
, x
)
6044 fputs (ASM_LONG
, file
);
6048 fputs (ASM_QUAD
, file
);
6055 output_addr_const (file
, x
);
6056 fputs ("@DTPOFF", file
);
6059 /* In the name of slightly smaller debug output, and to cater to
6060 general assembler losage, recognize PIC+GOTOFF and turn it back
6061 into a direct symbol reference. */
6064 i386_simplify_dwarf_addr (orig_x
)
6069 if (GET_CODE (x
) == MEM
)
6074 if (GET_CODE (x
) != CONST
6075 || GET_CODE (XEXP (x
, 0)) != UNSPEC
6076 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
6077 || GET_CODE (orig_x
) != MEM
)
6079 return XVECEXP (XEXP (x
, 0), 0, 0);
6082 if (GET_CODE (x
) != PLUS
6083 || GET_CODE (XEXP (x
, 1)) != CONST
)
6086 if (GET_CODE (XEXP (x
, 0)) == REG
6087 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
6088 /* %ebx + GOT/GOTOFF */
6090 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
6092 /* %ebx + %reg * scale + GOT/GOTOFF */
6094 if (GET_CODE (XEXP (y
, 0)) == REG
6095 && REGNO (XEXP (y
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
6097 else if (GET_CODE (XEXP (y
, 1)) == REG
6098 && REGNO (XEXP (y
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
6102 if (GET_CODE (y
) != REG
6103 && GET_CODE (y
) != MULT
6104 && GET_CODE (y
) != ASHIFT
)
6110 x
= XEXP (XEXP (x
, 1), 0);
6111 if (GET_CODE (x
) == UNSPEC
6112 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
6113 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
6116 return gen_rtx_PLUS (Pmode
, y
, XVECEXP (x
, 0, 0));
6117 return XVECEXP (x
, 0, 0);
6120 if (GET_CODE (x
) == PLUS
6121 && GET_CODE (XEXP (x
, 0)) == UNSPEC
6122 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6123 && ((XINT (XEXP (x
, 0), 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
6124 || (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTOFF
6125 && GET_CODE (orig_x
) != MEM
)))
6127 x
= gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
6129 return gen_rtx_PLUS (Pmode
, y
, x
);
6137 put_condition_code (code
, mode
, reverse
, fp
, file
)
6139 enum machine_mode mode
;
6145 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
6147 enum rtx_code second_code
, bypass_code
;
6148 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
6149 if (bypass_code
!= NIL
|| second_code
!= NIL
)
6151 code
= ix86_fp_compare_code_to_integer (code
);
6155 code
= reverse_condition (code
);
6166 if (mode
!= CCmode
&& mode
!= CCNOmode
&& mode
!= CCGCmode
)
6171 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6172 Those same assemblers have the same but opposite losage on cmov. */
6175 suffix
= fp
? "nbe" : "a";
6178 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
6180 else if (mode
== CCmode
|| mode
== CCGCmode
)
6191 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
6193 else if (mode
== CCmode
|| mode
== CCGCmode
)
6202 suffix
= fp
? "nb" : "ae";
6205 if (mode
!= CCmode
&& mode
!= CCGCmode
&& mode
!= CCNOmode
)
6215 suffix
= fp
? "u" : "p";
6218 suffix
= fp
? "nu" : "np";
6223 fputs (suffix
, file
);
6227 print_reg (x
, code
, file
)
6232 if (REGNO (x
) == ARG_POINTER_REGNUM
6233 || REGNO (x
) == FRAME_POINTER_REGNUM
6234 || REGNO (x
) == FLAGS_REG
6235 || REGNO (x
) == FPSR_REG
)
6238 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
6241 if (code
== 'w' || MMX_REG_P (x
))
6243 else if (code
== 'b')
6245 else if (code
== 'k')
6247 else if (code
== 'q')
6249 else if (code
== 'y')
6251 else if (code
== 'h')
6254 code
= GET_MODE_SIZE (GET_MODE (x
));
6256 /* Irritatingly, AMD extended registers use different naming convention
6257 from the normal registers. */
6258 if (REX_INT_REG_P (x
))
6265 error ("extended registers have no high halves");
6268 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6271 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6274 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6277 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6280 error ("unsupported operand size for extended register");
6288 if (STACK_TOP_P (x
))
6290 fputs ("st(0)", file
);
6297 if (! ANY_FP_REG_P (x
))
6298 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
6302 fputs (hi_reg_name
[REGNO (x
)], file
);
6305 fputs (qi_reg_name
[REGNO (x
)], file
);
6308 fputs (qi_high_reg_name
[REGNO (x
)], file
);
6315 /* Locate some local-dynamic symbol still in use by this function
6316 so that we can print its name in some tls_local_dynamic_base
6320 get_some_local_dynamic_name ()
6324 if (cfun
->machine
->some_ld_name
)
6325 return cfun
->machine
->some_ld_name
;
6327 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
6329 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
6330 return cfun
->machine
->some_ld_name
;
6336 get_some_local_dynamic_name_1 (px
, data
)
6338 void *data ATTRIBUTE_UNUSED
;
6342 if (GET_CODE (x
) == SYMBOL_REF
6343 && local_dynamic_symbolic_operand (x
, Pmode
))
6345 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
6353 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6354 C -- print opcode suffix for set/cmov insn.
6355 c -- like C, but print reversed condition
6356 F,f -- likewise, but for floating-point.
6357 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6359 R -- print the prefix for register names.
6360 z -- print the opcode suffix for the size of the current operand.
6361 * -- print a star (in certain assembler syntax)
6362 A -- print an absolute memory reference.
6363 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6364 s -- print a shift double count, followed by the assemblers argument
6366 b -- print the QImode name of the register for the indicated operand.
6367 %b0 would print %al if operands[0] is reg 0.
6368 w -- likewise, print the HImode name of the register.
6369 k -- likewise, print the SImode name of the register.
6370 q -- likewise, print the DImode name of the register.
6371 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6372 y -- print "st(0)" instead of "st" as a register.
6373 D -- print condition for SSE cmp instruction.
6374 P -- if PIC, print an @PLT suffix.
6375 X -- don't print any sort of PIC '@' suffix for a symbol.
6376 & -- print some in-use local-dynamic symbol name.
6380 print_operand (file
, x
, code
)
6390 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6395 assemble_name (file
, get_some_local_dynamic_name ());
6399 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6401 else if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6403 /* Intel syntax. For absolute addresses, registers should not
6404 be surrounded by braces. */
6405 if (GET_CODE (x
) != REG
)
6408 PRINT_OPERAND (file
, x
, 0);
6416 PRINT_OPERAND (file
, x
, 0);
6421 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6426 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6431 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6436 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6441 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6446 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6451 /* 387 opcodes don't get size suffixes if the operands are
6453 if (STACK_REG_P (x
))
6456 /* Likewise if using Intel opcodes. */
6457 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6460 /* This is the size of op from size of operand. */
6461 switch (GET_MODE_SIZE (GET_MODE (x
)))
6464 #ifdef HAVE_GAS_FILDS_FISTS
6470 if (GET_MODE (x
) == SFmode
)
6485 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
6487 #ifdef GAS_MNEMONICS
6513 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
6515 PRINT_OPERAND (file
, x
, 0);
6521 /* Little bit of braindamage here. The SSE compare instructions
6522 does use completely different names for the comparisons that the
6523 fp conditional moves. */
6524 switch (GET_CODE (x
))
6539 fputs ("unord", file
);
6543 fputs ("neq", file
);
6547 fputs ("nlt", file
);
6551 fputs ("nle", file
);
6554 fputs ("ord", file
);
6562 #ifdef CMOV_SUN_AS_SYNTAX
6563 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6565 switch (GET_MODE (x
))
6567 case HImode
: putc ('w', file
); break;
6569 case SFmode
: putc ('l', file
); break;
6571 case DFmode
: putc ('q', file
); break;
6579 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
6582 #ifdef CMOV_SUN_AS_SYNTAX
6583 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6586 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
6589 /* Like above, but reverse condition */
6591 /* Check to see if argument to %c is really a constant
6592 and not a condition code which needs to be reversed. */
6593 if (GET_RTX_CLASS (GET_CODE (x
)) != '<')
6595 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6598 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
6601 #ifdef CMOV_SUN_AS_SYNTAX
6602 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6605 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
6611 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
6614 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
6617 int pred_val
= INTVAL (XEXP (x
, 0));
6619 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
6620 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
6622 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
6623 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
6625 /* Emit hints only in the case default branch prediction
6626 heruistics would fail. */
6627 if (taken
!= cputaken
)
6629 /* We use 3e (DS) prefix for taken branches and
6630 2e (CS) prefix for not taken branches. */
6632 fputs ("ds ; ", file
);
6634 fputs ("cs ; ", file
);
6641 output_operand_lossage ("invalid operand code `%c'", code
);
6645 if (GET_CODE (x
) == REG
)
6647 PRINT_REG (x
, code
, file
);
6650 else if (GET_CODE (x
) == MEM
)
6652 /* No `byte ptr' prefix for call instructions. */
6653 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
6656 switch (GET_MODE_SIZE (GET_MODE (x
)))
6658 case 1: size
= "BYTE"; break;
6659 case 2: size
= "WORD"; break;
6660 case 4: size
= "DWORD"; break;
6661 case 8: size
= "QWORD"; break;
6662 case 12: size
= "XWORD"; break;
6663 case 16: size
= "XMMWORD"; break;
6668 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6671 else if (code
== 'w')
6673 else if (code
== 'k')
6677 fputs (" PTR ", file
);
6681 if (flag_pic
&& CONSTANT_ADDRESS_P (x
))
6682 output_pic_addr_const (file
, x
, code
);
6683 /* Avoid (%rip) for call operands. */
6684 else if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
6685 && GET_CODE (x
) != CONST_INT
)
6686 output_addr_const (file
, x
);
6687 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
6688 output_operand_lossage ("invalid constraints for operand");
6693 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
6698 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
6699 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
6701 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6703 fprintf (file
, "0x%lx", l
);
6706 /* These float cases don't actually occur as immediate operands. */
6707 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
6712 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
6713 REAL_VALUE_TO_DECIMAL (r
, dstr
, -1);
6714 fprintf (file
, "%s", dstr
);
6717 else if (GET_CODE (x
) == CONST_DOUBLE
6718 && (GET_MODE (x
) == XFmode
|| GET_MODE (x
) == TFmode
))
6723 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
6724 REAL_VALUE_TO_DECIMAL (r
, dstr
, -1);
6725 fprintf (file
, "%s", dstr
);
6732 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
6734 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6737 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
6738 || GET_CODE (x
) == LABEL_REF
)
6740 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6743 fputs ("OFFSET FLAT:", file
);
6746 if (GET_CODE (x
) == CONST_INT
)
6747 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6749 output_pic_addr_const (file
, x
, code
);
6751 output_addr_const (file
, x
);
6755 /* Print a memory operand whose address is ADDR. */
6758 print_operand_address (file
, addr
)
6762 struct ix86_address parts
;
6763 rtx base
, index
, disp
;
6766 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_TP
)
6768 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6769 fputs ("DWORD PTR ", file
);
6770 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
6772 fputs ("gs:0", file
);
6776 if (! ix86_decompose_address (addr
, &parts
))
6780 index
= parts
.index
;
6782 scale
= parts
.scale
;
6784 if (!base
&& !index
)
6786 /* Displacement only requires special attention. */
6788 if (GET_CODE (disp
) == CONST_INT
)
6790 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6792 if (USER_LABEL_PREFIX
[0] == 0)
6794 fputs ("ds:", file
);
6796 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (addr
));
6799 output_pic_addr_const (file
, addr
, 0);
6801 output_addr_const (file
, addr
);
6803 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6805 && (GET_CODE (addr
) == SYMBOL_REF
6806 || GET_CODE (addr
) == LABEL_REF
6807 || (GET_CODE (addr
) == CONST
6808 && GET_CODE (XEXP (addr
, 0)) == PLUS
6809 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
6810 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)))
6811 fputs ("(%rip)", file
);
6815 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6820 output_pic_addr_const (file
, disp
, 0);
6821 else if (GET_CODE (disp
) == LABEL_REF
)
6822 output_asm_label (disp
);
6824 output_addr_const (file
, disp
);
6829 PRINT_REG (base
, 0, file
);
6833 PRINT_REG (index
, 0, file
);
6835 fprintf (file
, ",%d", scale
);
6841 rtx offset
= NULL_RTX
;
6845 /* Pull out the offset of a symbol; print any symbol itself. */
6846 if (GET_CODE (disp
) == CONST
6847 && GET_CODE (XEXP (disp
, 0)) == PLUS
6848 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
6850 offset
= XEXP (XEXP (disp
, 0), 1);
6851 disp
= gen_rtx_CONST (VOIDmode
,
6852 XEXP (XEXP (disp
, 0), 0));
6856 output_pic_addr_const (file
, disp
, 0);
6857 else if (GET_CODE (disp
) == LABEL_REF
)
6858 output_asm_label (disp
);
6859 else if (GET_CODE (disp
) == CONST_INT
)
6862 output_addr_const (file
, disp
);
6868 PRINT_REG (base
, 0, file
);
6871 if (INTVAL (offset
) >= 0)
6873 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
6877 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
6884 PRINT_REG (index
, 0, file
);
6886 fprintf (file
, "*%d", scale
);
6894 output_addr_const_extra (file
, x
)
6900 if (GET_CODE (x
) != UNSPEC
)
6903 op
= XVECEXP (x
, 0, 0);
6904 switch (XINT (x
, 1))
6906 case UNSPEC_GOTTPOFF
:
6907 output_addr_const (file
, op
);
6908 /* FIXME: This might be @TPOFF in Sun ld. */
6909 fputs ("@GOTTPOFF", file
);
6912 output_addr_const (file
, op
);
6913 fputs ("@TPOFF", file
);
6916 output_addr_const (file
, op
);
6917 fputs ("@NTPOFF", file
);
6920 output_addr_const (file
, op
);
6921 fputs ("@DTPOFF", file
);
6923 case UNSPEC_GOTNTPOFF
:
6924 output_addr_const (file
, op
);
6925 fputs ("@GOTNTPOFF", file
);
6927 case UNSPEC_INDNTPOFF
:
6928 output_addr_const (file
, op
);
6929 fputs ("@INDNTPOFF", file
);
6939 /* Split one or more DImode RTL references into pairs of SImode
6940 references. The RTL can be REG, offsettable MEM, integer constant, or
6941 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6942 split and "num" is its length. lo_half and hi_half are output arrays
6943 that parallel "operands". */
6946 split_di (operands
, num
, lo_half
, hi_half
)
6949 rtx lo_half
[], hi_half
[];
6953 rtx op
= operands
[num
];
6955 /* simplify_subreg refuse to split volatile memory addresses,
6956 but we still have to handle it. */
6957 if (GET_CODE (op
) == MEM
)
6959 lo_half
[num
] = adjust_address (op
, SImode
, 0);
6960 hi_half
[num
] = adjust_address (op
, SImode
, 4);
6964 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
6965 GET_MODE (op
) == VOIDmode
6966 ? DImode
: GET_MODE (op
), 0);
6967 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
6968 GET_MODE (op
) == VOIDmode
6969 ? DImode
: GET_MODE (op
), 4);
6973 /* Split one or more TImode RTL references into pairs of SImode
6974 references. The RTL can be REG, offsettable MEM, integer constant, or
6975 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6976 split and "num" is its length. lo_half and hi_half are output arrays
6977 that parallel "operands". */
6980 split_ti (operands
, num
, lo_half
, hi_half
)
6983 rtx lo_half
[], hi_half
[];
6987 rtx op
= operands
[num
];
6989 /* simplify_subreg refuse to split volatile memory addresses, but we
6990 still have to handle it. */
6991 if (GET_CODE (op
) == MEM
)
6993 lo_half
[num
] = adjust_address (op
, DImode
, 0);
6994 hi_half
[num
] = adjust_address (op
, DImode
, 8);
6998 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
6999 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
7004 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7005 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7006 is the expression of the binary operation. The output may either be
7007 emitted here, or returned to the caller, like all output_* functions.
7009 There is no guarantee that the operands are the same mode, as they
7010 might be within FLOAT or FLOAT_EXTEND expressions. */
7012 #ifndef SYSV386_COMPAT
7013 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7014 wants to fix the assemblers because that causes incompatibility
7015 with gcc. No-one wants to fix gcc because that causes
7016 incompatibility with assemblers... You can use the option of
7017 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7018 #define SYSV386_COMPAT 1
7022 output_387_binary_op (insn
, operands
)
7026 static char buf
[30];
7029 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]) | SSE_REG_P (operands
[2]);
7031 #ifdef ENABLE_CHECKING
7032 /* Even if we do not want to check the inputs, this documents input
7033 constraints. Which helps in understanding the following code. */
7034 if (STACK_REG_P (operands
[0])
7035 && ((REG_P (operands
[1])
7036 && REGNO (operands
[0]) == REGNO (operands
[1])
7037 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
7038 || (REG_P (operands
[2])
7039 && REGNO (operands
[0]) == REGNO (operands
[2])
7040 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
7041 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
7047 switch (GET_CODE (operands
[3]))
7050 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7051 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7059 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7060 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7068 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7069 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7077 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7078 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7092 if (GET_MODE (operands
[0]) == SFmode
)
7093 strcat (buf
, "ss\t{%2, %0|%0, %2}");
7095 strcat (buf
, "sd\t{%2, %0|%0, %2}");
7100 switch (GET_CODE (operands
[3]))
7104 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
7106 rtx temp
= operands
[2];
7107 operands
[2] = operands
[1];
7111 /* know operands[0] == operands[1]. */
7113 if (GET_CODE (operands
[2]) == MEM
)
7119 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7121 if (STACK_TOP_P (operands
[0]))
7122 /* How is it that we are storing to a dead operand[2]?
7123 Well, presumably operands[1] is dead too. We can't
7124 store the result to st(0) as st(0) gets popped on this
7125 instruction. Instead store to operands[2] (which I
7126 think has to be st(1)). st(1) will be popped later.
7127 gcc <= 2.8.1 didn't have this check and generated
7128 assembly code that the Unixware assembler rejected. */
7129 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7131 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7135 if (STACK_TOP_P (operands
[0]))
7136 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7138 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7143 if (GET_CODE (operands
[1]) == MEM
)
7149 if (GET_CODE (operands
[2]) == MEM
)
7155 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7158 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7159 derived assemblers, confusingly reverse the direction of
7160 the operation for fsub{r} and fdiv{r} when the
7161 destination register is not st(0). The Intel assembler
7162 doesn't have this brain damage. Read !SYSV386_COMPAT to
7163 figure out what the hardware really does. */
7164 if (STACK_TOP_P (operands
[0]))
7165 p
= "{p\t%0, %2|rp\t%2, %0}";
7167 p
= "{rp\t%2, %0|p\t%0, %2}";
7169 if (STACK_TOP_P (operands
[0]))
7170 /* As above for fmul/fadd, we can't store to st(0). */
7171 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7173 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7178 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
7181 if (STACK_TOP_P (operands
[0]))
7182 p
= "{rp\t%0, %1|p\t%1, %0}";
7184 p
= "{p\t%1, %0|rp\t%0, %1}";
7186 if (STACK_TOP_P (operands
[0]))
7187 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7189 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7194 if (STACK_TOP_P (operands
[0]))
7196 if (STACK_TOP_P (operands
[1]))
7197 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7199 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7202 else if (STACK_TOP_P (operands
[1]))
7205 p
= "{\t%1, %0|r\t%0, %1}";
7207 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7213 p
= "{r\t%2, %0|\t%0, %2}";
7215 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7228 /* Output code to initialize control word copies used by
7229 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7230 is set to control word rounding downwards. */
7232 emit_i387_cw_initialization (normal
, round_down
)
7233 rtx normal
, round_down
;
7235 rtx reg
= gen_reg_rtx (HImode
);
7237 emit_insn (gen_x86_fnstcw_1 (normal
));
7238 emit_move_insn (reg
, normal
);
7239 if (!TARGET_PARTIAL_REG_STALL
&& !optimize_size
7241 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
7243 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0xc00)));
7244 emit_move_insn (round_down
, reg
);
7247 /* Output code for INSN to convert a float to a signed int. OPERANDS
7248 are the insn operands. The output may be [HSD]Imode and the input
7249 operand may be [SDX]Fmode. */
7252 output_fix_trunc (insn
, operands
)
7256 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7257 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
7259 /* Jump through a hoop or two for DImode, since the hardware has no
7260 non-popping instruction. We used to do this a different way, but
7261 that was somewhat fragile and broke with post-reload splitters. */
7262 if (dimode_p
&& !stack_top_dies
)
7263 output_asm_insn ("fld\t%y1", operands
);
7265 if (!STACK_TOP_P (operands
[1]))
7268 if (GET_CODE (operands
[0]) != MEM
)
7271 output_asm_insn ("fldcw\t%3", operands
);
7272 if (stack_top_dies
|| dimode_p
)
7273 output_asm_insn ("fistp%z0\t%0", operands
);
7275 output_asm_insn ("fist%z0\t%0", operands
);
7276 output_asm_insn ("fldcw\t%2", operands
);
7281 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7282 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7283 when fucom should be used. */
7286 output_fp_compare (insn
, operands
, eflags_p
, unordered_p
)
7289 int eflags_p
, unordered_p
;
7292 rtx cmp_op0
= operands
[0];
7293 rtx cmp_op1
= operands
[1];
7294 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]);
7299 cmp_op1
= operands
[2];
7303 if (GET_MODE (operands
[0]) == SFmode
)
7305 return "ucomiss\t{%1, %0|%0, %1}";
7307 return "comiss\t{%1, %0|%0, %y}";
7310 return "ucomisd\t{%1, %0|%0, %1}";
7312 return "comisd\t{%1, %0|%0, %y}";
7315 if (! STACK_TOP_P (cmp_op0
))
7318 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7320 if (STACK_REG_P (cmp_op1
)
7322 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
7323 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
7325 /* If both the top of the 387 stack dies, and the other operand
7326 is also a stack register that dies, then this must be a
7327 `fcompp' float compare */
7331 /* There is no double popping fcomi variant. Fortunately,
7332 eflags is immune from the fstp's cc clobbering. */
7334 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
7336 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
7344 return "fucompp\n\tfnstsw\t%0";
7346 return "fcompp\n\tfnstsw\t%0";
7359 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7361 static const char * const alt
[24] =
7373 "fcomi\t{%y1, %0|%0, %y1}",
7374 "fcomip\t{%y1, %0|%0, %y1}",
7375 "fucomi\t{%y1, %0|%0, %y1}",
7376 "fucomip\t{%y1, %0|%0, %y1}",
7383 "fcom%z2\t%y2\n\tfnstsw\t%0",
7384 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7385 "fucom%z2\t%y2\n\tfnstsw\t%0",
7386 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7388 "ficom%z2\t%y2\n\tfnstsw\t%0",
7389 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7397 mask
= eflags_p
<< 3;
7398 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
7399 mask
|= unordered_p
<< 1;
7400 mask
|= stack_top_dies
;
7413 ix86_output_addr_vec_elt (file
, value
)
7417 const char *directive
= ASM_LONG
;
7422 directive
= ASM_QUAD
;
7428 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
7432 ix86_output_addr_diff_elt (file
, value
, rel
)
7437 fprintf (file
, "%s%s%d-%s%d\n",
7438 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
7439 else if (HAVE_AS_GOTOFF_IN_DATA
)
7440 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
7442 else if (TARGET_MACHO
)
7443 fprintf (file
, "%s%s%d-%s\n", ASM_LONG
, LPREFIX
, value
,
7444 machopic_function_base_name () + 1);
7447 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
7448 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
7451 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7455 ix86_expand_clear (dest
)
7460 /* We play register width games, which are only valid after reload. */
7461 if (!reload_completed
)
7464 /* Avoid HImode and its attendant prefix byte. */
7465 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
7466 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
7468 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
7470 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7471 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
7473 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
7474 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
7480 /* X is an unchanging MEM. If it is a constant pool reference, return
7481 the constant pool rtx, else NULL. */
7484 maybe_get_pool_constant (x
)
7491 if (GET_CODE (x
) != PLUS
)
7493 if (XEXP (x
, 0) != pic_offset_table_rtx
)
7496 if (GET_CODE (x
) != CONST
)
7499 if (GET_CODE (x
) != UNSPEC
)
7501 if (XINT (x
, 1) != UNSPEC_GOTOFF
)
7503 x
= XVECEXP (x
, 0, 0);
7506 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7507 return get_pool_constant (x
);
7513 ix86_expand_move (mode
, operands
)
7514 enum machine_mode mode
;
7517 int strict
= (reload_in_progress
|| reload_completed
);
7518 rtx insn
, op0
, op1
, tmp
;
7523 /* ??? We have a slight problem. We need to say that tls symbols are
7524 not legitimate constants so that reload does not helpfully reload
7525 these constants from a REG_EQUIV, which we cannot handle. (Recall
7526 that general- and local-dynamic address resolution requires a
7529 However, if we say that tls symbols are not legitimate constants,
7530 then emit_move_insn helpfully drop them into the constant pool.
7532 It is far easier to work around emit_move_insn than reload. Recognize
7533 the MEM that we would have created and extract the symbol_ref. */
7536 && GET_CODE (op1
) == MEM
7537 && RTX_UNCHANGING_P (op1
))
7539 tmp
= maybe_get_pool_constant (op1
);
7540 /* Note that we only care about symbolic constants here, which
7541 unlike CONST_INT will always have a proper mode. */
7542 if (tmp
&& GET_MODE (tmp
) == Pmode
)
7546 if (tls_symbolic_operand (op1
, Pmode
))
7548 op1
= legitimize_address (op1
, op1
, VOIDmode
);
7549 if (GET_CODE (op0
) == MEM
)
7551 tmp
= gen_reg_rtx (mode
);
7552 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, op1
));
7556 else if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
7561 rtx temp
= ((reload_in_progress
7562 || ((op0
&& GET_CODE (op0
) == REG
)
7564 ? op0
: gen_reg_rtx (Pmode
));
7565 op1
= machopic_indirect_data_reference (op1
, temp
);
7566 op1
= machopic_legitimize_pic_address (op1
, mode
,
7567 temp
== op1
? 0 : temp
);
7571 if (MACHOPIC_INDIRECT
)
7572 op1
= machopic_indirect_data_reference (op1
, 0);
7576 insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
7580 #endif /* TARGET_MACHO */
7581 if (GET_CODE (op0
) == MEM
)
7582 op1
= force_reg (Pmode
, op1
);
7586 if (GET_CODE (temp
) != REG
)
7587 temp
= gen_reg_rtx (Pmode
);
7588 temp
= legitimize_pic_address (op1
, temp
);
7596 if (GET_CODE (op0
) == MEM
7597 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
7598 || !push_operand (op0
, mode
))
7599 && GET_CODE (op1
) == MEM
)
7600 op1
= force_reg (mode
, op1
);
7602 if (push_operand (op0
, mode
)
7603 && ! general_no_elim_operand (op1
, mode
))
7604 op1
= copy_to_mode_reg (mode
, op1
);
7606 /* Force large constants in 64bit compilation into register
7607 to get them CSEed. */
7608 if (TARGET_64BIT
&& mode
== DImode
7609 && immediate_operand (op1
, mode
)
7610 && !x86_64_zero_extended_value (op1
)
7611 && !register_operand (op0
, mode
)
7612 && optimize
&& !reload_completed
&& !reload_in_progress
)
7613 op1
= copy_to_mode_reg (mode
, op1
);
7615 if (FLOAT_MODE_P (mode
))
7617 /* If we are loading a floating point constant to a register,
7618 force the value to memory now, since we'll get better code
7619 out the back end. */
7623 else if (GET_CODE (op1
) == CONST_DOUBLE
7624 && register_operand (op0
, mode
))
7625 op1
= validize_mem (force_const_mem (mode
, op1
));
7629 insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
7635 ix86_expand_vector_move (mode
, operands
)
7636 enum machine_mode mode
;
7639 /* Force constants other than zero into memory. We do not know how
7640 the instructions used to build constants modify the upper 64 bits
7641 of the register, once we have that information we may be able
7642 to handle some of them more efficiently. */
7643 if ((reload_in_progress
| reload_completed
) == 0
7644 && register_operand (operands
[0], mode
)
7645 && CONSTANT_P (operands
[1]))
7647 rtx addr
= gen_reg_rtx (Pmode
);
7648 emit_move_insn (addr
, XEXP (force_const_mem (mode
, operands
[1]), 0));
7649 operands
[1] = gen_rtx_MEM (mode
, addr
);
7652 /* Make operand1 a register if it isn't already. */
7653 if ((reload_in_progress
| reload_completed
) == 0
7654 && !register_operand (operands
[0], mode
)
7655 && !register_operand (operands
[1], mode
)
7656 && operands
[1] != CONST0_RTX (mode
))
7658 rtx temp
= force_reg (GET_MODE (operands
[1]), operands
[1]);
7659 emit_move_insn (operands
[0], temp
);
7663 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]));
7666 /* Attempt to expand a binary operator. Make the expansion closer to the
7667 actual machine, then just general_operand, which will allow 3 separate
7668 memory references (one output, two input) in a single insn. */
7671 ix86_expand_binary_operator (code
, mode
, operands
)
7673 enum machine_mode mode
;
7676 int matching_memory
;
7677 rtx src1
, src2
, dst
, op
, clob
;
7683 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7684 if (GET_RTX_CLASS (code
) == 'c'
7685 && (rtx_equal_p (dst
, src2
)
7686 || immediate_operand (src1
, mode
)))
7693 /* If the destination is memory, and we do not have matching source
7694 operands, do things in registers. */
7695 matching_memory
= 0;
7696 if (GET_CODE (dst
) == MEM
)
7698 if (rtx_equal_p (dst
, src1
))
7699 matching_memory
= 1;
7700 else if (GET_RTX_CLASS (code
) == 'c'
7701 && rtx_equal_p (dst
, src2
))
7702 matching_memory
= 2;
7704 dst
= gen_reg_rtx (mode
);
7707 /* Both source operands cannot be in memory. */
7708 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
7710 if (matching_memory
!= 2)
7711 src2
= force_reg (mode
, src2
);
7713 src1
= force_reg (mode
, src1
);
7716 /* If the operation is not commutable, source 1 cannot be a constant
7717 or non-matching memory. */
7718 if ((CONSTANT_P (src1
)
7719 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
7720 && GET_RTX_CLASS (code
) != 'c')
7721 src1
= force_reg (mode
, src1
);
7723 /* If optimizing, copy to regs to improve CSE */
7724 if (optimize
&& ! no_new_pseudos
)
7726 if (GET_CODE (dst
) == MEM
)
7727 dst
= gen_reg_rtx (mode
);
7728 if (GET_CODE (src1
) == MEM
)
7729 src1
= force_reg (mode
, src1
);
7730 if (GET_CODE (src2
) == MEM
)
7731 src2
= force_reg (mode
, src2
);
7734 /* Emit the instruction. */
7736 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
7737 if (reload_in_progress
)
7739 /* Reload doesn't know about the flags register, and doesn't know that
7740 it doesn't want to clobber it. We can only do this with PLUS. */
7747 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
7748 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
7751 /* Fix up the destination if needed. */
7752 if (dst
!= operands
[0])
7753 emit_move_insn (operands
[0], dst
);
7756 /* Return TRUE or FALSE depending on whether the binary operator meets the
7757 appropriate constraints. */
7760 ix86_binary_operator_ok (code
, mode
, operands
)
7762 enum machine_mode mode ATTRIBUTE_UNUSED
;
7765 /* Both source operands cannot be in memory. */
7766 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
7768 /* If the operation is not commutable, source 1 cannot be a constant. */
7769 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != 'c')
7771 /* If the destination is memory, we must have a matching source operand. */
7772 if (GET_CODE (operands
[0]) == MEM
7773 && ! (rtx_equal_p (operands
[0], operands
[1])
7774 || (GET_RTX_CLASS (code
) == 'c'
7775 && rtx_equal_p (operands
[0], operands
[2]))))
7777 /* If the operation is not commutable and the source 1 is memory, we must
7778 have a matching destination. */
7779 if (GET_CODE (operands
[1]) == MEM
7780 && GET_RTX_CLASS (code
) != 'c'
7781 && ! rtx_equal_p (operands
[0], operands
[1]))
7786 /* Attempt to expand a unary operator. Make the expansion closer to the
7787 actual machine, then just general_operand, which will allow 2 separate
7788 memory references (one output, one input) in a single insn. */
7791 ix86_expand_unary_operator (code
, mode
, operands
)
7793 enum machine_mode mode
;
7796 int matching_memory
;
7797 rtx src
, dst
, op
, clob
;
7802 /* If the destination is memory, and we do not have matching source
7803 operands, do things in registers. */
7804 matching_memory
= 0;
7805 if (GET_CODE (dst
) == MEM
)
7807 if (rtx_equal_p (dst
, src
))
7808 matching_memory
= 1;
7810 dst
= gen_reg_rtx (mode
);
7813 /* When source operand is memory, destination must match. */
7814 if (!matching_memory
&& GET_CODE (src
) == MEM
)
7815 src
= force_reg (mode
, src
);
7817 /* If optimizing, copy to regs to improve CSE */
7818 if (optimize
&& ! no_new_pseudos
)
7820 if (GET_CODE (dst
) == MEM
)
7821 dst
= gen_reg_rtx (mode
);
7822 if (GET_CODE (src
) == MEM
)
7823 src
= force_reg (mode
, src
);
7826 /* Emit the instruction. */
7828 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
7829 if (reload_in_progress
|| code
== NOT
)
7831 /* Reload doesn't know about the flags register, and doesn't know that
7832 it doesn't want to clobber it. */
7839 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
7840 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
7843 /* Fix up the destination if needed. */
7844 if (dst
!= operands
[0])
7845 emit_move_insn (operands
[0], dst
);
7848 /* Return TRUE or FALSE depending on whether the unary operator meets the
7849 appropriate constraints. */
7852 ix86_unary_operator_ok (code
, mode
, operands
)
7853 enum rtx_code code ATTRIBUTE_UNUSED
;
7854 enum machine_mode mode ATTRIBUTE_UNUSED
;
7855 rtx operands
[2] ATTRIBUTE_UNUSED
;
7857 /* If one of operands is memory, source and destination must match. */
7858 if ((GET_CODE (operands
[0]) == MEM
7859 || GET_CODE (operands
[1]) == MEM
)
7860 && ! rtx_equal_p (operands
[0], operands
[1]))
7865 /* Return TRUE or FALSE depending on whether the first SET in INSN
7866 has source and destination with matching CC modes, and that the
7867 CC mode is at least as constrained as REQ_MODE. */
7870 ix86_match_ccmode (insn
, req_mode
)
7872 enum machine_mode req_mode
;
7875 enum machine_mode set_mode
;
7877 set
= PATTERN (insn
);
7878 if (GET_CODE (set
) == PARALLEL
)
7879 set
= XVECEXP (set
, 0, 0);
7880 if (GET_CODE (set
) != SET
)
7882 if (GET_CODE (SET_SRC (set
)) != COMPARE
)
7885 set_mode
= GET_MODE (SET_DEST (set
));
7889 if (req_mode
!= CCNOmode
7890 && (req_mode
!= CCmode
7891 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
7895 if (req_mode
== CCGCmode
)
7899 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
7903 if (req_mode
== CCZmode
)
7913 return (GET_MODE (SET_SRC (set
)) == set_mode
);
7916 /* Generate insn patterns to do an integer compare of OPERANDS. */
7919 ix86_expand_int_compare (code
, op0
, op1
)
7923 enum machine_mode cmpmode
;
7926 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
7927 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
7929 /* This is very simple, but making the interface the same as in the
7930 FP case makes the rest of the code easier. */
7931 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
7932 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
7934 /* Return the test that should be put into the flags user, i.e.
7935 the bcc, scc, or cmov instruction. */
7936 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
7939 /* Figure out whether to use ordered or unordered fp comparisons.
7940 Return the appropriate mode to use. */
7943 ix86_fp_compare_mode (code
)
7944 enum rtx_code code ATTRIBUTE_UNUSED
;
7946 /* ??? In order to make all comparisons reversible, we do all comparisons
7947 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7948 all forms trapping and nontrapping comparisons, we can make inequality
7949 comparisons trapping again, since it results in better code when using
7950 FCOM based compares. */
7951 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
7955 ix86_cc_mode (code
, op0
, op1
)
7959 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
7960 return ix86_fp_compare_mode (code
);
7963 /* Only zero flag is needed. */
7965 case NE
: /* ZF!=0 */
7967 /* Codes needing carry flag. */
7968 case GEU
: /* CF=0 */
7969 case GTU
: /* CF=0 & ZF=0 */
7970 case LTU
: /* CF=1 */
7971 case LEU
: /* CF=1 | ZF=1 */
7973 /* Codes possibly doable only with sign flag when
7974 comparing against zero. */
7975 case GE
: /* SF=OF or SF=0 */
7976 case LT
: /* SF<>OF or SF=1 */
7977 if (op1
== const0_rtx
)
7980 /* For other cases Carry flag is not required. */
7982 /* Codes doable only with sign flag when comparing
7983 against zero, but we miss jump instruction for it
7984 so we need to use relational tests agains overflow
7985 that thus needs to be zero. */
7986 case GT
: /* ZF=0 & SF=OF */
7987 case LE
: /* ZF=1 | SF<>OF */
7988 if (op1
== const0_rtx
)
7992 /* strcmp pattern do (use flags) and combine may ask us for proper
8001 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8004 ix86_use_fcomi_compare (code
)
8005 enum rtx_code code ATTRIBUTE_UNUSED
;
8007 enum rtx_code swapped_code
= swap_condition (code
);
8008 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
8009 || (ix86_fp_comparison_cost (swapped_code
)
8010 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
8013 /* Swap, force into registers, or otherwise massage the two operands
8014 to a fp comparison. The operands are updated in place; the new
8015 comparsion code is returned. */
8017 static enum rtx_code
8018 ix86_prepare_fp_compare_args (code
, pop0
, pop1
)
8022 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
8023 rtx op0
= *pop0
, op1
= *pop1
;
8024 enum machine_mode op_mode
= GET_MODE (op0
);
8025 int is_sse
= SSE_REG_P (op0
) | SSE_REG_P (op1
);
8027 /* All of the unordered compare instructions only work on registers.
8028 The same is true of the XFmode compare instructions. The same is
8029 true of the fcomi compare instructions. */
8032 && (fpcmp_mode
== CCFPUmode
8033 || op_mode
== XFmode
8034 || op_mode
== TFmode
8035 || ix86_use_fcomi_compare (code
)))
8037 op0
= force_reg (op_mode
, op0
);
8038 op1
= force_reg (op_mode
, op1
);
8042 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8043 things around if they appear profitable, otherwise force op0
8046 if (standard_80387_constant_p (op0
) == 0
8047 || (GET_CODE (op0
) == MEM
8048 && ! (standard_80387_constant_p (op1
) == 0
8049 || GET_CODE (op1
) == MEM
)))
8052 tmp
= op0
, op0
= op1
, op1
= tmp
;
8053 code
= swap_condition (code
);
8056 if (GET_CODE (op0
) != REG
)
8057 op0
= force_reg (op_mode
, op0
);
8059 if (CONSTANT_P (op1
))
8061 if (standard_80387_constant_p (op1
))
8062 op1
= force_reg (op_mode
, op1
);
8064 op1
= validize_mem (force_const_mem (op_mode
, op1
));
8068 /* Try to rearrange the comparison to make it cheaper. */
8069 if (ix86_fp_comparison_cost (code
)
8070 > ix86_fp_comparison_cost (swap_condition (code
))
8071 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
8074 tmp
= op0
, op0
= op1
, op1
= tmp
;
8075 code
= swap_condition (code
);
8076 if (GET_CODE (op0
) != REG
)
8077 op0
= force_reg (op_mode
, op0
);
8085 /* Convert comparison codes we use to represent FP comparison to integer
8086 code that will result in proper branch. Return UNKNOWN if no such code
8088 static enum rtx_code
8089 ix86_fp_compare_code_to_integer (code
)
8119 /* Split comparison code CODE into comparisons we can do using branch
8120 instructions. BYPASS_CODE is comparison code for branch that will
8121 branch around FIRST_CODE and SECOND_CODE. If some of branches
8122 is not required, set value to NIL.
8123 We never require more than two branches. */
8125 ix86_fp_comparison_codes (code
, bypass_code
, first_code
, second_code
)
8126 enum rtx_code code
, *bypass_code
, *first_code
, *second_code
;
8132 /* The fcomi comparison sets flags as follows:
8142 case GT
: /* GTU - CF=0 & ZF=0 */
8143 case GE
: /* GEU - CF=0 */
8144 case ORDERED
: /* PF=0 */
8145 case UNORDERED
: /* PF=1 */
8146 case UNEQ
: /* EQ - ZF=1 */
8147 case UNLT
: /* LTU - CF=1 */
8148 case UNLE
: /* LEU - CF=1 | ZF=1 */
8149 case LTGT
: /* EQ - ZF=0 */
8151 case LT
: /* LTU - CF=1 - fails on unordered */
8153 *bypass_code
= UNORDERED
;
8155 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
8157 *bypass_code
= UNORDERED
;
8159 case EQ
: /* EQ - ZF=1 - fails on unordered */
8161 *bypass_code
= UNORDERED
;
8163 case NE
: /* NE - ZF=0 - fails on unordered */
8165 *second_code
= UNORDERED
;
8167 case UNGE
: /* GEU - CF=0 - fails on unordered */
8169 *second_code
= UNORDERED
;
8171 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
8173 *second_code
= UNORDERED
;
8178 if (!TARGET_IEEE_FP
)
8185 /* Return cost of comparison done fcom + arithmetics operations on AX.
8186 All following functions do use number of instructions as an cost metrics.
8187 In future this should be tweaked to compute bytes for optimize_size and
8188 take into account performance of various instructions on various CPUs. */
8190 ix86_fp_comparison_arithmetics_cost (code
)
8193 if (!TARGET_IEEE_FP
)
8195 /* The cost of code output by ix86_expand_fp_compare. */
8223 /* Return cost of comparison done using fcomi operation.
8224 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8226 ix86_fp_comparison_fcomi_cost (code
)
8229 enum rtx_code bypass_code
, first_code
, second_code
;
8230 /* Return arbitarily high cost when instruction is not supported - this
8231 prevents gcc from using it. */
8234 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8235 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 2;
8238 /* Return cost of comparison done using sahf operation.
8239 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8241 ix86_fp_comparison_sahf_cost (code
)
8244 enum rtx_code bypass_code
, first_code
, second_code
;
8245 /* Return arbitarily high cost when instruction is not preferred - this
8246 avoids gcc from using it. */
8247 if (!TARGET_USE_SAHF
&& !optimize_size
)
8249 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8250 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 3;
8253 /* Compute cost of the comparison done using any method.
8254 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8256 ix86_fp_comparison_cost (code
)
8259 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
8262 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
8263 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
8265 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
8266 if (min
> sahf_cost
)
8268 if (min
> fcomi_cost
)
8273 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8276 ix86_expand_fp_compare (code
, op0
, op1
, scratch
, second_test
, bypass_test
)
8278 rtx op0
, op1
, scratch
;
8282 enum machine_mode fpcmp_mode
, intcmp_mode
;
8284 int cost
= ix86_fp_comparison_cost (code
);
8285 enum rtx_code bypass_code
, first_code
, second_code
;
8287 fpcmp_mode
= ix86_fp_compare_mode (code
);
8288 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
8291 *second_test
= NULL_RTX
;
8293 *bypass_test
= NULL_RTX
;
8295 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8297 /* Do fcomi/sahf based test when profitable. */
8298 if ((bypass_code
== NIL
|| bypass_test
)
8299 && (second_code
== NIL
|| second_test
)
8300 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
8304 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8305 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
8311 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8312 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8314 scratch
= gen_reg_rtx (HImode
);
8315 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8316 emit_insn (gen_x86_sahf_1 (scratch
));
8319 /* The FP codes work out to act like unsigned. */
8320 intcmp_mode
= fpcmp_mode
;
8322 if (bypass_code
!= NIL
)
8323 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
8324 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8326 if (second_code
!= NIL
)
8327 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
8328 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8333 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8334 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8335 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8337 scratch
= gen_reg_rtx (HImode
);
8338 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8340 /* In the unordered case, we have to check C2 for NaN's, which
8341 doesn't happen to work out to anything nice combination-wise.
8342 So do some bit twiddling on the value we've got in AH to come
8343 up with an appropriate set of condition codes. */
8345 intcmp_mode
= CCNOmode
;
8350 if (code
== GT
|| !TARGET_IEEE_FP
)
8352 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8357 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8358 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8359 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
8360 intcmp_mode
= CCmode
;
8366 if (code
== LT
&& TARGET_IEEE_FP
)
8368 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8369 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
8370 intcmp_mode
= CCmode
;
8375 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
8381 if (code
== GE
|| !TARGET_IEEE_FP
)
8383 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
8388 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8389 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8396 if (code
== LE
&& TARGET_IEEE_FP
)
8398 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8399 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8400 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8401 intcmp_mode
= CCmode
;
8406 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8412 if (code
== EQ
&& TARGET_IEEE_FP
)
8414 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8415 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8416 intcmp_mode
= CCmode
;
8421 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8428 if (code
== NE
&& TARGET_IEEE_FP
)
8430 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8431 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8437 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8443 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8447 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8456 /* Return the test that should be put into the flags user, i.e.
8457 the bcc, scc, or cmov instruction. */
8458 return gen_rtx_fmt_ee (code
, VOIDmode
,
8459 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8464 ix86_expand_compare (code
, second_test
, bypass_test
)
8466 rtx
*second_test
, *bypass_test
;
8469 op0
= ix86_compare_op0
;
8470 op1
= ix86_compare_op1
;
8473 *second_test
= NULL_RTX
;
8475 *bypass_test
= NULL_RTX
;
8477 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8478 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
8479 second_test
, bypass_test
);
8481 ret
= ix86_expand_int_compare (code
, op0
, op1
);
8486 /* Return true if the CODE will result in nontrivial jump sequence. */
8488 ix86_fp_jump_nontrivial_p (code
)
8491 enum rtx_code bypass_code
, first_code
, second_code
;
8494 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8495 return bypass_code
!= NIL
|| second_code
!= NIL
;
8499 ix86_expand_branch (code
, label
)
8505 switch (GET_MODE (ix86_compare_op0
))
8511 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
8512 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8513 gen_rtx_LABEL_REF (VOIDmode
, label
),
8515 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
8525 enum rtx_code bypass_code
, first_code
, second_code
;
8527 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
8530 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8532 /* Check whether we will use the natural sequence with one jump. If
8533 so, we can expand jump early. Otherwise delay expansion by
8534 creating compound insn to not confuse optimizers. */
8535 if (bypass_code
== NIL
&& second_code
== NIL
8538 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
8539 gen_rtx_LABEL_REF (VOIDmode
, label
),
8544 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
8545 ix86_compare_op0
, ix86_compare_op1
);
8546 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8547 gen_rtx_LABEL_REF (VOIDmode
, label
),
8549 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
8551 use_fcomi
= ix86_use_fcomi_compare (code
);
8552 vec
= rtvec_alloc (3 + !use_fcomi
);
8553 RTVEC_ELT (vec
, 0) = tmp
;
8555 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
8557 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
8560 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
8562 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
8570 /* Expand DImode branch into multiple compare+branch. */
8572 rtx lo
[2], hi
[2], label2
;
8573 enum rtx_code code1
, code2
, code3
;
8575 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
8577 tmp
= ix86_compare_op0
;
8578 ix86_compare_op0
= ix86_compare_op1
;
8579 ix86_compare_op1
= tmp
;
8580 code
= swap_condition (code
);
8582 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
8583 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
8585 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8586 avoid two branches. This costs one extra insn, so disable when
8587 optimizing for size. */
8589 if ((code
== EQ
|| code
== NE
)
8591 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
8596 if (hi
[1] != const0_rtx
)
8597 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
8598 NULL_RTX
, 0, OPTAB_WIDEN
);
8601 if (lo
[1] != const0_rtx
)
8602 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
8603 NULL_RTX
, 0, OPTAB_WIDEN
);
8605 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
8606 NULL_RTX
, 0, OPTAB_WIDEN
);
8608 ix86_compare_op0
= tmp
;
8609 ix86_compare_op1
= const0_rtx
;
8610 ix86_expand_branch (code
, label
);
8614 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8615 op1 is a constant and the low word is zero, then we can just
8616 examine the high word. */
8618 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
8621 case LT
: case LTU
: case GE
: case GEU
:
8622 ix86_compare_op0
= hi
[0];
8623 ix86_compare_op1
= hi
[1];
8624 ix86_expand_branch (code
, label
);
8630 /* Otherwise, we need two or three jumps. */
8632 label2
= gen_label_rtx ();
8635 code2
= swap_condition (code
);
8636 code3
= unsigned_condition (code
);
8640 case LT
: case GT
: case LTU
: case GTU
:
8643 case LE
: code1
= LT
; code2
= GT
; break;
8644 case GE
: code1
= GT
; code2
= LT
; break;
8645 case LEU
: code1
= LTU
; code2
= GTU
; break;
8646 case GEU
: code1
= GTU
; code2
= LTU
; break;
8648 case EQ
: code1
= NIL
; code2
= NE
; break;
8649 case NE
: code2
= NIL
; break;
8657 * if (hi(a) < hi(b)) goto true;
8658 * if (hi(a) > hi(b)) goto false;
8659 * if (lo(a) < lo(b)) goto true;
8663 ix86_compare_op0
= hi
[0];
8664 ix86_compare_op1
= hi
[1];
8667 ix86_expand_branch (code1
, label
);
8669 ix86_expand_branch (code2
, label2
);
8671 ix86_compare_op0
= lo
[0];
8672 ix86_compare_op1
= lo
[1];
8673 ix86_expand_branch (code3
, label
);
8676 emit_label (label2
);
8685 /* Split branch based on floating point condition. */
8687 ix86_split_fp_branch (code
, op1
, op2
, target1
, target2
, tmp
)
8689 rtx op1
, op2
, target1
, target2
, tmp
;
8692 rtx label
= NULL_RTX
;
8694 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
8697 if (target2
!= pc_rtx
)
8700 code
= reverse_condition_maybe_unordered (code
);
8705 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
8706 tmp
, &second
, &bypass
);
8708 if (split_branch_probability
>= 0)
8710 /* Distribute the probabilities across the jumps.
8711 Assume the BYPASS and SECOND to be always test
8713 probability
= split_branch_probability
;
8715 /* Value of 1 is low enough to make no need for probability
8716 to be updated. Later we may run some experiments and see
8717 if unordered values are more frequent in practice. */
8719 bypass_probability
= 1;
8721 second_probability
= 1;
8723 if (bypass
!= NULL_RTX
)
8725 label
= gen_label_rtx ();
8726 i
= emit_jump_insn (gen_rtx_SET
8728 gen_rtx_IF_THEN_ELSE (VOIDmode
,
8730 gen_rtx_LABEL_REF (VOIDmode
,
8733 if (bypass_probability
>= 0)
8735 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8736 GEN_INT (bypass_probability
),
8739 i
= emit_jump_insn (gen_rtx_SET
8741 gen_rtx_IF_THEN_ELSE (VOIDmode
,
8742 condition
, target1
, target2
)));
8743 if (probability
>= 0)
8745 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8746 GEN_INT (probability
),
8748 if (second
!= NULL_RTX
)
8750 i
= emit_jump_insn (gen_rtx_SET
8752 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
8754 if (second_probability
>= 0)
8756 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8757 GEN_INT (second_probability
),
8760 if (label
!= NULL_RTX
)
8765 ix86_expand_setcc (code
, dest
)
8769 rtx ret
, tmp
, tmpreg
;
8770 rtx second_test
, bypass_test
;
8772 if (GET_MODE (ix86_compare_op0
) == DImode
8774 return 0; /* FAIL */
8776 if (GET_MODE (dest
) != QImode
)
8779 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8780 PUT_MODE (ret
, QImode
);
8785 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
8786 if (bypass_test
|| second_test
)
8788 rtx test
= second_test
;
8790 rtx tmp2
= gen_reg_rtx (QImode
);
8797 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
8799 PUT_MODE (test
, QImode
);
8800 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
8803 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
8805 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
8808 return 1; /* DONE */
8812 ix86_expand_int_movcc (operands
)
8815 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
8816 rtx compare_seq
, compare_op
;
8817 rtx second_test
, bypass_test
;
8818 enum machine_mode mode
= GET_MODE (operands
[0]);
8820 /* When the compare code is not LTU or GEU, we can not use sbbl case.
8821 In case comparsion is done with immediate, we can convert it to LTU or
8822 GEU by altering the integer. */
8824 if ((code
== LEU
|| code
== GTU
)
8825 && GET_CODE (ix86_compare_op1
) == CONST_INT
8827 && INTVAL (ix86_compare_op1
) != -1
8828 /* For x86-64, the immediate field in the instruction is 32-bit
8829 signed, so we can't increment a DImode value above 0x7fffffff. */
8831 || GET_MODE (ix86_compare_op0
) != DImode
8832 || INTVAL (ix86_compare_op1
) != 0x7fffffff)
8833 && GET_CODE (operands
[2]) == CONST_INT
8834 && GET_CODE (operands
[3]) == CONST_INT
)
8840 ix86_compare_op1
= gen_int_mode (INTVAL (ix86_compare_op1
) + 1,
8841 GET_MODE (ix86_compare_op0
));
8845 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8846 compare_seq
= get_insns ();
8849 compare_code
= GET_CODE (compare_op
);
8851 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8852 HImode insns, we'd be swallowed in word prefix ops. */
8855 && (mode
!= DImode
|| TARGET_64BIT
)
8856 && GET_CODE (operands
[2]) == CONST_INT
8857 && GET_CODE (operands
[3]) == CONST_INT
)
8859 rtx out
= operands
[0];
8860 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
8861 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
8864 if ((compare_code
== LTU
|| compare_code
== GEU
)
8865 && !second_test
&& !bypass_test
)
8867 /* Detect overlap between destination and compare sources. */
8870 /* To simplify rest of code, restrict to the GEU case. */
8871 if (compare_code
== LTU
)
8876 compare_code
= reverse_condition (compare_code
);
8877 code
= reverse_condition (code
);
8881 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
8882 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
8883 tmp
= gen_reg_rtx (mode
);
8885 emit_insn (compare_seq
);
8887 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
));
8889 emit_insn (gen_x86_movsicc_0_m1 (tmp
));
8901 tmp
= expand_simple_binop (mode
, PLUS
,
8903 tmp
, 1, OPTAB_DIRECT
);
8914 tmp
= expand_simple_binop (mode
, IOR
,
8916 tmp
, 1, OPTAB_DIRECT
);
8918 else if (diff
== -1 && ct
)
8928 tmp
= expand_simple_unop (mode
, NOT
, tmp
, tmp
, 1);
8930 tmp
= expand_simple_binop (mode
, PLUS
,
8932 tmp
, 1, OPTAB_DIRECT
);
8940 * andl cf - ct, dest
8950 tmp
= expand_simple_unop (mode
, NOT
, tmp
, tmp
, 1);
8953 tmp
= expand_simple_binop (mode
, AND
,
8955 gen_int_mode (cf
- ct
, mode
),
8956 tmp
, 1, OPTAB_DIRECT
);
8958 tmp
= expand_simple_binop (mode
, PLUS
,
8960 tmp
, 1, OPTAB_DIRECT
);
8964 emit_move_insn (out
, tmp
);
8966 return 1; /* DONE */
8973 tmp
= ct
, ct
= cf
, cf
= tmp
;
8975 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
8977 /* We may be reversing unordered compare to normal compare, that
8978 is not valid in general (we may convert non-trapping condition
8979 to trapping one), however on i386 we currently emit all
8980 comparisons unordered. */
8981 compare_code
= reverse_condition_maybe_unordered (compare_code
);
8982 code
= reverse_condition_maybe_unordered (code
);
8986 compare_code
= reverse_condition (compare_code
);
8987 code
= reverse_condition (code
);
8992 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
8993 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
8995 if (ix86_compare_op1
== const0_rtx
8996 && (code
== LT
|| code
== GE
))
8997 compare_code
= code
;
8998 else if (ix86_compare_op1
== constm1_rtx
)
9002 else if (code
== GT
)
9007 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9008 if (compare_code
!= NIL
9009 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
9010 && (cf
== -1 || ct
== -1))
9012 /* If lea code below could be used, only optimize
9013 if it results in a 2 insn sequence. */
9015 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
9016 || diff
== 3 || diff
== 5 || diff
== 9)
9017 || (compare_code
== LT
&& ct
== -1)
9018 || (compare_code
== GE
&& cf
== -1))
9021 * notl op1 (if necessary)
9029 code
= reverse_condition (code
);
9032 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9033 ix86_compare_op1
, VOIDmode
, 0, -1);
9035 out
= expand_simple_binop (mode
, IOR
,
9037 out
, 1, OPTAB_DIRECT
);
9038 if (out
!= operands
[0])
9039 emit_move_insn (operands
[0], out
);
9041 return 1; /* DONE */
9045 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
9046 || diff
== 3 || diff
== 5 || diff
== 9)
9047 && (mode
!= DImode
|| x86_64_sign_extended_value (GEN_INT (cf
))))
9053 * lea cf(dest*(ct-cf)),dest
9057 * This also catches the degenerate setcc-only case.
9063 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9064 ix86_compare_op1
, VOIDmode
, 0, 1);
9067 /* On x86_64 the lea instruction operates on Pmode, so we need
9068 to get arithmetics done in proper mode to match. */
9075 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
9079 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
9085 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
9089 && (GET_CODE (tmp
) != SUBREG
|| SUBREG_REG (tmp
) != out
))
9095 clob
= gen_rtx_REG (CCmode
, FLAGS_REG
);
9096 clob
= gen_rtx_CLOBBER (VOIDmode
, clob
);
9098 tmp
= gen_rtx_SET (VOIDmode
, out
, tmp
);
9099 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
9103 emit_insn (gen_rtx_SET (VOIDmode
, out
, tmp
));
9105 if (out
!= operands
[0])
9106 emit_move_insn (operands
[0], copy_rtx (out
));
9108 return 1; /* DONE */
9112 * General case: Jumpful:
9113 * xorl dest,dest cmpl op1, op2
9114 * cmpl op1, op2 movl ct, dest
9116 * decl dest movl cf, dest
9117 * andl (cf-ct),dest 1:
9122 * This is reasonably steep, but branch mispredict costs are
9123 * high on modern cpus, so consider failing only if optimizing
9126 * %%% Parameterize branch_cost on the tuning architecture, then
9127 * use that. The 80386 couldn't care less about mispredicts.
9130 if (!optimize_size
&& !TARGET_CMOVE
)
9136 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
9137 /* We may be reversing unordered compare to normal compare,
9138 that is not valid in general (we may convert non-trapping
9139 condition to trapping one), however on i386 we currently
9140 emit all comparisons unordered. */
9141 code
= reverse_condition_maybe_unordered (code
);
9144 code
= reverse_condition (code
);
9145 if (compare_code
!= NIL
)
9146 compare_code
= reverse_condition (compare_code
);
9150 if (compare_code
!= NIL
)
9152 /* notl op1 (if needed)
9157 For x < 0 (resp. x <= -1) there will be no notl,
9158 so if possible swap the constants to get rid of the
9160 True/false will be -1/0 while code below (store flag
9161 followed by decrement) is 0/-1, so the constants need
9162 to be exchanged once more. */
9164 if (compare_code
== GE
|| !cf
)
9166 code
= reverse_condition (code
);
9171 HOST_WIDE_INT tmp
= cf
;
9176 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9177 ix86_compare_op1
, VOIDmode
, 0, -1);
9181 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9182 ix86_compare_op1
, VOIDmode
, 0, 1);
9184 out
= expand_simple_binop (mode
, PLUS
, out
, constm1_rtx
,
9185 out
, 1, OPTAB_DIRECT
);
9188 out
= expand_simple_binop (mode
, AND
, out
,
9189 gen_int_mode (cf
- ct
, mode
),
9190 out
, 1, OPTAB_DIRECT
);
9192 out
= expand_simple_binop (mode
, PLUS
, out
, GEN_INT (ct
),
9193 out
, 1, OPTAB_DIRECT
);
9194 if (out
!= operands
[0])
9195 emit_move_insn (operands
[0], out
);
9197 return 1; /* DONE */
9203 /* Try a few things more with specific constants and a variable. */
9206 rtx var
, orig_out
, out
, tmp
;
9209 return 0; /* FAIL */
9211 /* If one of the two operands is an interesting constant, load a
9212 constant with the above and mask it in with a logical operation. */
9214 if (GET_CODE (operands
[2]) == CONST_INT
)
9217 if (INTVAL (operands
[2]) == 0)
9218 operands
[3] = constm1_rtx
, op
= and_optab
;
9219 else if (INTVAL (operands
[2]) == -1)
9220 operands
[3] = const0_rtx
, op
= ior_optab
;
9222 return 0; /* FAIL */
9224 else if (GET_CODE (operands
[3]) == CONST_INT
)
9227 if (INTVAL (operands
[3]) == 0)
9228 operands
[2] = constm1_rtx
, op
= and_optab
;
9229 else if (INTVAL (operands
[3]) == -1)
9230 operands
[2] = const0_rtx
, op
= ior_optab
;
9232 return 0; /* FAIL */
9235 return 0; /* FAIL */
9237 orig_out
= operands
[0];
9238 tmp
= gen_reg_rtx (mode
);
9241 /* Recurse to get the constant loaded. */
9242 if (ix86_expand_int_movcc (operands
) == 0)
9243 return 0; /* FAIL */
9245 /* Mask in the interesting variable. */
9246 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
9248 if (out
!= orig_out
)
9249 emit_move_insn (orig_out
, out
);
9251 return 1; /* DONE */
9255 * For comparison with above,
9265 if (! nonimmediate_operand (operands
[2], mode
))
9266 operands
[2] = force_reg (mode
, operands
[2]);
9267 if (! nonimmediate_operand (operands
[3], mode
))
9268 operands
[3] = force_reg (mode
, operands
[3]);
9270 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
9272 rtx tmp
= gen_reg_rtx (mode
);
9273 emit_move_insn (tmp
, operands
[3]);
9276 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
9278 rtx tmp
= gen_reg_rtx (mode
);
9279 emit_move_insn (tmp
, operands
[2]);
9282 if (! register_operand (operands
[2], VOIDmode
)
9283 && ! register_operand (operands
[3], VOIDmode
))
9284 operands
[2] = force_reg (mode
, operands
[2]);
9286 emit_insn (compare_seq
);
9287 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9288 gen_rtx_IF_THEN_ELSE (mode
,
9289 compare_op
, operands
[2],
9292 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9293 gen_rtx_IF_THEN_ELSE (mode
,
9298 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9299 gen_rtx_IF_THEN_ELSE (mode
,
9304 return 1; /* DONE */
9308 ix86_expand_fp_movcc (operands
)
9313 rtx compare_op
, second_test
, bypass_test
;
9315 /* For SF/DFmode conditional moves based on comparisons
9316 in same mode, we may want to use SSE min/max instructions. */
9317 if (((TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == SFmode
)
9318 || (TARGET_SSE2
&& TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == DFmode
))
9319 && GET_MODE (ix86_compare_op0
) == GET_MODE (operands
[0])
9320 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9322 || (GET_CODE (operands
[1]) != LTGT
&& GET_CODE (operands
[1]) != UNEQ
))
9323 /* We may be called from the post-reload splitter. */
9324 && (!REG_P (operands
[0])
9325 || SSE_REG_P (operands
[0])
9326 || REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
))
9328 rtx op0
= ix86_compare_op0
, op1
= ix86_compare_op1
;
9329 code
= GET_CODE (operands
[1]);
9331 /* See if we have (cross) match between comparison operands and
9332 conditional move operands. */
9333 if (rtx_equal_p (operands
[2], op1
))
9338 code
= reverse_condition_maybe_unordered (code
);
9340 if (rtx_equal_p (operands
[2], op0
) && rtx_equal_p (operands
[3], op1
))
9342 /* Check for min operation. */
9345 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
9346 if (memory_operand (op0
, VOIDmode
))
9347 op0
= force_reg (GET_MODE (operands
[0]), op0
);
9348 if (GET_MODE (operands
[0]) == SFmode
)
9349 emit_insn (gen_minsf3 (operands
[0], op0
, op1
));
9351 emit_insn (gen_mindf3 (operands
[0], op0
, op1
));
9354 /* Check for max operation. */
9357 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
9358 if (memory_operand (op0
, VOIDmode
))
9359 op0
= force_reg (GET_MODE (operands
[0]), op0
);
9360 if (GET_MODE (operands
[0]) == SFmode
)
9361 emit_insn (gen_maxsf3 (operands
[0], op0
, op1
));
9363 emit_insn (gen_maxdf3 (operands
[0], op0
, op1
));
9367 /* Manage condition to be sse_comparison_operator. In case we are
9368 in non-ieee mode, try to canonicalize the destination operand
9369 to be first in the comparison - this helps reload to avoid extra
9371 if (!sse_comparison_operator (operands
[1], VOIDmode
)
9372 || (rtx_equal_p (operands
[0], ix86_compare_op1
) && !TARGET_IEEE_FP
))
9374 rtx tmp
= ix86_compare_op0
;
9375 ix86_compare_op0
= ix86_compare_op1
;
9376 ix86_compare_op1
= tmp
;
9377 operands
[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands
[1])),
9378 VOIDmode
, ix86_compare_op0
,
9381 /* Similarly try to manage result to be first operand of conditional
9382 move. We also don't support the NE comparison on SSE, so try to
9384 if ((rtx_equal_p (operands
[0], operands
[3])
9385 && (!TARGET_IEEE_FP
|| GET_CODE (operands
[1]) != EQ
))
9386 || (GET_CODE (operands
[1]) == NE
&& TARGET_IEEE_FP
))
9388 rtx tmp
= operands
[2];
9389 operands
[2] = operands
[3];
9391 operands
[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9392 (GET_CODE (operands
[1])),
9393 VOIDmode
, ix86_compare_op0
,
9396 if (GET_MODE (operands
[0]) == SFmode
)
9397 emit_insn (gen_sse_movsfcc (operands
[0], operands
[1],
9398 operands
[2], operands
[3],
9399 ix86_compare_op0
, ix86_compare_op1
));
9401 emit_insn (gen_sse_movdfcc (operands
[0], operands
[1],
9402 operands
[2], operands
[3],
9403 ix86_compare_op0
, ix86_compare_op1
));
9407 /* The floating point conditional move instructions don't directly
9408 support conditions resulting from a signed integer comparison. */
9410 code
= GET_CODE (operands
[1]);
9411 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9413 /* The floating point conditional move instructions don't directly
9414 support signed integer comparisons. */
9416 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
9418 if (second_test
!= NULL
|| bypass_test
!= NULL
)
9420 tmp
= gen_reg_rtx (QImode
);
9421 ix86_expand_setcc (code
, tmp
);
9423 ix86_compare_op0
= tmp
;
9424 ix86_compare_op1
= const0_rtx
;
9425 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9427 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
9429 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
9430 emit_move_insn (tmp
, operands
[3]);
9433 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
9435 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
9436 emit_move_insn (tmp
, operands
[2]);
9440 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9441 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9446 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9447 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9452 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9453 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9461 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9462 works for floating point parameters and non-offsettable memories.
9463 For pushes, it returns just stack offsets; the values will be saved
9464 in the right order. At most three parts are generated. */
9467 ix86_split_to_parts (operand
, parts
, mode
)
9470 enum machine_mode mode
;
9475 size
= mode
== TFmode
? 3 : (GET_MODE_SIZE (mode
) / 4);
9477 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
9479 if (GET_CODE (operand
) == REG
&& MMX_REGNO_P (REGNO (operand
)))
9481 if (size
< 2 || size
> 3)
9484 /* Optimize constant pool reference to immediates. This is used by fp
9485 moves, that force all constants to memory to allow combining. */
9486 if (GET_CODE (operand
) == MEM
&& RTX_UNCHANGING_P (operand
))
9488 rtx tmp
= maybe_get_pool_constant (operand
);
9493 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
9495 /* The only non-offsetable memories we handle are pushes. */
9496 if (! push_operand (operand
, VOIDmode
))
9499 operand
= copy_rtx (operand
);
9500 PUT_MODE (operand
, Pmode
);
9501 parts
[0] = parts
[1] = parts
[2] = operand
;
9503 else if (!TARGET_64BIT
)
9506 split_di (&operand
, 1, &parts
[0], &parts
[1]);
9509 if (REG_P (operand
))
9511 if (!reload_completed
)
9513 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
9514 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
9516 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
9518 else if (offsettable_memref_p (operand
))
9520 operand
= adjust_address (operand
, SImode
, 0);
9522 parts
[1] = adjust_address (operand
, SImode
, 4);
9524 parts
[2] = adjust_address (operand
, SImode
, 8);
9526 else if (GET_CODE (operand
) == CONST_DOUBLE
)
9531 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
9536 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
9537 parts
[2] = gen_int_mode (l
[2], SImode
);
9540 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
9545 parts
[1] = gen_int_mode (l
[1], SImode
);
9546 parts
[0] = gen_int_mode (l
[0], SImode
);
9555 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
9556 if (mode
== XFmode
|| mode
== TFmode
)
9558 if (REG_P (operand
))
9560 if (!reload_completed
)
9562 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
9563 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
9565 else if (offsettable_memref_p (operand
))
9567 operand
= adjust_address (operand
, DImode
, 0);
9569 parts
[1] = adjust_address (operand
, SImode
, 8);
9571 else if (GET_CODE (operand
) == CONST_DOUBLE
)
9576 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
9577 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
9578 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9579 if (HOST_BITS_PER_WIDE_INT
>= 64)
9582 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
9583 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
9586 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
9587 parts
[1] = gen_int_mode (l
[2], SImode
);
9597 /* Emit insns to perform a move or push of DI, DF, and XF values.
9598 Return false when normal moves are needed; true when all required
9599 insns have been emitted. Operands 2-4 contain the input values
9600 in the correct order; operands 5-7 contain the output values. */
9603 ix86_split_long_move (operands
)
9610 enum machine_mode mode
= GET_MODE (operands
[0]);
9612 /* The DFmode expanders may ask us to move double.
9613 For 64bit target this is single move. By hiding the fact
9614 here we simplify i386.md splitters. */
9615 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
9617 /* Optimize constant pool reference to immediates. This is used by
9618 fp moves, that force all constants to memory to allow combining. */
9620 if (GET_CODE (operands
[1]) == MEM
9621 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
9622 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
9623 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
9624 if (push_operand (operands
[0], VOIDmode
))
9626 operands
[0] = copy_rtx (operands
[0]);
9627 PUT_MODE (operands
[0], Pmode
);
9630 operands
[0] = gen_lowpart (DImode
, operands
[0]);
9631 operands
[1] = gen_lowpart (DImode
, operands
[1]);
9632 emit_move_insn (operands
[0], operands
[1]);
9636 /* The only non-offsettable memory we handle is push. */
9637 if (push_operand (operands
[0], VOIDmode
))
9639 else if (GET_CODE (operands
[0]) == MEM
9640 && ! offsettable_memref_p (operands
[0]))
9643 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
9644 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
9646 /* When emitting push, take care for source operands on the stack. */
9647 if (push
&& GET_CODE (operands
[1]) == MEM
9648 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
9651 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
9652 XEXP (part
[1][2], 0));
9653 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
9654 XEXP (part
[1][1], 0));
9657 /* We need to do copy in the right order in case an address register
9658 of the source overlaps the destination. */
9659 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
9661 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
9663 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
9666 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
9669 /* Collision in the middle part can be handled by reordering. */
9670 if (collisions
== 1 && nparts
== 3
9671 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
9674 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
9675 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
9678 /* If there are more collisions, we can't handle it by reordering.
9679 Do an lea to the last part and use only one colliding move. */
9680 else if (collisions
> 1)
9683 emit_insn (gen_rtx_SET (VOIDmode
, part
[0][nparts
- 1],
9684 XEXP (part
[1][0], 0)));
9685 part
[1][0] = change_address (part
[1][0],
9686 TARGET_64BIT
? DImode
: SImode
,
9687 part
[0][nparts
- 1]);
9688 part
[1][1] = adjust_address (part
[1][0], VOIDmode
, UNITS_PER_WORD
);
9690 part
[1][2] = adjust_address (part
[1][0], VOIDmode
, 8);
9700 /* We use only first 12 bytes of TFmode value, but for pushing we
9701 are required to adjust stack as if we were pushing real 16byte
9703 if (mode
== TFmode
&& !TARGET_64BIT
)
9704 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
9706 emit_move_insn (part
[0][2], part
[1][2]);
9711 /* In 64bit mode we don't have 32bit push available. In case this is
9712 register, it is OK - we will just use larger counterpart. We also
9713 retype memory - these comes from attempt to avoid REX prefix on
9714 moving of second half of TFmode value. */
9715 if (GET_MODE (part
[1][1]) == SImode
)
9717 if (GET_CODE (part
[1][1]) == MEM
)
9718 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
9719 else if (REG_P (part
[1][1]))
9720 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
9723 if (GET_MODE (part
[1][0]) == SImode
)
9724 part
[1][0] = part
[1][1];
9727 emit_move_insn (part
[0][1], part
[1][1]);
9728 emit_move_insn (part
[0][0], part
[1][0]);
9732 /* Choose correct order to not overwrite the source before it is copied. */
9733 if ((REG_P (part
[0][0])
9734 && REG_P (part
[1][1])
9735 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
9737 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
9739 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
9743 operands
[2] = part
[0][2];
9744 operands
[3] = part
[0][1];
9745 operands
[4] = part
[0][0];
9746 operands
[5] = part
[1][2];
9747 operands
[6] = part
[1][1];
9748 operands
[7] = part
[1][0];
9752 operands
[2] = part
[0][1];
9753 operands
[3] = part
[0][0];
9754 operands
[5] = part
[1][1];
9755 operands
[6] = part
[1][0];
9762 operands
[2] = part
[0][0];
9763 operands
[3] = part
[0][1];
9764 operands
[4] = part
[0][2];
9765 operands
[5] = part
[1][0];
9766 operands
[6] = part
[1][1];
9767 operands
[7] = part
[1][2];
9771 operands
[2] = part
[0][0];
9772 operands
[3] = part
[0][1];
9773 operands
[5] = part
[1][0];
9774 operands
[6] = part
[1][1];
9777 emit_move_insn (operands
[2], operands
[5]);
9778 emit_move_insn (operands
[3], operands
[6]);
9780 emit_move_insn (operands
[4], operands
[7]);
9786 ix86_split_ashldi (operands
, scratch
)
9787 rtx
*operands
, scratch
;
9789 rtx low
[2], high
[2];
9792 if (GET_CODE (operands
[2]) == CONST_INT
)
9794 split_di (operands
, 2, low
, high
);
9795 count
= INTVAL (operands
[2]) & 63;
9799 emit_move_insn (high
[0], low
[1]);
9800 emit_move_insn (low
[0], const0_rtx
);
9803 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
9807 if (!rtx_equal_p (operands
[0], operands
[1]))
9808 emit_move_insn (operands
[0], operands
[1]);
9809 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
9810 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
9815 if (!rtx_equal_p (operands
[0], operands
[1]))
9816 emit_move_insn (operands
[0], operands
[1]);
9818 split_di (operands
, 1, low
, high
);
9820 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
9821 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
9823 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
9825 if (! no_new_pseudos
)
9826 scratch
= force_reg (SImode
, const0_rtx
);
9828 emit_move_insn (scratch
, const0_rtx
);
9830 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
9834 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
9839 ix86_split_ashrdi (operands
, scratch
)
9840 rtx
*operands
, scratch
;
9842 rtx low
[2], high
[2];
9845 if (GET_CODE (operands
[2]) == CONST_INT
)
9847 split_di (operands
, 2, low
, high
);
9848 count
= INTVAL (operands
[2]) & 63;
9852 emit_move_insn (low
[0], high
[1]);
9854 if (! reload_completed
)
9855 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
9858 emit_move_insn (high
[0], low
[0]);
9859 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
9863 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
9867 if (!rtx_equal_p (operands
[0], operands
[1]))
9868 emit_move_insn (operands
[0], operands
[1]);
9869 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
9870 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
9875 if (!rtx_equal_p (operands
[0], operands
[1]))
9876 emit_move_insn (operands
[0], operands
[1]);
9878 split_di (operands
, 1, low
, high
);
9880 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
9881 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
9883 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
9885 if (! no_new_pseudos
)
9886 scratch
= gen_reg_rtx (SImode
);
9887 emit_move_insn (scratch
, high
[0]);
9888 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
9889 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
9893 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
9898 ix86_split_lshrdi (operands
, scratch
)
9899 rtx
*operands
, scratch
;
9901 rtx low
[2], high
[2];
9904 if (GET_CODE (operands
[2]) == CONST_INT
)
9906 split_di (operands
, 2, low
, high
);
9907 count
= INTVAL (operands
[2]) & 63;
9911 emit_move_insn (low
[0], high
[1]);
9912 emit_move_insn (high
[0], const0_rtx
);
9915 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
9919 if (!rtx_equal_p (operands
[0], operands
[1]))
9920 emit_move_insn (operands
[0], operands
[1]);
9921 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
9922 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
9927 if (!rtx_equal_p (operands
[0], operands
[1]))
9928 emit_move_insn (operands
[0], operands
[1]);
9930 split_di (operands
, 1, low
, high
);
9932 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
9933 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
9935 /* Heh. By reversing the arguments, we can reuse this pattern. */
9936 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
9938 if (! no_new_pseudos
)
9939 scratch
= force_reg (SImode
, const0_rtx
);
9941 emit_move_insn (scratch
, const0_rtx
);
9943 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
9947 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
9951 /* Helper function for the string operations below. Test whether VARIABLE
9952 is aligned to VALUE bytes. If so, jump to the (returned) label. */
9954 ix86_expand_aligntest (variable
, value
)
9958 rtx label
= gen_label_rtx ();
9959 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
9960 if (GET_MODE (variable
) == DImode
)
9961 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
9963 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
9964 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
9969 /* Adjust COUNTER by the VALUE. */
9971 ix86_adjust_counter (countreg
, value
)
9973 HOST_WIDE_INT value
;
9975 if (GET_MODE (countreg
) == DImode
)
9976 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
9978 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
9981 /* Zero extend possibly SImode EXP to Pmode register. */
9983 ix86_zero_extend_to_Pmode (exp
)
9987 if (GET_MODE (exp
) == VOIDmode
)
9988 return force_reg (Pmode
, exp
);
9989 if (GET_MODE (exp
) == Pmode
)
9990 return copy_to_mode_reg (Pmode
, exp
);
9991 r
= gen_reg_rtx (Pmode
);
9992 emit_insn (gen_zero_extendsidi2 (r
, exp
));
9996 /* Expand string move (memcpy) operation. Use i386 string operations when
9997 profitable. expand_clrstr contains similar code. */
9999 ix86_expand_movstr (dst
, src
, count_exp
, align_exp
)
10000 rtx dst
, src
, count_exp
, align_exp
;
10002 rtx srcreg
, destreg
, countreg
;
10003 enum machine_mode counter_mode
;
10004 HOST_WIDE_INT align
= 0;
10005 unsigned HOST_WIDE_INT count
= 0;
10010 if (GET_CODE (align_exp
) == CONST_INT
)
10011 align
= INTVAL (align_exp
);
10013 /* This simple hack avoids all inlining code and simplifies code below. */
10014 if (!TARGET_ALIGN_STRINGOPS
)
10017 if (GET_CODE (count_exp
) == CONST_INT
)
10018 count
= INTVAL (count_exp
);
10020 /* Figure out proper mode for counter. For 32bits it is always SImode,
10021 for 64bits use SImode when possible, otherwise DImode.
10022 Set count to number of bytes copied when known at compile time. */
10023 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
10024 || x86_64_zero_extended_value (count_exp
))
10025 counter_mode
= SImode
;
10027 counter_mode
= DImode
;
10029 if (counter_mode
!= SImode
&& counter_mode
!= DImode
)
10032 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
10033 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
10035 emit_insn (gen_cld ());
10037 /* When optimizing for size emit simple rep ; movsb instruction for
10038 counts not divisible by 4. */
10040 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
10042 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
10044 emit_insn (gen_rep_movqi_rex64 (destreg
, srcreg
, countreg
,
10045 destreg
, srcreg
, countreg
));
10047 emit_insn (gen_rep_movqi (destreg
, srcreg
, countreg
,
10048 destreg
, srcreg
, countreg
));
10051 /* For constant aligned (or small unaligned) copies use rep movsl
10052 followed by code copying the rest. For PentiumPro ensure 8 byte
10053 alignment to allow rep movsl acceleration. */
10055 else if (count
!= 0
10057 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
10058 || optimize_size
|| count
< (unsigned int) 64))
10060 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
10061 if (count
& ~(size
- 1))
10063 countreg
= copy_to_mode_reg (counter_mode
,
10064 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
10065 & (TARGET_64BIT
? -1 : 0x3fffffff)));
10066 countreg
= ix86_zero_extend_to_Pmode (countreg
);
10070 emit_insn (gen_rep_movsi_rex64 (destreg
, srcreg
, countreg
,
10071 destreg
, srcreg
, countreg
));
10073 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg
,
10074 destreg
, srcreg
, countreg
));
10077 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg
,
10078 destreg
, srcreg
, countreg
));
10080 if (size
== 8 && (count
& 0x04))
10081 emit_insn (gen_strmovsi (destreg
, srcreg
));
10083 emit_insn (gen_strmovhi (destreg
, srcreg
));
10085 emit_insn (gen_strmovqi (destreg
, srcreg
));
10087 /* The generic code based on the glibc implementation:
10088 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10089 allowing accelerated copying there)
10090 - copy the data using rep movsl
10091 - copy the rest. */
10096 int desired_alignment
= (TARGET_PENTIUMPRO
10097 && (count
== 0 || count
>= (unsigned int) 260)
10098 ? 8 : UNITS_PER_WORD
);
10100 /* In case we don't know anything about the alignment, default to
10101 library version, since it is usually equally fast and result in
10103 if (!TARGET_INLINE_ALL_STRINGOPS
&& align
< UNITS_PER_WORD
)
10109 if (TARGET_SINGLE_STRINGOP
)
10110 emit_insn (gen_cld ());
10112 countreg2
= gen_reg_rtx (Pmode
);
10113 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
10115 /* We don't use loops to align destination and to copy parts smaller
10116 than 4 bytes, because gcc is able to optimize such code better (in
10117 the case the destination or the count really is aligned, gcc is often
10118 able to predict the branches) and also it is friendlier to the
10119 hardware branch prediction.
10121 Using loops is benefical for generic case, because we can
10122 handle small counts using the loops. Many CPUs (such as Athlon)
10123 have large REP prefix setup costs.
10125 This is quite costy. Maybe we can revisit this decision later or
10126 add some customizability to this code. */
10128 if (count
== 0 && align
< desired_alignment
)
10130 label
= gen_label_rtx ();
10131 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
10132 LEU
, 0, counter_mode
, 1, label
);
10136 rtx label
= ix86_expand_aligntest (destreg
, 1);
10137 emit_insn (gen_strmovqi (destreg
, srcreg
));
10138 ix86_adjust_counter (countreg
, 1);
10139 emit_label (label
);
10140 LABEL_NUSES (label
) = 1;
10144 rtx label
= ix86_expand_aligntest (destreg
, 2);
10145 emit_insn (gen_strmovhi (destreg
, srcreg
));
10146 ix86_adjust_counter (countreg
, 2);
10147 emit_label (label
);
10148 LABEL_NUSES (label
) = 1;
10150 if (align
<= 4 && desired_alignment
> 4)
10152 rtx label
= ix86_expand_aligntest (destreg
, 4);
10153 emit_insn (gen_strmovsi (destreg
, srcreg
));
10154 ix86_adjust_counter (countreg
, 4);
10155 emit_label (label
);
10156 LABEL_NUSES (label
) = 1;
10159 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
10161 emit_label (label
);
10162 LABEL_NUSES (label
) = 1;
10165 if (!TARGET_SINGLE_STRINGOP
)
10166 emit_insn (gen_cld ());
10169 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
10171 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg2
,
10172 destreg
, srcreg
, countreg2
));
10176 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
10177 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg2
,
10178 destreg
, srcreg
, countreg2
));
10183 emit_label (label
);
10184 LABEL_NUSES (label
) = 1;
10186 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
10187 emit_insn (gen_strmovsi (destreg
, srcreg
));
10188 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
10190 rtx label
= ix86_expand_aligntest (countreg
, 4);
10191 emit_insn (gen_strmovsi (destreg
, srcreg
));
10192 emit_label (label
);
10193 LABEL_NUSES (label
) = 1;
10195 if (align
> 2 && count
!= 0 && (count
& 2))
10196 emit_insn (gen_strmovhi (destreg
, srcreg
));
10197 if (align
<= 2 || count
== 0)
10199 rtx label
= ix86_expand_aligntest (countreg
, 2);
10200 emit_insn (gen_strmovhi (destreg
, srcreg
));
10201 emit_label (label
);
10202 LABEL_NUSES (label
) = 1;
10204 if (align
> 1 && count
!= 0 && (count
& 1))
10205 emit_insn (gen_strmovqi (destreg
, srcreg
));
10206 if (align
<= 1 || count
== 0)
10208 rtx label
= ix86_expand_aligntest (countreg
, 1);
10209 emit_insn (gen_strmovqi (destreg
, srcreg
));
10210 emit_label (label
);
10211 LABEL_NUSES (label
) = 1;
10215 insns
= get_insns ();
10218 ix86_set_move_mem_attrs (insns
, dst
, src
, destreg
, srcreg
);
10223 /* Expand string clear operation (bzero). Use i386 string operations when
10224 profitable. expand_movstr contains similar code. */
10226 ix86_expand_clrstr (src
, count_exp
, align_exp
)
10227 rtx src
, count_exp
, align_exp
;
10229 rtx destreg
, zeroreg
, countreg
;
10230 enum machine_mode counter_mode
;
10231 HOST_WIDE_INT align
= 0;
10232 unsigned HOST_WIDE_INT count
= 0;
10234 if (GET_CODE (align_exp
) == CONST_INT
)
10235 align
= INTVAL (align_exp
);
10237 /* This simple hack avoids all inlining code and simplifies code below. */
10238 if (!TARGET_ALIGN_STRINGOPS
)
10241 if (GET_CODE (count_exp
) == CONST_INT
)
10242 count
= INTVAL (count_exp
);
10243 /* Figure out proper mode for counter. For 32bits it is always SImode,
10244 for 64bits use SImode when possible, otherwise DImode.
10245 Set count to number of bytes copied when known at compile time. */
10246 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
10247 || x86_64_zero_extended_value (count_exp
))
10248 counter_mode
= SImode
;
10250 counter_mode
= DImode
;
10252 destreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
10254 emit_insn (gen_cld ());
10256 /* When optimizing for size emit simple rep ; movsb instruction for
10257 counts not divisible by 4. */
10259 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
10261 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
10262 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
10264 emit_insn (gen_rep_stosqi_rex64 (destreg
, countreg
, zeroreg
,
10265 destreg
, countreg
));
10267 emit_insn (gen_rep_stosqi (destreg
, countreg
, zeroreg
,
10268 destreg
, countreg
));
10270 else if (count
!= 0
10272 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
10273 || optimize_size
|| count
< (unsigned int) 64))
10275 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
10276 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
10277 if (count
& ~(size
- 1))
10279 countreg
= copy_to_mode_reg (counter_mode
,
10280 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
10281 & (TARGET_64BIT
? -1 : 0x3fffffff)));
10282 countreg
= ix86_zero_extend_to_Pmode (countreg
);
10286 emit_insn (gen_rep_stossi_rex64 (destreg
, countreg
, zeroreg
,
10287 destreg
, countreg
));
10289 emit_insn (gen_rep_stossi (destreg
, countreg
, zeroreg
,
10290 destreg
, countreg
));
10293 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg
, zeroreg
,
10294 destreg
, countreg
));
10296 if (size
== 8 && (count
& 0x04))
10297 emit_insn (gen_strsetsi (destreg
,
10298 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10300 emit_insn (gen_strsethi (destreg
,
10301 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10303 emit_insn (gen_strsetqi (destreg
,
10304 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10310 /* Compute desired alignment of the string operation. */
10311 int desired_alignment
= (TARGET_PENTIUMPRO
10312 && (count
== 0 || count
>= (unsigned int) 260)
10313 ? 8 : UNITS_PER_WORD
);
10315 /* In case we don't know anything about the alignment, default to
10316 library version, since it is usually equally fast and result in
10318 if (!TARGET_INLINE_ALL_STRINGOPS
&& align
< UNITS_PER_WORD
)
10321 if (TARGET_SINGLE_STRINGOP
)
10322 emit_insn (gen_cld ());
10324 countreg2
= gen_reg_rtx (Pmode
);
10325 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
10326 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
10328 if (count
== 0 && align
< desired_alignment
)
10330 label
= gen_label_rtx ();
10331 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
10332 LEU
, 0, counter_mode
, 1, label
);
10336 rtx label
= ix86_expand_aligntest (destreg
, 1);
10337 emit_insn (gen_strsetqi (destreg
,
10338 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10339 ix86_adjust_counter (countreg
, 1);
10340 emit_label (label
);
10341 LABEL_NUSES (label
) = 1;
10345 rtx label
= ix86_expand_aligntest (destreg
, 2);
10346 emit_insn (gen_strsethi (destreg
,
10347 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10348 ix86_adjust_counter (countreg
, 2);
10349 emit_label (label
);
10350 LABEL_NUSES (label
) = 1;
10352 if (align
<= 4 && desired_alignment
> 4)
10354 rtx label
= ix86_expand_aligntest (destreg
, 4);
10355 emit_insn (gen_strsetsi (destreg
, (TARGET_64BIT
10356 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
10358 ix86_adjust_counter (countreg
, 4);
10359 emit_label (label
);
10360 LABEL_NUSES (label
) = 1;
10363 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
10365 emit_label (label
);
10366 LABEL_NUSES (label
) = 1;
10370 if (!TARGET_SINGLE_STRINGOP
)
10371 emit_insn (gen_cld ());
10374 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
10376 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg2
, zeroreg
,
10377 destreg
, countreg2
));
10381 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
10382 emit_insn (gen_rep_stossi (destreg
, countreg2
, zeroreg
,
10383 destreg
, countreg2
));
10387 emit_label (label
);
10388 LABEL_NUSES (label
) = 1;
10391 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
10392 emit_insn (gen_strsetsi (destreg
,
10393 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10394 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
10396 rtx label
= ix86_expand_aligntest (countreg
, 4);
10397 emit_insn (gen_strsetsi (destreg
,
10398 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10399 emit_label (label
);
10400 LABEL_NUSES (label
) = 1;
10402 if (align
> 2 && count
!= 0 && (count
& 2))
10403 emit_insn (gen_strsethi (destreg
,
10404 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10405 if (align
<= 2 || count
== 0)
10407 rtx label
= ix86_expand_aligntest (countreg
, 2);
10408 emit_insn (gen_strsethi (destreg
,
10409 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10410 emit_label (label
);
10411 LABEL_NUSES (label
) = 1;
10413 if (align
> 1 && count
!= 0 && (count
& 1))
10414 emit_insn (gen_strsetqi (destreg
,
10415 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10416 if (align
<= 1 || count
== 0)
10418 rtx label
= ix86_expand_aligntest (countreg
, 1);
10419 emit_insn (gen_strsetqi (destreg
,
10420 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10421 emit_label (label
);
10422 LABEL_NUSES (label
) = 1;
10427 /* Expand strlen. */
10429 ix86_expand_strlen (out
, src
, eoschar
, align
)
10430 rtx out
, src
, eoschar
, align
;
10432 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
10434 /* The generic case of strlen expander is long. Avoid it's
10435 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
10437 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
10438 && !TARGET_INLINE_ALL_STRINGOPS
10440 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
10443 addr
= force_reg (Pmode
, XEXP (src
, 0));
10444 scratch1
= gen_reg_rtx (Pmode
);
10446 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
10449 /* Well it seems that some optimizer does not combine a call like
10450 foo(strlen(bar), strlen(bar));
10451 when the move and the subtraction is done here. It does calculate
10452 the length just once when these instructions are done inside of
10453 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10454 often used and I use one fewer register for the lifetime of
10455 output_strlen_unroll() this is better. */
10457 emit_move_insn (out
, addr
);
10459 ix86_expand_strlensi_unroll_1 (out
, align
);
10461 /* strlensi_unroll_1 returns the address of the zero at the end of
10462 the string, like memchr(), so compute the length by subtracting
10463 the start address. */
10465 emit_insn (gen_subdi3 (out
, out
, addr
));
10467 emit_insn (gen_subsi3 (out
, out
, addr
));
10471 scratch2
= gen_reg_rtx (Pmode
);
10472 scratch3
= gen_reg_rtx (Pmode
);
10473 scratch4
= force_reg (Pmode
, constm1_rtx
);
10475 emit_move_insn (scratch3
, addr
);
10476 eoschar
= force_reg (QImode
, eoschar
);
10478 emit_insn (gen_cld ());
10481 emit_insn (gen_strlenqi_rex_1 (scratch1
, scratch3
, eoschar
,
10482 align
, scratch4
, scratch3
));
10483 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
10484 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
10488 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, eoschar
,
10489 align
, scratch4
, scratch3
));
10490 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
10491 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
10497 /* Expand the appropriate insns for doing strlen if not just doing
10500 out = result, initialized with the start address
10501 align_rtx = alignment of the address.
10502 scratch = scratch register, initialized with the startaddress when
10503 not aligned, otherwise undefined
10505 This is just the body. It needs the initialisations mentioned above and
10506 some address computing at the end. These things are done in i386.md. */
10509 ix86_expand_strlensi_unroll_1 (out
, align_rtx
)
10510 rtx out
, align_rtx
;
10514 rtx align_2_label
= NULL_RTX
;
10515 rtx align_3_label
= NULL_RTX
;
10516 rtx align_4_label
= gen_label_rtx ();
10517 rtx end_0_label
= gen_label_rtx ();
10519 rtx tmpreg
= gen_reg_rtx (SImode
);
10520 rtx scratch
= gen_reg_rtx (SImode
);
10523 if (GET_CODE (align_rtx
) == CONST_INT
)
10524 align
= INTVAL (align_rtx
);
10526 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10528 /* Is there a known alignment and is it less than 4? */
10531 rtx scratch1
= gen_reg_rtx (Pmode
);
10532 emit_move_insn (scratch1
, out
);
10533 /* Is there a known alignment and is it not 2? */
10536 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
10537 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
10539 /* Leave just the 3 lower bits. */
10540 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
10541 NULL_RTX
, 0, OPTAB_WIDEN
);
10543 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
10544 Pmode
, 1, align_4_label
);
10545 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), EQ
, NULL
,
10546 Pmode
, 1, align_2_label
);
10547 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), GTU
, NULL
,
10548 Pmode
, 1, align_3_label
);
10552 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10553 check if is aligned to 4 - byte. */
10555 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (2),
10556 NULL_RTX
, 0, OPTAB_WIDEN
);
10558 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
10559 Pmode
, 1, align_4_label
);
10562 mem
= gen_rtx_MEM (QImode
, out
);
10564 /* Now compare the bytes. */
10566 /* Compare the first n unaligned byte on a byte per byte basis. */
10567 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
10568 QImode
, 1, end_0_label
);
10570 /* Increment the address. */
10572 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10574 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10576 /* Not needed with an alignment of 2 */
10579 emit_label (align_2_label
);
10581 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
10585 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10587 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10589 emit_label (align_3_label
);
10592 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
10596 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10598 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10601 /* Generate loop to check 4 bytes at a time. It is not a good idea to
10602 align this loop. It gives only huge programs, but does not help to
10604 emit_label (align_4_label
);
10606 mem
= gen_rtx_MEM (SImode
, out
);
10607 emit_move_insn (scratch
, mem
);
10609 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
10611 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
10613 /* This formula yields a nonzero result iff one of the bytes is zero.
10614 This saves three branches inside loop and many cycles. */
10616 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
10617 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
10618 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
10619 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
10620 gen_int_mode (0x80808080, SImode
)));
10621 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
10626 rtx reg
= gen_reg_rtx (SImode
);
10627 rtx reg2
= gen_reg_rtx (Pmode
);
10628 emit_move_insn (reg
, tmpreg
);
10629 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
10631 /* If zero is not in the first two bytes, move two bytes forward. */
10632 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
10633 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
10634 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
10635 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
10636 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
10639 /* Emit lea manually to avoid clobbering of flags. */
10640 emit_insn (gen_rtx_SET (SImode
, reg2
,
10641 gen_rtx_PLUS (Pmode
, out
, GEN_INT (2))));
10643 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
10644 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
10645 emit_insn (gen_rtx_SET (VOIDmode
, out
,
10646 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
10653 rtx end_2_label
= gen_label_rtx ();
10654 /* Is zero in the first two bytes? */
10656 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
10657 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
10658 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
10659 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10660 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
10662 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
10663 JUMP_LABEL (tmp
) = end_2_label
;
10665 /* Not in the first two. Move two bytes forward. */
10666 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
10668 emit_insn (gen_adddi3 (out
, out
, GEN_INT (2)));
10670 emit_insn (gen_addsi3 (out
, out
, GEN_INT (2)));
10672 emit_label (end_2_label
);
10676 /* Avoid branch in fixing the byte. */
10677 tmpreg
= gen_lowpart (QImode
, tmpreg
);
10678 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
10680 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3)));
10682 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3)));
10684 emit_label (end_0_label
);
10688 ix86_expand_call (retval
, fnaddr
, callarg1
, callarg2
, pop
)
10689 rtx retval
, fnaddr
, callarg1
, callarg2
, pop
;
10691 rtx use
= NULL
, call
;
10693 if (pop
== const0_rtx
)
10695 if (TARGET_64BIT
&& pop
)
10699 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
10700 fnaddr
= machopic_indirect_call_target (fnaddr
);
10702 /* Static functions and indirect calls don't need the pic register. */
10703 if (! TARGET_64BIT
&& flag_pic
10704 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
10705 && ! SYMBOL_REF_FLAG (XEXP (fnaddr
, 0)))
10706 use_reg (&use
, pic_offset_table_rtx
);
10708 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
10710 rtx al
= gen_rtx_REG (QImode
, 0);
10711 emit_move_insn (al
, callarg2
);
10712 use_reg (&use
, al
);
10714 #endif /* TARGET_MACHO */
10716 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
10718 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
10719 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
10722 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
10724 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
10727 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
10728 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
10729 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
10732 call
= emit_call_insn (call
);
10734 CALL_INSN_FUNCTION_USAGE (call
) = use
;
10738 /* Clear stack slot assignments remembered from previous functions.
10739 This is called from INIT_EXPANDERS once before RTL is emitted for each
10742 static struct machine_function
*
10743 ix86_init_machine_status ()
10745 return ggc_alloc_cleared (sizeof (struct machine_function
));
10748 /* Return a MEM corresponding to a stack slot with mode MODE.
10749 Allocate a new slot if necessary.
10751 The RTL for a function can have several slots available: N is
10752 which slot to use. */
10755 assign_386_stack_local (mode
, n
)
10756 enum machine_mode mode
;
10759 if (n
< 0 || n
>= MAX_386_STACK_LOCALS
)
10762 if (ix86_stack_locals
[(int) mode
][n
] == NULL_RTX
)
10763 ix86_stack_locals
[(int) mode
][n
]
10764 = assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
10766 return ix86_stack_locals
[(int) mode
][n
];
10769 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10771 static GTY(()) rtx ix86_tls_symbol
;
10773 ix86_tls_get_addr ()
10776 if (!ix86_tls_symbol
)
10778 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, (TARGET_GNU_TLS
10779 ? "___tls_get_addr"
10780 : "__tls_get_addr"));
10783 return ix86_tls_symbol
;
10786 /* Calculate the length of the memory address in the instruction
10787 encoding. Does not include the one-byte modrm, opcode, or prefix. */
10790 memory_address_length (addr
)
10793 struct ix86_address parts
;
10794 rtx base
, index
, disp
;
10797 if (GET_CODE (addr
) == PRE_DEC
10798 || GET_CODE (addr
) == POST_INC
10799 || GET_CODE (addr
) == PRE_MODIFY
10800 || GET_CODE (addr
) == POST_MODIFY
)
10803 if (! ix86_decompose_address (addr
, &parts
))
10807 index
= parts
.index
;
10811 /* Register Indirect. */
10812 if (base
&& !index
&& !disp
)
10814 /* Special cases: ebp and esp need the two-byte modrm form. */
10815 if (addr
== stack_pointer_rtx
10816 || addr
== arg_pointer_rtx
10817 || addr
== frame_pointer_rtx
10818 || addr
== hard_frame_pointer_rtx
)
10822 /* Direct Addressing. */
10823 else if (disp
&& !base
&& !index
)
10828 /* Find the length of the displacement constant. */
10831 if (GET_CODE (disp
) == CONST_INT
10832 && CONST_OK_FOR_LETTER_P (INTVAL (disp
), 'K'))
10838 /* An index requires the two-byte modrm form. */
10846 /* Compute default value for "length_immediate" attribute. When SHORTFORM
10847 is set, expect that insn have 8bit immediate alternative. */
10849 ix86_attr_length_immediate_default (insn
, shortform
)
10855 extract_insn_cached (insn
);
10856 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
10857 if (CONSTANT_P (recog_data
.operand
[i
]))
10862 && GET_CODE (recog_data
.operand
[i
]) == CONST_INT
10863 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data
.operand
[i
]), 'K'))
10867 switch (get_attr_mode (insn
))
10878 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
10883 fatal_insn ("unknown insn mode", insn
);
10889 /* Compute default value for "length_address" attribute. */
10891 ix86_attr_length_address_default (insn
)
10895 extract_insn_cached (insn
);
10896 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
10897 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
10899 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
10905 /* Return the maximum number of instructions a cpu can issue. */
10912 case PROCESSOR_PENTIUM
:
10916 case PROCESSOR_PENTIUMPRO
:
10917 case PROCESSOR_PENTIUM4
:
10918 case PROCESSOR_ATHLON
:
10926 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10927 by DEP_INSN and nothing set by DEP_INSN. */
10930 ix86_flags_dependant (insn
, dep_insn
, insn_type
)
10931 rtx insn
, dep_insn
;
10932 enum attr_type insn_type
;
10936 /* Simplify the test for uninteresting insns. */
10937 if (insn_type
!= TYPE_SETCC
10938 && insn_type
!= TYPE_ICMOV
10939 && insn_type
!= TYPE_FCMOV
10940 && insn_type
!= TYPE_IBR
)
10943 if ((set
= single_set (dep_insn
)) != 0)
10945 set
= SET_DEST (set
);
10948 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
10949 && XVECLEN (PATTERN (dep_insn
), 0) == 2
10950 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
10951 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
10953 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
10954 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
10959 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
10962 /* This test is true if the dependent insn reads the flags but
10963 not any other potentially set register. */
10964 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
10967 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
10973 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10974 address with operands set by DEP_INSN. */
10977 ix86_agi_dependant (insn
, dep_insn
, insn_type
)
10978 rtx insn
, dep_insn
;
10979 enum attr_type insn_type
;
10983 if (insn_type
== TYPE_LEA
10986 addr
= PATTERN (insn
);
10987 if (GET_CODE (addr
) == SET
)
10989 else if (GET_CODE (addr
) == PARALLEL
10990 && GET_CODE (XVECEXP (addr
, 0, 0)) == SET
)
10991 addr
= XVECEXP (addr
, 0, 0);
10994 addr
= SET_SRC (addr
);
10999 extract_insn_cached (insn
);
11000 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11001 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
11003 addr
= XEXP (recog_data
.operand
[i
], 0);
11010 return modified_in_p (addr
, dep_insn
);
11014 ix86_adjust_cost (insn
, link
, dep_insn
, cost
)
11015 rtx insn
, link
, dep_insn
;
11018 enum attr_type insn_type
, dep_insn_type
;
11019 enum attr_memory memory
, dep_memory
;
11021 int dep_insn_code_number
;
11023 /* Anti and output depenancies have zero cost on all CPUs. */
11024 if (REG_NOTE_KIND (link
) != 0)
11027 dep_insn_code_number
= recog_memoized (dep_insn
);
11029 /* If we can't recognize the insns, we can't really do anything. */
11030 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
11033 insn_type
= get_attr_type (insn
);
11034 dep_insn_type
= get_attr_type (dep_insn
);
11038 case PROCESSOR_PENTIUM
:
11039 /* Address Generation Interlock adds a cycle of latency. */
11040 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11043 /* ??? Compares pair with jump/setcc. */
11044 if (ix86_flags_dependant (insn
, dep_insn
, insn_type
))
11047 /* Floating point stores require value to be ready one cycle ealier. */
11048 if (insn_type
== TYPE_FMOV
11049 && get_attr_memory (insn
) == MEMORY_STORE
11050 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11054 case PROCESSOR_PENTIUMPRO
:
11055 memory
= get_attr_memory (insn
);
11056 dep_memory
= get_attr_memory (dep_insn
);
11058 /* Since we can't represent delayed latencies of load+operation,
11059 increase the cost here for non-imov insns. */
11060 if (dep_insn_type
!= TYPE_IMOV
11061 && dep_insn_type
!= TYPE_FMOV
11062 && (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
))
11065 /* INT->FP conversion is expensive. */
11066 if (get_attr_fp_int_src (dep_insn
))
11069 /* There is one cycle extra latency between an FP op and a store. */
11070 if (insn_type
== TYPE_FMOV
11071 && (set
= single_set (dep_insn
)) != NULL_RTX
11072 && (set2
= single_set (insn
)) != NULL_RTX
11073 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
11074 && GET_CODE (SET_DEST (set2
)) == MEM
)
11077 /* Show ability of reorder buffer to hide latency of load by executing
11078 in parallel with previous instruction in case
11079 previous instruction is not needed to compute the address. */
11080 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11081 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11083 /* Claim moves to take one cycle, as core can issue one load
11084 at time and the next load can start cycle later. */
11085 if (dep_insn_type
== TYPE_IMOV
11086 || dep_insn_type
== TYPE_FMOV
)
11094 memory
= get_attr_memory (insn
);
11095 dep_memory
= get_attr_memory (dep_insn
);
11096 /* The esp dependency is resolved before the instruction is really
11098 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
11099 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
11102 /* Since we can't represent delayed latencies of load+operation,
11103 increase the cost here for non-imov insns. */
11104 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
11105 cost
+= (dep_insn_type
!= TYPE_IMOV
) ? 2 : 1;
11107 /* INT->FP conversion is expensive. */
11108 if (get_attr_fp_int_src (dep_insn
))
11111 /* Show ability of reorder buffer to hide latency of load by executing
11112 in parallel with previous instruction in case
11113 previous instruction is not needed to compute the address. */
11114 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11115 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11117 /* Claim moves to take one cycle, as core can issue one load
11118 at time and the next load can start cycle later. */
11119 if (dep_insn_type
== TYPE_IMOV
11120 || dep_insn_type
== TYPE_FMOV
)
11129 case PROCESSOR_ATHLON
:
11130 memory
= get_attr_memory (insn
);
11131 dep_memory
= get_attr_memory (dep_insn
);
11133 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
11135 if (dep_insn_type
== TYPE_IMOV
|| dep_insn_type
== TYPE_FMOV
)
11140 /* Show ability of reorder buffer to hide latency of load by executing
11141 in parallel with previous instruction in case
11142 previous instruction is not needed to compute the address. */
11143 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11144 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11146 /* Claim moves to take one cycle, as core can issue one load
11147 at time and the next load can start cycle later. */
11148 if (dep_insn_type
== TYPE_IMOV
11149 || dep_insn_type
== TYPE_FMOV
)
11151 else if (cost
>= 3)
11166 struct ppro_sched_data
11169 int issued_this_cycle
;
11173 static enum attr_ppro_uops
11174 ix86_safe_ppro_uops (insn
)
11177 if (recog_memoized (insn
) >= 0)
11178 return get_attr_ppro_uops (insn
);
11180 return PPRO_UOPS_MANY
;
11184 ix86_dump_ppro_packet (dump
)
11187 if (ix86_sched_data
.ppro
.decode
[0])
11189 fprintf (dump
, "PPRO packet: %d",
11190 INSN_UID (ix86_sched_data
.ppro
.decode
[0]));
11191 if (ix86_sched_data
.ppro
.decode
[1])
11192 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[1]));
11193 if (ix86_sched_data
.ppro
.decode
[2])
11194 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[2]));
11195 fputc ('\n', dump
);
11199 /* We're beginning a new block. Initialize data structures as necessary. */
11202 ix86_sched_init (dump
, sched_verbose
, veclen
)
11203 FILE *dump ATTRIBUTE_UNUSED
;
11204 int sched_verbose ATTRIBUTE_UNUSED
;
11205 int veclen ATTRIBUTE_UNUSED
;
11207 memset (&ix86_sched_data
, 0, sizeof (ix86_sched_data
));
11210 /* Shift INSN to SLOT, and shift everything else down. */
11213 ix86_reorder_insn (insnp
, slot
)
11220 insnp
[0] = insnp
[1];
11221 while (++insnp
!= slot
);
11227 ix86_sched_reorder_ppro (ready
, e_ready
)
11232 enum attr_ppro_uops cur_uops
;
11233 int issued_this_cycle
;
11237 /* At this point .ppro.decode contains the state of the three
11238 decoders from last "cycle". That is, those insns that were
11239 actually independent. But here we're scheduling for the
11240 decoder, and we may find things that are decodable in the
11243 memcpy (decode
, ix86_sched_data
.ppro
.decode
, sizeof (decode
));
11244 issued_this_cycle
= 0;
11247 cur_uops
= ix86_safe_ppro_uops (*insnp
);
11249 /* If the decoders are empty, and we've a complex insn at the
11250 head of the priority queue, let it issue without complaint. */
11251 if (decode
[0] == NULL
)
11253 if (cur_uops
== PPRO_UOPS_MANY
)
11255 decode
[0] = *insnp
;
11259 /* Otherwise, search for a 2-4 uop unsn to issue. */
11260 while (cur_uops
!= PPRO_UOPS_FEW
)
11262 if (insnp
== ready
)
11264 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
11267 /* If so, move it to the head of the line. */
11268 if (cur_uops
== PPRO_UOPS_FEW
)
11269 ix86_reorder_insn (insnp
, e_ready
);
11271 /* Issue the head of the queue. */
11272 issued_this_cycle
= 1;
11273 decode
[0] = *e_ready
--;
11276 /* Look for simple insns to fill in the other two slots. */
11277 for (i
= 1; i
< 3; ++i
)
11278 if (decode
[i
] == NULL
)
11280 if (ready
> e_ready
)
11284 cur_uops
= ix86_safe_ppro_uops (*insnp
);
11285 while (cur_uops
!= PPRO_UOPS_ONE
)
11287 if (insnp
== ready
)
11289 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
11292 /* Found one. Move it to the head of the queue and issue it. */
11293 if (cur_uops
== PPRO_UOPS_ONE
)
11295 ix86_reorder_insn (insnp
, e_ready
);
11296 decode
[i
] = *e_ready
--;
11297 issued_this_cycle
++;
11301 /* ??? Didn't find one. Ideally, here we would do a lazy split
11302 of 2-uop insns, issue one and queue the other. */
11306 if (issued_this_cycle
== 0)
11307 issued_this_cycle
= 1;
11308 ix86_sched_data
.ppro
.issued_this_cycle
= issued_this_cycle
;
11311 /* We are about to being issuing insns for this clock cycle.
11312 Override the default sort algorithm to better slot instructions. */
11314 ix86_sched_reorder (dump
, sched_verbose
, ready
, n_readyp
, clock_var
)
11315 FILE *dump ATTRIBUTE_UNUSED
;
11316 int sched_verbose ATTRIBUTE_UNUSED
;
11319 int clock_var ATTRIBUTE_UNUSED
;
11321 int n_ready
= *n_readyp
;
11322 rtx
*e_ready
= ready
+ n_ready
- 1;
11324 /* Make sure to go ahead and initialize key items in
11325 ix86_sched_data if we are not going to bother trying to
11326 reorder the ready queue. */
11329 ix86_sched_data
.ppro
.issued_this_cycle
= 1;
11338 case PROCESSOR_PENTIUMPRO
:
11339 ix86_sched_reorder_ppro (ready
, e_ready
);
11344 return ix86_issue_rate ();
11347 /* We are about to issue INSN. Return the number of insns left on the
11348 ready queue that can be issued this cycle. */
11351 ix86_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
)
11355 int can_issue_more
;
11361 return can_issue_more
- 1;
11363 case PROCESSOR_PENTIUMPRO
:
11365 enum attr_ppro_uops uops
= ix86_safe_ppro_uops (insn
);
11367 if (uops
== PPRO_UOPS_MANY
)
11370 ix86_dump_ppro_packet (dump
);
11371 ix86_sched_data
.ppro
.decode
[0] = insn
;
11372 ix86_sched_data
.ppro
.decode
[1] = NULL
;
11373 ix86_sched_data
.ppro
.decode
[2] = NULL
;
11375 ix86_dump_ppro_packet (dump
);
11376 ix86_sched_data
.ppro
.decode
[0] = NULL
;
11378 else if (uops
== PPRO_UOPS_FEW
)
11381 ix86_dump_ppro_packet (dump
);
11382 ix86_sched_data
.ppro
.decode
[0] = insn
;
11383 ix86_sched_data
.ppro
.decode
[1] = NULL
;
11384 ix86_sched_data
.ppro
.decode
[2] = NULL
;
11388 for (i
= 0; i
< 3; ++i
)
11389 if (ix86_sched_data
.ppro
.decode
[i
] == NULL
)
11391 ix86_sched_data
.ppro
.decode
[i
] = insn
;
11399 ix86_dump_ppro_packet (dump
);
11400 ix86_sched_data
.ppro
.decode
[0] = NULL
;
11401 ix86_sched_data
.ppro
.decode
[1] = NULL
;
11402 ix86_sched_data
.ppro
.decode
[2] = NULL
;
11406 return --ix86_sched_data
.ppro
.issued_this_cycle
;
11411 ia32_use_dfa_pipeline_interface ()
11413 if (ix86_cpu
== PROCESSOR_PENTIUM
)
11418 /* How many alternative schedules to try. This should be as wide as the
11419 scheduling freedom in the DFA, but no wider. Making this value too
11420 large results extra work for the scheduler. */
11423 ia32_multipass_dfa_lookahead ()
11425 if (ix86_cpu
== PROCESSOR_PENTIUM
)
11432 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11433 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11437 ix86_set_move_mem_attrs (insns
, dstref
, srcref
, dstreg
, srcreg
)
11439 rtx dstref
, srcref
, dstreg
, srcreg
;
11443 for (insn
= insns
; insn
!= 0 ; insn
= NEXT_INSN (insn
))
11445 ix86_set_move_mem_attrs_1 (PATTERN (insn
), dstref
, srcref
,
11449 /* Subroutine of above to actually do the updating by recursively walking
11453 ix86_set_move_mem_attrs_1 (x
, dstref
, srcref
, dstreg
, srcreg
)
11455 rtx dstref
, srcref
, dstreg
, srcreg
;
11457 enum rtx_code code
= GET_CODE (x
);
11458 const char *format_ptr
= GET_RTX_FORMAT (code
);
11461 if (code
== MEM
&& XEXP (x
, 0) == dstreg
)
11462 MEM_COPY_ATTRIBUTES (x
, dstref
);
11463 else if (code
== MEM
&& XEXP (x
, 0) == srcreg
)
11464 MEM_COPY_ATTRIBUTES (x
, srcref
);
11466 for (i
= 0; i
< GET_RTX_LENGTH (code
); i
++, format_ptr
++)
11468 if (*format_ptr
== 'e')
11469 ix86_set_move_mem_attrs_1 (XEXP (x
, i
), dstref
, srcref
,
11471 else if (*format_ptr
== 'E')
11472 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
11473 ix86_set_move_mem_attrs_1 (XVECEXP (x
, i
, j
), dstref
, srcref
,
11478 /* Compute the alignment given to a constant that is being placed in memory.
11479 EXP is the constant and ALIGN is the alignment that the object would
11481 The value of this function is used instead of that alignment to align
11485 ix86_constant_alignment (exp
, align
)
11489 if (TREE_CODE (exp
) == REAL_CST
)
11491 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
11493 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
11496 else if (TREE_CODE (exp
) == STRING_CST
&& TREE_STRING_LENGTH (exp
) >= 31
11503 /* Compute the alignment for a static variable.
11504 TYPE is the data type, and ALIGN is the alignment that
11505 the object would ordinarily have. The value of this function is used
11506 instead of that alignment to align the object. */
11509 ix86_data_alignment (type
, align
)
11513 if (AGGREGATE_TYPE_P (type
)
11514 && TYPE_SIZE (type
)
11515 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11516 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 256
11517 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 256)
11520 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11521 to 16byte boundary. */
11524 if (AGGREGATE_TYPE_P (type
)
11525 && TYPE_SIZE (type
)
11526 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11527 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
11528 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
11532 if (TREE_CODE (type
) == ARRAY_TYPE
)
11534 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
11536 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
11539 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
11542 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
11544 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
11547 else if ((TREE_CODE (type
) == RECORD_TYPE
11548 || TREE_CODE (type
) == UNION_TYPE
11549 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
11550 && TYPE_FIELDS (type
))
11552 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
11554 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
11557 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
11558 || TREE_CODE (type
) == INTEGER_TYPE
)
11560 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
11562 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
11569 /* Compute the alignment for a local variable.
11570 TYPE is the data type, and ALIGN is the alignment that
11571 the object would ordinarily have. The value of this macro is used
11572 instead of that alignment to align the object. */
11575 ix86_local_alignment (type
, align
)
11579 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11580 to 16byte boundary. */
11583 if (AGGREGATE_TYPE_P (type
)
11584 && TYPE_SIZE (type
)
11585 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11586 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
11587 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
11590 if (TREE_CODE (type
) == ARRAY_TYPE
)
11592 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
11594 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
11597 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
11599 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
11601 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
11604 else if ((TREE_CODE (type
) == RECORD_TYPE
11605 || TREE_CODE (type
) == UNION_TYPE
11606 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
11607 && TYPE_FIELDS (type
))
11609 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
11611 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
11614 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
11615 || TREE_CODE (type
) == INTEGER_TYPE
)
11618 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
11620 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
11626 /* Emit RTL insns to initialize the variable parts of a trampoline.
11627 FNADDR is an RTX for the address of the function's pure code.
11628 CXT is an RTX for the static chain value for the function. */
11630 x86_initialize_trampoline (tramp
, fnaddr
, cxt
)
11631 rtx tramp
, fnaddr
, cxt
;
11635 /* Compute offset from the end of the jmp to the target function. */
11636 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
11637 plus_constant (tramp
, 10),
11638 NULL_RTX
, 1, OPTAB_DIRECT
);
11639 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
11640 gen_int_mode (0xb9, QImode
));
11641 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
11642 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
11643 gen_int_mode (0xe9, QImode
));
11644 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
11649 /* Try to load address using shorter movl instead of movabs.
11650 We may want to support movq for kernel mode, but kernel does not use
11651 trampolines at the moment. */
11652 if (x86_64_zero_extended_value (fnaddr
))
11654 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
11655 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11656 gen_int_mode (0xbb41, HImode
));
11657 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
11658 gen_lowpart (SImode
, fnaddr
));
11663 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11664 gen_int_mode (0xbb49, HImode
));
11665 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
11669 /* Load static chain using movabs to r10. */
11670 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11671 gen_int_mode (0xba49, HImode
));
11672 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
11675 /* Jump to the r11 */
11676 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11677 gen_int_mode (0xff49, HImode
));
11678 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
11679 gen_int_mode (0xe3, QImode
));
11681 if (offset
> TRAMPOLINE_SIZE
)
11686 #define def_builtin(MASK, NAME, TYPE, CODE) \
11688 if ((MASK) & target_flags) \
11689 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11690 NULL, NULL_TREE); \
11693 struct builtin_description
11695 const unsigned int mask
;
11696 const enum insn_code icode
;
11697 const char *const name
;
11698 const enum ix86_builtins code
;
11699 const enum rtx_code comparison
;
11700 const unsigned int flag
;
11703 /* Used for builtins that are enabled both by -msse and -msse2. */
11704 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11706 static const struct builtin_description bdesc_comi
[] =
11708 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, EQ
, 0 },
11709 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, LT
, 0 },
11710 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, LE
, 0 },
11711 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, LT
, 1 },
11712 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, LE
, 1 },
11713 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, NE
, 0 },
11714 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, EQ
, 0 },
11715 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, LT
, 0 },
11716 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, LE
, 0 },
11717 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, LT
, 1 },
11718 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, LE
, 1 },
11719 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, NE
, 0 },
11720 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, EQ
, 0 },
11721 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, LT
, 0 },
11722 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, LE
, 0 },
11723 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, LT
, 1 },
11724 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, LE
, 1 },
11725 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, NE
, 0 },
11726 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, EQ
, 0 },
11727 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, LT
, 0 },
11728 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, LE
, 0 },
11729 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, LT
, 1 },
11730 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, LE
, 1 },
11731 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, NE
, 0 },
11734 static const struct builtin_description bdesc_2arg
[] =
11737 { MASK_SSE1
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
11738 { MASK_SSE1
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
11739 { MASK_SSE1
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
11740 { MASK_SSE1
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
11741 { MASK_SSE1
, CODE_FOR_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
11742 { MASK_SSE1
, CODE_FOR_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
11743 { MASK_SSE1
, CODE_FOR_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
11744 { MASK_SSE1
, CODE_FOR_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
11746 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
11747 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
11748 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
11749 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, 1 },
11750 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, 1 },
11751 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
11752 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, EQ
, 0 },
11753 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, LT
, 0 },
11754 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, LE
, 0 },
11755 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, LT
, 1 },
11756 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, LE
, 1 },
11757 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, UNORDERED
, 0 },
11758 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
11759 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
11760 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
11761 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS
, LT
, 1 },
11762 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS
, LE
, 1 },
11763 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
11764 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, EQ
, 0 },
11765 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, LT
, 0 },
11766 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, LE
, 0 },
11767 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, LT
, 1 },
11768 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, LE
, 1 },
11769 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
11771 { MASK_SSE1
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
11772 { MASK_SSE1
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
11773 { MASK_SSE1
, CODE_FOR_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
11774 { MASK_SSE1
, CODE_FOR_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
11776 { MASK_SSE1
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
11777 { MASK_SSE1
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
11778 { MASK_SSE1
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
11779 { MASK_SSE1
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
11780 { MASK_SSE1
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
11783 { MASK_MMX
, CODE_FOR_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
11784 { MASK_MMX
, CODE_FOR_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
11785 { MASK_MMX
, CODE_FOR_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
11786 { MASK_MMX
, CODE_FOR_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
11787 { MASK_MMX
, CODE_FOR_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
11788 { MASK_MMX
, CODE_FOR_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
11790 { MASK_MMX
, CODE_FOR_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
11791 { MASK_MMX
, CODE_FOR_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
11792 { MASK_MMX
, CODE_FOR_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
11793 { MASK_MMX
, CODE_FOR_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
11794 { MASK_MMX
, CODE_FOR_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
11795 { MASK_MMX
, CODE_FOR_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
11796 { MASK_MMX
, CODE_FOR_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
11797 { MASK_MMX
, CODE_FOR_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
11799 { MASK_MMX
, CODE_FOR_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
11800 { MASK_MMX
, CODE_FOR_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
11801 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
11803 { MASK_MMX
, CODE_FOR_mmx_anddi3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
11804 { MASK_MMX
, CODE_FOR_mmx_nanddi3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
11805 { MASK_MMX
, CODE_FOR_mmx_iordi3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
11806 { MASK_MMX
, CODE_FOR_mmx_xordi3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
11808 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
11809 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
11811 { MASK_MMX
, CODE_FOR_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
11812 { MASK_MMX
, CODE_FOR_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
11813 { MASK_MMX
, CODE_FOR_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
11814 { MASK_MMX
, CODE_FOR_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
11815 { MASK_MMX
, CODE_FOR_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
11816 { MASK_MMX
, CODE_FOR_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
11818 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
11819 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
11820 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
11821 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
11823 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
11824 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
11825 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
11826 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
11827 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
11828 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
11831 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
11832 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
11833 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
11835 { MASK_SSE1
, CODE_FOR_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
11836 { MASK_SSE1
, CODE_FOR_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
11838 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
11839 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
11840 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
11841 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
11842 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
11843 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
11845 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
11846 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
11847 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
11848 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
11849 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
11850 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
11852 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
11853 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
11854 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
11855 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
11857 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
11858 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
11861 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
11862 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
11863 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
11864 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
11865 { MASK_SSE2
, CODE_FOR_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
11866 { MASK_SSE2
, CODE_FOR_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
11867 { MASK_SSE2
, CODE_FOR_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
11868 { MASK_SSE2
, CODE_FOR_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
11870 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
11871 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
11872 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
11873 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, 1 },
11874 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, 1 },
11875 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
11876 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, EQ
, 0 },
11877 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, LT
, 0 },
11878 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, LE
, 0 },
11879 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, LT
, 1 },
11880 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, LE
, 1 },
11881 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, UNORDERED
, 0 },
11882 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
11883 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
11884 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
11885 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD
, LT
, 1 },
11886 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD
, LE
, 1 },
11887 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
11888 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, EQ
, 0 },
11889 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, LT
, 0 },
11890 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, LE
, 0 },
11891 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD
, LT
, 1 },
11892 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD
, LE
, 1 },
11893 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, UNORDERED
, 0 },
11895 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
11896 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
11897 { MASK_SSE2
, CODE_FOR_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
11898 { MASK_SSE2
, CODE_FOR_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
11900 { MASK_SSE2
, CODE_FOR_sse2_anddf3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
11901 { MASK_SSE2
, CODE_FOR_sse2_nanddf3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
11902 { MASK_SSE2
, CODE_FOR_sse2_iordf3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
11903 { MASK_SSE2
, CODE_FOR_sse2_xordf3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
11905 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
11906 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
11907 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
11910 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
11911 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
11912 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
11913 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
11914 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
11915 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
11916 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
11917 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
11919 { MASK_MMX
, CODE_FOR_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
11920 { MASK_MMX
, CODE_FOR_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
11921 { MASK_MMX
, CODE_FOR_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
11922 { MASK_MMX
, CODE_FOR_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
11923 { MASK_MMX
, CODE_FOR_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
11924 { MASK_MMX
, CODE_FOR_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
11925 { MASK_MMX
, CODE_FOR_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
11926 { MASK_MMX
, CODE_FOR_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
11928 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
11929 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
11930 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, 0, 0 },
11931 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, 0, 0 },
11933 { MASK_SSE2
, CODE_FOR_sse2_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
11934 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
11935 { MASK_SSE2
, CODE_FOR_sse2_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
11936 { MASK_SSE2
, CODE_FOR_sse2_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
11938 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
11939 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
11941 { MASK_SSE2
, CODE_FOR_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
11942 { MASK_SSE2
, CODE_FOR_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
11943 { MASK_SSE2
, CODE_FOR_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
11944 { MASK_SSE2
, CODE_FOR_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
11945 { MASK_SSE2
, CODE_FOR_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
11946 { MASK_SSE2
, CODE_FOR_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
11948 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
11949 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
11950 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
11951 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
11953 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
11954 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
11955 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
11956 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
11957 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
11958 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
11960 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
11961 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
11962 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
11964 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
11965 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
11967 { MASK_SSE2
, CODE_FOR_ashlv8hi3_ti
, 0, IX86_BUILTIN_PSLLW128
, 0, 0 },
11968 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
11969 { MASK_SSE2
, CODE_FOR_ashlv4si3_ti
, 0, IX86_BUILTIN_PSLLD128
, 0, 0 },
11970 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
11971 { MASK_SSE2
, CODE_FOR_ashlv2di3_ti
, 0, IX86_BUILTIN_PSLLQ128
, 0, 0 },
11972 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
11974 { MASK_SSE2
, CODE_FOR_lshrv8hi3_ti
, 0, IX86_BUILTIN_PSRLW128
, 0, 0 },
11975 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
11976 { MASK_SSE2
, CODE_FOR_lshrv4si3_ti
, 0, IX86_BUILTIN_PSRLD128
, 0, 0 },
11977 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
11978 { MASK_SSE2
, CODE_FOR_lshrv2di3_ti
, 0, IX86_BUILTIN_PSRLQ128
, 0, 0 },
11979 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
11981 { MASK_SSE2
, CODE_FOR_ashrv8hi3_ti
, 0, IX86_BUILTIN_PSRAW128
, 0, 0 },
11982 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
11983 { MASK_SSE2
, CODE_FOR_ashrv4si3_ti
, 0, IX86_BUILTIN_PSRAD128
, 0, 0 },
11984 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
11986 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
11988 { MASK_SSE2
, CODE_FOR_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
11989 { MASK_SSE2
, CODE_FOR_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
11990 { MASK_SSE2
, CODE_FOR_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 }
11993 static const struct builtin_description bdesc_1arg
[] =
11995 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
11996 { MASK_SSE1
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
11998 { MASK_SSE1
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
11999 { MASK_SSE1
, CODE_FOR_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
12000 { MASK_SSE1
, CODE_FOR_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
12002 { MASK_SSE1
, CODE_FOR_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
12003 { MASK_SSE1
, CODE_FOR_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
12004 { MASK_SSE1
, CODE_FOR_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
12005 { MASK_SSE1
, CODE_FOR_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
12007 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
12008 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
12009 { MASK_SSE2
, CODE_FOR_sse2_movq2dq
, 0, IX86_BUILTIN_MOVQ2DQ
, 0, 0 },
12011 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
12013 { MASK_SSE2
, CODE_FOR_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
12014 { MASK_SSE2
, CODE_FOR_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
12016 { MASK_SSE2
, CODE_FOR_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
12017 { MASK_SSE2
, CODE_FOR_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
12018 { MASK_SSE2
, CODE_FOR_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
12019 { MASK_SSE2
, CODE_FOR_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
12020 { MASK_SSE2
, CODE_FOR_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
12022 { MASK_SSE2
, CODE_FOR_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
12024 { MASK_SSE2
, CODE_FOR_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
12025 { MASK_SSE2
, CODE_FOR_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
12027 { MASK_SSE2
, CODE_FOR_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
12028 { MASK_SSE2
, CODE_FOR_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
12029 { MASK_SSE2
, CODE_FOR_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 }
/* Target hook: register all ia32 builtin functions.  Only the MMX/SSE
   family is set up here, via ix86_init_mmx_sse_builtins.
   NOTE(review): the extracted text is missing original lines 12034-12035
   between the two lines below -- presumably the opening brace and a
   TARGET_MMX guard around the call (the comment preceding
   ix86_init_mmx_sse_builtins says it "is not called if TARGET_MMX is
   zero").  Confirm against the full file before editing.  */
12033 ix86_init_builtins ()
12036 ix86_init_mmx_sse_builtins ();
12039 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
12040    is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   part of the builtins.  */
12043 ix86_init_mmx_sse_builtins ()
12045 const struct builtin_description
* d
;
12048 tree pchar_type_node
= build_pointer_type (char_type_node
);
12049 tree pfloat_type_node
= build_pointer_type (float_type_node
);
12050 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
12051 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
12052 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
12055 tree int_ftype_v4sf_v4sf
12056 = build_function_type_list (integer_type_node
,
12057 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12058 tree v4si_ftype_v4sf_v4sf
12059 = build_function_type_list (V4SI_type_node
,
12060 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12061 /* MMX/SSE/integer conversions. */
12062 tree int_ftype_v4sf
12063 = build_function_type_list (integer_type_node
,
12064 V4SF_type_node
, NULL_TREE
);
12065 tree int_ftype_v8qi
12066 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
12067 tree v4sf_ftype_v4sf_int
12068 = build_function_type_list (V4SF_type_node
,
12069 V4SF_type_node
, integer_type_node
, NULL_TREE
);
12070 tree v4sf_ftype_v4sf_v2si
12071 = build_function_type_list (V4SF_type_node
,
12072 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
12073 tree int_ftype_v4hi_int
12074 = build_function_type_list (integer_type_node
,
12075 V4HI_type_node
, integer_type_node
, NULL_TREE
);
12076 tree v4hi_ftype_v4hi_int_int
12077 = build_function_type_list (V4HI_type_node
, V4HI_type_node
,
12078 integer_type_node
, integer_type_node
,
12080 /* Miscellaneous. */
12081 tree v8qi_ftype_v4hi_v4hi
12082 = build_function_type_list (V8QI_type_node
,
12083 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12084 tree v4hi_ftype_v2si_v2si
12085 = build_function_type_list (V4HI_type_node
,
12086 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12087 tree v4sf_ftype_v4sf_v4sf_int
12088 = build_function_type_list (V4SF_type_node
,
12089 V4SF_type_node
, V4SF_type_node
,
12090 integer_type_node
, NULL_TREE
);
12091 tree v2si_ftype_v4hi_v4hi
12092 = build_function_type_list (V2SI_type_node
,
12093 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12094 tree v4hi_ftype_v4hi_int
12095 = build_function_type_list (V4HI_type_node
,
12096 V4HI_type_node
, integer_type_node
, NULL_TREE
);
12097 tree v4hi_ftype_v4hi_di
12098 = build_function_type_list (V4HI_type_node
,
12099 V4HI_type_node
, long_long_unsigned_type_node
,
12101 tree v2si_ftype_v2si_di
12102 = build_function_type_list (V2SI_type_node
,
12103 V2SI_type_node
, long_long_unsigned_type_node
,
12105 tree void_ftype_void
12106 = build_function_type (void_type_node
, void_list_node
);
12107 tree void_ftype_unsigned
12108 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
12109 tree unsigned_ftype_void
12110 = build_function_type (unsigned_type_node
, void_list_node
);
12112 = build_function_type (long_long_unsigned_type_node
, void_list_node
);
12113 tree v4sf_ftype_void
12114 = build_function_type (V4SF_type_node
, void_list_node
);
12115 tree v2si_ftype_v4sf
12116 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
12117 /* Loads/stores. */
12118 tree void_ftype_v8qi_v8qi_pchar
12119 = build_function_type_list (void_type_node
,
12120 V8QI_type_node
, V8QI_type_node
,
12121 pchar_type_node
, NULL_TREE
);
12122 tree v4sf_ftype_pfloat
12123 = build_function_type_list (V4SF_type_node
, pfloat_type_node
, NULL_TREE
);
12124 /* @@@ the type is bogus */
12125 tree v4sf_ftype_v4sf_pv2si
12126 = build_function_type_list (V4SF_type_node
,
12127 V4SF_type_node
, pv2di_type_node
, NULL_TREE
);
12128 tree void_ftype_pv2si_v4sf
12129 = build_function_type_list (void_type_node
,
12130 pv2di_type_node
, V4SF_type_node
, NULL_TREE
);
12131 tree void_ftype_pfloat_v4sf
12132 = build_function_type_list (void_type_node
,
12133 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
12134 tree void_ftype_pdi_di
12135 = build_function_type_list (void_type_node
,
12136 pdi_type_node
, long_long_unsigned_type_node
,
12138 tree void_ftype_pv2di_v2di
12139 = build_function_type_list (void_type_node
,
12140 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
12141 /* Normal vector unops. */
12142 tree v4sf_ftype_v4sf
12143 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12145 /* Normal vector binops. */
12146 tree v4sf_ftype_v4sf_v4sf
12147 = build_function_type_list (V4SF_type_node
,
12148 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12149 tree v8qi_ftype_v8qi_v8qi
12150 = build_function_type_list (V8QI_type_node
,
12151 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
12152 tree v4hi_ftype_v4hi_v4hi
12153 = build_function_type_list (V4HI_type_node
,
12154 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12155 tree v2si_ftype_v2si_v2si
12156 = build_function_type_list (V2SI_type_node
,
12157 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12158 tree di_ftype_di_di
12159 = build_function_type_list (long_long_unsigned_type_node
,
12160 long_long_unsigned_type_node
,
12161 long_long_unsigned_type_node
, NULL_TREE
);
12163 tree v2si_ftype_v2sf
12164 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
12165 tree v2sf_ftype_v2si
12166 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
12167 tree v2si_ftype_v2si
12168 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12169 tree v2sf_ftype_v2sf
12170 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12171 tree v2sf_ftype_v2sf_v2sf
12172 = build_function_type_list (V2SF_type_node
,
12173 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12174 tree v2si_ftype_v2sf_v2sf
12175 = build_function_type_list (V2SI_type_node
,
12176 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12177 tree pint_type_node
= build_pointer_type (integer_type_node
);
12178 tree pdouble_type_node
= build_pointer_type (double_type_node
);
12179 tree int_ftype_v2df_v2df
12180 = build_function_type_list (integer_type_node
,
12181 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12184 = build_function_type (intTI_type_node
, void_list_node
);
12185 tree ti_ftype_ti_ti
12186 = build_function_type_list (intTI_type_node
,
12187 intTI_type_node
, intTI_type_node
, NULL_TREE
);
12188 tree void_ftype_pvoid
12189 = build_function_type_list (void_type_node
, ptr_type_node
, NULL_TREE
);
12191 = build_function_type_list (V2DI_type_node
,
12192 long_long_unsigned_type_node
, NULL_TREE
);
12193 tree v4sf_ftype_v4si
12194 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
12195 tree v4si_ftype_v4sf
12196 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
12197 tree v2df_ftype_v4si
12198 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
12199 tree v4si_ftype_v2df
12200 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
12201 tree v2si_ftype_v2df
12202 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
12203 tree v4sf_ftype_v2df
12204 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
12205 tree v2df_ftype_v2si
12206 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
12207 tree v2df_ftype_v4sf
12208 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
12209 tree int_ftype_v2df
12210 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
12211 tree v2df_ftype_v2df_int
12212 = build_function_type_list (V2DF_type_node
,
12213 V2DF_type_node
, integer_type_node
, NULL_TREE
);
12214 tree v4sf_ftype_v4sf_v2df
12215 = build_function_type_list (V4SF_type_node
,
12216 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
12217 tree v2df_ftype_v2df_v4sf
12218 = build_function_type_list (V2DF_type_node
,
12219 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
12220 tree v2df_ftype_v2df_v2df_int
12221 = build_function_type_list (V2DF_type_node
,
12222 V2DF_type_node
, V2DF_type_node
,
12225 tree v2df_ftype_v2df_pv2si
12226 = build_function_type_list (V2DF_type_node
,
12227 V2DF_type_node
, pv2si_type_node
, NULL_TREE
);
12228 tree void_ftype_pv2si_v2df
12229 = build_function_type_list (void_type_node
,
12230 pv2si_type_node
, V2DF_type_node
, NULL_TREE
);
12231 tree void_ftype_pdouble_v2df
12232 = build_function_type_list (void_type_node
,
12233 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
12234 tree void_ftype_pint_int
12235 = build_function_type_list (void_type_node
,
12236 pint_type_node
, integer_type_node
, NULL_TREE
);
12237 tree void_ftype_v16qi_v16qi_pchar
12238 = build_function_type_list (void_type_node
,
12239 V16QI_type_node
, V16QI_type_node
,
12240 pchar_type_node
, NULL_TREE
);
12241 tree v2df_ftype_pdouble
12242 = build_function_type_list (V2DF_type_node
, pdouble_type_node
, NULL_TREE
);
12243 tree v2df_ftype_v2df_v2df
12244 = build_function_type_list (V2DF_type_node
,
12245 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12246 tree v16qi_ftype_v16qi_v16qi
12247 = build_function_type_list (V16QI_type_node
,
12248 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
12249 tree v8hi_ftype_v8hi_v8hi
12250 = build_function_type_list (V8HI_type_node
,
12251 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
12252 tree v4si_ftype_v4si_v4si
12253 = build_function_type_list (V4SI_type_node
,
12254 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
12255 tree v2di_ftype_v2di_v2di
12256 = build_function_type_list (V2DI_type_node
,
12257 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
12258 tree v2di_ftype_v2df_v2df
12259 = build_function_type_list (V2DI_type_node
,
12260 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12261 tree v2df_ftype_v2df
12262 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12263 tree v2df_ftype_double
12264 = build_function_type_list (V2DF_type_node
, double_type_node
, NULL_TREE
);
12265 tree v2df_ftype_double_double
12266 = build_function_type_list (V2DF_type_node
,
12267 double_type_node
, double_type_node
, NULL_TREE
);
12268 tree int_ftype_v8hi_int
12269 = build_function_type_list (integer_type_node
,
12270 V8HI_type_node
, integer_type_node
, NULL_TREE
);
12271 tree v8hi_ftype_v8hi_int_int
12272 = build_function_type_list (V8HI_type_node
,
12273 V8HI_type_node
, integer_type_node
,
12274 integer_type_node
, NULL_TREE
);
12275 tree v2di_ftype_v2di_int
12276 = build_function_type_list (V2DI_type_node
,
12277 V2DI_type_node
, integer_type_node
, NULL_TREE
);
12278 tree v4si_ftype_v4si_int
12279 = build_function_type_list (V4SI_type_node
,
12280 V4SI_type_node
, integer_type_node
, NULL_TREE
);
12281 tree v8hi_ftype_v8hi_int
12282 = build_function_type_list (V8HI_type_node
,
12283 V8HI_type_node
, integer_type_node
, NULL_TREE
);
12284 tree v8hi_ftype_v8hi_v2di
12285 = build_function_type_list (V8HI_type_node
,
12286 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
12287 tree v4si_ftype_v4si_v2di
12288 = build_function_type_list (V4SI_type_node
,
12289 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
12290 tree v4si_ftype_v8hi_v8hi
12291 = build_function_type_list (V4SI_type_node
,
12292 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
12293 tree di_ftype_v8qi_v8qi
12294 = build_function_type_list (long_long_unsigned_type_node
,
12295 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
12296 tree v2di_ftype_v16qi_v16qi
12297 = build_function_type_list (V2DI_type_node
,
12298 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
12299 tree int_ftype_v16qi
12300 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
12302 /* Add all builtins that are more or less simple operations on two
12304 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
12306 /* Use one of the operands; the target can have a different mode for
12307 mask-generating compares. */
12308 enum machine_mode mode
;
12313 mode
= insn_data
[d
->icode
].operand
[1].mode
;
12318 type
= v16qi_ftype_v16qi_v16qi
;
12321 type
= v8hi_ftype_v8hi_v8hi
;
12324 type
= v4si_ftype_v4si_v4si
;
12327 type
= v2di_ftype_v2di_v2di
;
12330 type
= v2df_ftype_v2df_v2df
;
12333 type
= ti_ftype_ti_ti
;
12336 type
= v4sf_ftype_v4sf_v4sf
;
12339 type
= v8qi_ftype_v8qi_v8qi
;
12342 type
= v4hi_ftype_v4hi_v4hi
;
12345 type
= v2si_ftype_v2si_v2si
;
12348 type
= di_ftype_di_di
;
12355 /* Override for comparisons. */
12356 if (d
->icode
== CODE_FOR_maskcmpv4sf3
12357 || d
->icode
== CODE_FOR_maskncmpv4sf3
12358 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
12359 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
)
12360 type
= v4si_ftype_v4sf_v4sf
;
12362 if (d
->icode
== CODE_FOR_maskcmpv2df3
12363 || d
->icode
== CODE_FOR_maskncmpv2df3
12364 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
12365 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
12366 type
= v2di_ftype_v2df_v2df
;
12368 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
12371 /* Add the remaining MMX insns with somewhat more complicated types. */
12372 def_builtin (MASK_MMX
, "__builtin_ia32_mmx_zero", di_ftype_void
, IX86_BUILTIN_MMX_ZERO
);
12373 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
12374 def_builtin (MASK_MMX
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
12375 def_builtin (MASK_MMX
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
12376 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
12377 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
12378 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
12380 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
12381 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
12382 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
12384 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
12385 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
12387 def_builtin (MASK_MMX
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
12388 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
12390 /* comi/ucomi insns. */
12391 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
12392 if (d
->mask
== MASK_SSE2
)
12393 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
12395 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
12397 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
12398 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
12399 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
12401 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
12402 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
12403 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
12404 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
12405 def_builtin (MASK_SSE1
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
12406 def_builtin (MASK_SSE1
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
12408 def_builtin (MASK_SSE1
, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf
, IX86_BUILTIN_ANDPS
);
12409 def_builtin (MASK_SSE1
, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf
, IX86_BUILTIN_ANDNPS
);
12410 def_builtin (MASK_SSE1
, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf
, IX86_BUILTIN_ORPS
);
12411 def_builtin (MASK_SSE1
, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf
, IX86_BUILTIN_XORPS
);
12413 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pextrw", int_ftype_v4hi_int
, IX86_BUILTIN_PEXTRW
);
12414 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int
, IX86_BUILTIN_PINSRW
);
12416 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
12418 def_builtin (MASK_SSE1
, "__builtin_ia32_loadaps", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADAPS
);
12419 def_builtin (MASK_SSE1
, "__builtin_ia32_loadups", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADUPS
);
12420 def_builtin (MASK_SSE1
, "__builtin_ia32_loadss", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADSS
);
12421 def_builtin (MASK_SSE1
, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREAPS
);
12422 def_builtin (MASK_SSE1
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
12423 def_builtin (MASK_SSE1
, "__builtin_ia32_storess", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORESS
);
12425 def_builtin (MASK_SSE1
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
12426 def_builtin (MASK_SSE1
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
12427 def_builtin (MASK_SSE1
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
12428 def_builtin (MASK_SSE1
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
12430 def_builtin (MASK_SSE1
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
12431 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
12432 def_builtin (MASK_SSE1
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
12433 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
12435 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
12437 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
12439 def_builtin (MASK_SSE1
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
12440 def_builtin (MASK_SSE1
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
12441 def_builtin (MASK_SSE1
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
12442 def_builtin (MASK_SSE1
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
12443 def_builtin (MASK_SSE1
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
12444 def_builtin (MASK_SSE1
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
12446 def_builtin (MASK_SSE1
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
12448 /* Original 3DNow! */
12449 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
12450 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
12451 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
12452 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
12453 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
12454 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
12455 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
12456 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
12457 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
12458 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
12459 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
12460 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
12461 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
12462 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
12463 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
12464 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
12465 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
12466 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
12467 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
12468 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
12470 /* 3DNow! extension as used in the Athlon CPU. */
12471 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
12472 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
12473 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
12474 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
12475 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
12476 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
12478 def_builtin (MASK_SSE1
, "__builtin_ia32_setzerops", v4sf_ftype_void
, IX86_BUILTIN_SSE_ZERO
);
12481 def_builtin (MASK_SSE2
, "__builtin_ia32_pextrw128", int_ftype_v8hi_int
, IX86_BUILTIN_PEXTRW128
);
12482 def_builtin (MASK_SSE2
, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int
, IX86_BUILTIN_PINSRW128
);
12484 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
12485 def_builtin (MASK_SSE2
, "__builtin_ia32_movq2dq", v2di_ftype_di
, IX86_BUILTIN_MOVQ2DQ
);
12487 def_builtin (MASK_SSE2
, "__builtin_ia32_loadapd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADAPD
);
12488 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADUPD
);
12489 def_builtin (MASK_SSE2
, "__builtin_ia32_loadsd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADSD
);
12490 def_builtin (MASK_SSE2
, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREAPD
);
12491 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
12492 def_builtin (MASK_SSE2
, "__builtin_ia32_storesd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORESD
);
12494 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADHPD
);
12495 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADLPD
);
12496 def_builtin (MASK_SSE2
, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STOREHPD
);
12497 def_builtin (MASK_SSE2
, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STORELPD
);
12499 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
12500 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
12501 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
12502 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
12503 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
12505 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
12506 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
12507 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
12508 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
12510 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
12511 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
12513 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
12515 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
12516 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
12518 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
12519 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
12520 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
12521 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
12522 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
12524 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
12526 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
12527 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
12529 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
12530 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
12531 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
12533 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
12534 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
12535 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
12537 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd1", v2df_ftype_double
, IX86_BUILTIN_SETPD1
);
12538 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd", v2df_ftype_double_double
, IX86_BUILTIN_SETPD
);
12539 def_builtin (MASK_SSE2
, "__builtin_ia32_setzeropd", ti_ftype_void
, IX86_BUILTIN_CLRPD
);
12540 def_builtin (MASK_SSE2
, "__builtin_ia32_loadpd1", v2df_ftype_pdouble
, IX86_BUILTIN_LOADPD1
);
12541 def_builtin (MASK_SSE2
, "__builtin_ia32_loadrpd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADRPD
);
12542 def_builtin (MASK_SSE2
, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREPD1
);
12543 def_builtin (MASK_SSE2
, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORERPD
);
12545 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pvoid
, IX86_BUILTIN_CLFLUSH
);
12546 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
12547 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
12549 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
12550 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
12551 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
12553 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
12554 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
12555 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
12557 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
12558 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
12560 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
12561 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
12562 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
12564 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
12565 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
12566 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
12568 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
12569 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
12571 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
12574 /* Errors in the source file can cause expand_expr to return const0_rtx
12575 where we expect a vector. To avoid crashing, use one of the vector
12576 clear instructions. */
/* NOTE(review): this region looks like a lossy extraction of the original
   file -- the function's return type, braces, `return` statements and an
   apparent `else` between the MMX and SSE paths are not visible.  The
   comments added below describe only what the visible tokens establish;
   confirm against a pristine copy of the source before relying on them.  */
/* safe_vector_operand (X, MODE): ensure X is a usable vector operand of
   MODE.  When X is the const0_rtx placeholder produced for erroneous
   source, materialize a fresh register and emit a clear instruction.  */
12578 safe_vector_operand (x
, mode
)
12580 enum machine_mode mode
;
/* A non-const0_rtx X is already a genuine operand -- presumably returned
   unchanged by a dropped `return x;` statement (TODO confirm).  */
12582 if (x
!= const0_rtx
)
12584 x
= gen_reg_rtx (mode
);
/* MMX / 3DNow! modes: clear via mmx_clrdi on a DImode view of X.  */
12586 if (VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
))
12587 emit_insn (gen_mmx_clrdi (mode
== DImode
? x
12588 : gen_rtx_SUBREG (DImode
, x
, 0)));
/* Otherwise clear via sse_clrv4sf on a V4SFmode view (the `else` keyword
   itself is not visible in this extraction).  */
12590 emit_insn (gen_sse_clrv4sf (mode
== V4SFmode
? x
12591 : gen_rtx_SUBREG (V4SFmode
, x
, 0)));
12595 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* NOTE(review): lossy extraction -- return type, braces and the tail of
   the function (presumably failure check on `pat`, `emit_insn (pat)` and
   the final `return target;`) are not visible here.  Comments describe
   only the visible logic.  */
/* Expand a two-operand builtin whose insn pattern is ICODE, taking the
   argument expressions from ARGLIST and producing the result in TARGET
   (or in a fresh register when TARGET is missing or unsuitable).  */
12598 ix86_expand_binop_builtin (icode
, arglist
, target
)
12599 enum insn_code icode
;
/* Expand both call arguments to RTL with no preferred target.  */
12604 tree arg0
= TREE_VALUE (arglist
);
12605 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12606 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12607 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
/* Result and input operand modes come from the insn's operand table.  */
12608 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12609 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
12610 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
/* Guard against const0_rtx standing in for a vector (see
   safe_vector_operand).  */
12612 if (VECTOR_MODE_P (mode0
))
12613 op0
= safe_vector_operand (op0
, mode0
);
12614 if (VECTOR_MODE_P (mode1
))
12615 op1
= safe_vector_operand (op1
, mode1
);
/* NOTE(review): the first arm of this condition (presumably a null check
   on `target`) is missing from the extraction.  */
12618 || GET_MODE (target
) != tmode
12619 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12620 target
= gen_reg_rtx (tmode
);
12622 /* In case the insn wants input operands in modes different from
12623 the result, abort. */
12624 if (GET_MODE (op0
) != mode0
|| GET_MODE (op1
) != mode1
)
/* Force operands into registers when they fail the insn's predicates.  */
12627 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12628 op0
= copy_to_mode_reg (mode0
, op0
);
12629 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
12630 op1
= copy_to_mode_reg (mode1
, op1
);
12632 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12633 yet one of the two must not be a memory. This is normally enforced
12634 by expanders, but we didn't bother to create one here. */
12635 if (GET_CODE (op0
) == MEM
&& GET_CODE (op1
) == MEM
)
12636 op0
= copy_to_mode_reg (mode0
, op0
);
/* Generate the insn pattern for TARGET = OP0 <op> OP1.  */
12638 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12645 /* In type_for_mode we restrict the ability to create TImode types
12646 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
12647 to have a V4SFmode signature. Convert them in-place to TImode. */
/* NOTE(review): lossy extraction -- return type, braces and the
   failure/emit handling of `pat` are not visible; comments cover only
   the visible logic.  */
/* Expand an SSE logical builtin (and/andn/or/xor) whose pattern ICODE
   operates on TImode, converting the V4SFmode arguments to TImode,
   performing the operation, and returning a V4SFmode view of the
   result.  */
12650 ix86_expand_timode_binop_builtin (icode
, arglist
, target
)
12651 enum insn_code icode
;
12656 tree arg0
= TREE_VALUE (arglist
);
12657 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12658 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12659 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
/* Reinterpret both operands as TImode and compute into a TImode
   register; the caller-supplied TARGET is deliberately replaced.  */
12661 op0
= gen_lowpart (TImode
, op0
);
12662 op1
= gen_lowpart (TImode
, op1
);
12663 target
= gen_reg_rtx (TImode
);
/* Force operands into registers when they fail the insn's predicates.  */
12665 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, TImode
))
12666 op0
= copy_to_mode_reg (TImode
, op0
);
12667 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, TImode
))
12668 op1
= copy_to_mode_reg (TImode
, op1
);
12670 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12671 yet one of the two must not be a memory. This is normally enforced
12672 by expanders, but we didn't bother to create one here. */
12673 if (GET_CODE (op0
) == MEM
&& GET_CODE (op1
) == MEM
)
12674 op0
= copy_to_mode_reg (TImode
, op0
);
12676 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
/* Hand the TImode result back in the V4SFmode shape the builtin's
   declared signature promises.  */
12681 return gen_lowpart (V4SFmode
, target
);
12684 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* NOTE(review): lossy extraction -- return type, braces and the tail
   handling of `pat` are not visible; comments cover only the visible
   logic.  */
/* Expand a store builtin ICODE: ARGLIST's first argument is the
   destination address, the second the value to store.  */
12687 ix86_expand_store_builtin (icode
, arglist
)
12688 enum insn_code icode
;
12692 tree arg0
= TREE_VALUE (arglist
);
12693 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12694 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12695 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
/* Operand 0 is the memory destination, operand 1 the stored value.  */
12696 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
12697 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
12699 if (VECTOR_MODE_P (mode1
))
12700 op1
= safe_vector_operand (op1
, mode1
);
/* Turn the pointer argument into a MEM of the insn's destination mode,
   with the address forced into a Pmode register.  */
12702 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
12704 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
12705 op1
= copy_to_mode_reg (mode1
, op1
);
12707 pat
= GEN_FCN (icode
) (op0
, op1
);
12713 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* NOTE(review): lossy extraction -- return type, braces, the conditional
   structure around DO_LOAD (presumably `if (do_load) ... else ...`) and
   the tail handling of `pat` are not visible; comments cover only the
   visible logic.  */
/* Expand a one-operand builtin ICODE.  When DO_LOAD is nonzero the single
   argument is apparently treated as an address to load from -- TODO
   confirm the dropped branch structure against a pristine tree.  */
12716 ix86_expand_unop_builtin (icode
, arglist
, target
, do_load
)
12717 enum insn_code icode
;
12723 tree arg0
= TREE_VALUE (arglist
);
12724 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12725 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12726 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
/* NOTE(review): the first arm of this condition (presumably a null check
   on `target`) is missing from the extraction.  */
12729 || GET_MODE (target
) != tmode
12730 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12731 target
= gen_reg_rtx (tmode
);
/* Load path: wrap the pointer argument in a MEM of the operand mode.  */
12733 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
12736 if (VECTOR_MODE_P (mode0
))
12737 op0
= safe_vector_operand (op0
, mode0
);
12739 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12740 op0
= copy_to_mode_reg (mode0
, op0
);
12743 pat
= GEN_FCN (icode
) (target
, op0
);
12750 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12751 sqrtss, rsqrtss, rcpss. */
/* NOTE(review): lossy extraction -- return type, braces, the statement
   that initializes OP1 (presumably `op1 = op0;`, since these scalar
   insns pass the source twice) and the tail handling of `pat` are not
   visible; comments cover only the visible logic.  */
12754 ix86_expand_unop1_builtin (icode
, arglist
, target
)
12755 enum insn_code icode
;
12760 tree arg0
= TREE_VALUE (arglist
);
/* OP1 is declared here but its assignment is not visible in this
   extraction -- TODO confirm it is set before the predicate check
   below.  */
12761 rtx op1
, op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12762 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12763 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
/* NOTE(review): the first arm of this condition (presumably a null check
   on `target`) is missing from the extraction.  */
12766 || GET_MODE (target
) != tmode
12767 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12768 target
= gen_reg_rtx (tmode
);
12770 if (VECTOR_MODE_P (mode0
))
12771 op0
= safe_vector_operand (op0
, mode0
);
12773 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12774 op0
= copy_to_mode_reg (mode0
, op0
);
/* Both inputs are checked against MODE0 -- consistent with OP1 being a
   copy of the same scalar source.  */
12777 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
12778 op1
= copy_to_mode_reg (mode0
, op1
);
12780 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12787 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* NOTE(review): lossy extraction -- return type, braces, the body of the
   operand-swap path (only the temporary-register shuffle is partially
   visible) and the tail handling of `pat` are not visible; comments
   cover only the visible logic.  */
/* Expand a mask-generating SSE compare described by builtin_description
   D: compare the two vector arguments with D->comparison and produce the
   mask result in TARGET.  */
12790 ix86_expand_sse_compare (d
, arglist
, target
)
12791 const struct builtin_description
*d
;
12796 tree arg0
= TREE_VALUE (arglist
);
12797 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12798 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12799 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
/* Result and input modes come from the insn's operand table; the RTL
   comparison code comes from the builtin descriptor.  */
12801 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
12802 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
12803 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
12804 enum rtx_code comparison
= d
->comparison
;
12806 if (VECTOR_MODE_P (mode0
))
12807 op0
= safe_vector_operand (op0
, mode0
);
12808 if (VECTOR_MODE_P (mode1
))
12809 op1
= safe_vector_operand (op1
, mode1
);
12811 /* Swap operands if we have a comparison that isn't available in
/* NOTE(review): most of the swap branch is missing from the extraction;
   only the temporary register holding OP1 is visible.  */
12815 rtx tmp
= gen_reg_rtx (mode1
);
12816 emit_move_insn (tmp
, op1
);
/* NOTE(review): the first arm of this condition (presumably a null check
   on `target`) is missing from the extraction.  */
12822 || GET_MODE (target
) != tmode
12823 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
12824 target
= gen_reg_rtx (tmode
);
12826 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
12827 op0
= copy_to_mode_reg (mode0
, op0
);
12828 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
12829 op1
= copy_to_mode_reg (mode1
, op1
);
/* Build the comparison rtx and feed it as the insn's third operand.  */
12831 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
12832 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
12839 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* NOTE(review): lossy extraction -- return type, braces, the operand-swap
   body, the failure check on `pat` and the second argument of the inner
   gen_rtx_fmt_ee in the SET below are not visible; comments cover only
   the visible logic.  */
/* Expand a comi/ucomi builtin described by D: compare the two arguments,
   then materialize the resulting flag D->comparison as an integer 0/1
   value, returned as an SImode register.  */
12842 ix86_expand_sse_comi (d
, arglist
, target
)
12843 const struct builtin_description
*d
;
12848 tree arg0
= TREE_VALUE (arglist
);
12849 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12850 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12851 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12853 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
12854 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
12855 enum rtx_code comparison
= d
->comparison
;
12857 if (VECTOR_MODE_P (mode0
))
12858 op0
= safe_vector_operand (op0
, mode0
);
12859 if (VECTOR_MODE_P (mode1
))
12860 op1
= safe_vector_operand (op1
, mode1
);
12862 /* Swap operands if we have a comparison that isn't available in
/* Result lives in a fresh zeroed SImode register; a QImode SUBREG of it
   receives the flag bit via the strict_low_part SET below.  The
   caller-supplied TARGET is deliberately replaced.  */
12871 target
= gen_reg_rtx (SImode
);
12872 emit_move_insn (target
, const0_rtx
);
12873 target
= gen_rtx_SUBREG (QImode
, target
, 0);
12875 if (! (*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
12876 op0
= copy_to_mode_reg (mode0
, op0
);
12877 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
12878 op1
= copy_to_mode_reg (mode1
, op1
);
/* comi insns take the comparison rtx as operand 2 and set the flags
   register rather than a general operand.  */
12880 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
12881 pat
= GEN_FCN (d
->icode
) (op0
, op1
, op2
);
/* Extract the flag: low byte of TARGET = (FLAGS_REG <comparison> ...).
   NOTE(review): the second operand of the inner comparison (presumably
   const0_rtx) is missing from the extraction.  */
12885 emit_insn (gen_rtx_SET (VOIDmode
,
12886 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
12887 gen_rtx_fmt_ee (comparison
, QImode
,
12888 gen_rtx_REG (CCmode
, FLAGS_REG
),
/* Return the full SImode register underlying the QImode SUBREG.  */
12891 return SUBREG_REG (target
);
12894 /* Expand an expression EXP that calls a built-in function,
12895 with result going to TARGET if that's convenient
12896 (and in mode MODE if that's convenient).
12897 SUBTARGET may be used as the target for computing one of EXP's operands.
12898 IGNORE is nonzero if the value is to be ignored. */
12901 ix86_expand_builtin (exp
, target
, subtarget
, mode
, ignore
)
12904 rtx subtarget ATTRIBUTE_UNUSED
;
12905 enum machine_mode mode ATTRIBUTE_UNUSED
;
12906 int ignore ATTRIBUTE_UNUSED
;
12908 const struct builtin_description
*d
;
12910 enum insn_code icode
;
12911 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
12912 tree arglist
= TREE_OPERAND (exp
, 1);
12913 tree arg0
, arg1
, arg2
;
12914 rtx op0
, op1
, op2
, pat
;
12915 enum machine_mode tmode
, mode0
, mode1
, mode2
;
12916 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
12920 case IX86_BUILTIN_EMMS
:
12921 emit_insn (gen_emms ());
12924 case IX86_BUILTIN_SFENCE
:
12925 emit_insn (gen_sfence ());
12928 case IX86_BUILTIN_PEXTRW
:
12929 case IX86_BUILTIN_PEXTRW128
:
12930 icode
= (fcode
== IX86_BUILTIN_PEXTRW
12931 ? CODE_FOR_mmx_pextrw
12932 : CODE_FOR_sse2_pextrw
);
12933 arg0
= TREE_VALUE (arglist
);
12934 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12935 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12936 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12937 tmode
= insn_data
[icode
].operand
[0].mode
;
12938 mode0
= insn_data
[icode
].operand
[1].mode
;
12939 mode1
= insn_data
[icode
].operand
[2].mode
;
12941 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12942 op0
= copy_to_mode_reg (mode0
, op0
);
12943 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
12945 /* @@@ better error message */
12946 error ("selector must be an immediate");
12947 return gen_reg_rtx (tmode
);
12950 || GET_MODE (target
) != tmode
12951 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12952 target
= gen_reg_rtx (tmode
);
12953 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12959 case IX86_BUILTIN_PINSRW
:
12960 case IX86_BUILTIN_PINSRW128
:
12961 icode
= (fcode
== IX86_BUILTIN_PINSRW
12962 ? CODE_FOR_mmx_pinsrw
12963 : CODE_FOR_sse2_pinsrw
);
12964 arg0
= TREE_VALUE (arglist
);
12965 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12966 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
12967 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12968 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12969 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
12970 tmode
= insn_data
[icode
].operand
[0].mode
;
12971 mode0
= insn_data
[icode
].operand
[1].mode
;
12972 mode1
= insn_data
[icode
].operand
[2].mode
;
12973 mode2
= insn_data
[icode
].operand
[3].mode
;
12975 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12976 op0
= copy_to_mode_reg (mode0
, op0
);
12977 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
12978 op1
= copy_to_mode_reg (mode1
, op1
);
12979 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
12981 /* @@@ better error message */
12982 error ("selector must be an immediate");
12986 || GET_MODE (target
) != tmode
12987 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12988 target
= gen_reg_rtx (tmode
);
12989 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
12995 case IX86_BUILTIN_MASKMOVQ
:
12996 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
12997 ? (TARGET_64BIT
? CODE_FOR_mmx_maskmovq_rex
: CODE_FOR_mmx_maskmovq
)
12998 : CODE_FOR_sse2_maskmovdqu
);
12999 /* Note the arg order is different from the operand order. */
13000 arg1
= TREE_VALUE (arglist
);
13001 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
13002 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13003 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13004 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13005 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13006 mode0
= insn_data
[icode
].operand
[0].mode
;
13007 mode1
= insn_data
[icode
].operand
[1].mode
;
13008 mode2
= insn_data
[icode
].operand
[2].mode
;
13010 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
13011 op0
= copy_to_mode_reg (mode0
, op0
);
13012 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
13013 op1
= copy_to_mode_reg (mode1
, op1
);
13014 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
13015 op2
= copy_to_mode_reg (mode2
, op2
);
13016 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
13022 case IX86_BUILTIN_SQRTSS
:
13023 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2
, arglist
, target
);
13024 case IX86_BUILTIN_RSQRTSS
:
13025 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2
, arglist
, target
);
13026 case IX86_BUILTIN_RCPSS
:
13027 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2
, arglist
, target
);
13029 case IX86_BUILTIN_ANDPS
:
13030 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3
,
13032 case IX86_BUILTIN_ANDNPS
:
13033 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3
,
13035 case IX86_BUILTIN_ORPS
:
13036 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3
,
13038 case IX86_BUILTIN_XORPS
:
13039 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3
,
13042 case IX86_BUILTIN_LOADAPS
:
13043 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps
, arglist
, target
, 1);
13045 case IX86_BUILTIN_LOADUPS
:
13046 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
13048 case IX86_BUILTIN_STOREAPS
:
13049 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
);
13050 case IX86_BUILTIN_STOREUPS
:
13051 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
13053 case IX86_BUILTIN_LOADSS
:
13054 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss
, arglist
, target
, 1);
13056 case IX86_BUILTIN_STORESS
:
13057 return ix86_expand_store_builtin (CODE_FOR_sse_storess
, arglist
);
13059 case IX86_BUILTIN_LOADHPS
:
13060 case IX86_BUILTIN_LOADLPS
:
13061 case IX86_BUILTIN_LOADHPD
:
13062 case IX86_BUILTIN_LOADLPD
:
13063 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_movhps
13064 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_movlps
13065 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_movhpd
13066 : CODE_FOR_sse2_movlpd
);
13067 arg0
= TREE_VALUE (arglist
);
13068 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13069 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13070 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13071 tmode
= insn_data
[icode
].operand
[0].mode
;
13072 mode0
= insn_data
[icode
].operand
[1].mode
;
13073 mode1
= insn_data
[icode
].operand
[2].mode
;
13075 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13076 op0
= copy_to_mode_reg (mode0
, op0
);
13077 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
13079 || GET_MODE (target
) != tmode
13080 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13081 target
= gen_reg_rtx (tmode
);
13082 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13088 case IX86_BUILTIN_STOREHPS
:
13089 case IX86_BUILTIN_STORELPS
:
13090 case IX86_BUILTIN_STOREHPD
:
13091 case IX86_BUILTIN_STORELPD
:
13092 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_movhps
13093 : fcode
== IX86_BUILTIN_STORELPS
? CODE_FOR_sse_movlps
13094 : fcode
== IX86_BUILTIN_STOREHPD
? CODE_FOR_sse2_movhpd
13095 : CODE_FOR_sse2_movlpd
);
13096 arg0
= TREE_VALUE (arglist
);
13097 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13098 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13099 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13100 mode0
= insn_data
[icode
].operand
[1].mode
;
13101 mode1
= insn_data
[icode
].operand
[2].mode
;
13103 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13104 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13105 op1
= copy_to_mode_reg (mode1
, op1
);
13107 pat
= GEN_FCN (icode
) (op0
, op0
, op1
);
13113 case IX86_BUILTIN_MOVNTPS
:
13114 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
13115 case IX86_BUILTIN_MOVNTQ
:
13116 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
13118 case IX86_BUILTIN_LDMXCSR
:
13119 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
13120 target
= assign_386_stack_local (SImode
, 0);
13121 emit_move_insn (target
, op0
);
13122 emit_insn (gen_ldmxcsr (target
));
13125 case IX86_BUILTIN_STMXCSR
:
13126 target
= assign_386_stack_local (SImode
, 0);
13127 emit_insn (gen_stmxcsr (target
));
13128 return copy_to_mode_reg (SImode
, target
);
13130 case IX86_BUILTIN_SHUFPS
:
13131 case IX86_BUILTIN_SHUFPD
:
13132 icode
= (fcode
== IX86_BUILTIN_SHUFPS
13133 ? CODE_FOR_sse_shufps
13134 : CODE_FOR_sse2_shufpd
);
13135 arg0
= TREE_VALUE (arglist
);
13136 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13137 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13138 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13139 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13140 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13141 tmode
= insn_data
[icode
].operand
[0].mode
;
13142 mode0
= insn_data
[icode
].operand
[1].mode
;
13143 mode1
= insn_data
[icode
].operand
[2].mode
;
13144 mode2
= insn_data
[icode
].operand
[3].mode
;
13146 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13147 op0
= copy_to_mode_reg (mode0
, op0
);
13148 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13149 op1
= copy_to_mode_reg (mode1
, op1
);
13150 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
13152 /* @@@ better error message */
13153 error ("mask must be an immediate");
13154 return gen_reg_rtx (tmode
);
13157 || GET_MODE (target
) != tmode
13158 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13159 target
= gen_reg_rtx (tmode
);
13160 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
13166 case IX86_BUILTIN_PSHUFW
:
13167 case IX86_BUILTIN_PSHUFD
:
13168 case IX86_BUILTIN_PSHUFHW
:
13169 case IX86_BUILTIN_PSHUFLW
:
13170 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
13171 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
13172 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
13173 : CODE_FOR_mmx_pshufw
);
13174 arg0
= TREE_VALUE (arglist
);
13175 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13176 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13177 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13178 tmode
= insn_data
[icode
].operand
[0].mode
;
13179 mode1
= insn_data
[icode
].operand
[1].mode
;
13180 mode2
= insn_data
[icode
].operand
[2].mode
;
13182 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
13183 op0
= copy_to_mode_reg (mode1
, op0
);
13184 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
13186 /* @@@ better error message */
13187 error ("mask must be an immediate");
13191 || GET_MODE (target
) != tmode
13192 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13193 target
= gen_reg_rtx (tmode
);
13194 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13200 case IX86_BUILTIN_FEMMS
:
13201 emit_insn (gen_femms ());
13204 case IX86_BUILTIN_PAVGUSB
:
13205 return ix86_expand_binop_builtin (CODE_FOR_pavgusb
, arglist
, target
);
13207 case IX86_BUILTIN_PF2ID
:
13208 return ix86_expand_unop_builtin (CODE_FOR_pf2id
, arglist
, target
, 0);
13210 case IX86_BUILTIN_PFACC
:
13211 return ix86_expand_binop_builtin (CODE_FOR_pfacc
, arglist
, target
);
13213 case IX86_BUILTIN_PFADD
:
13214 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3
, arglist
, target
);
13216 case IX86_BUILTIN_PFCMPEQ
:
13217 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3
, arglist
, target
);
13219 case IX86_BUILTIN_PFCMPGE
:
13220 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3
, arglist
, target
);
13222 case IX86_BUILTIN_PFCMPGT
:
13223 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3
, arglist
, target
);
13225 case IX86_BUILTIN_PFMAX
:
13226 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3
, arglist
, target
);
13228 case IX86_BUILTIN_PFMIN
:
13229 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3
, arglist
, target
);
13231 case IX86_BUILTIN_PFMUL
:
13232 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3
, arglist
, target
);
13234 case IX86_BUILTIN_PFRCP
:
13235 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2
, arglist
, target
, 0);
13237 case IX86_BUILTIN_PFRCPIT1
:
13238 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3
, arglist
, target
);
13240 case IX86_BUILTIN_PFRCPIT2
:
13241 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3
, arglist
, target
);
13243 case IX86_BUILTIN_PFRSQIT1
:
13244 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3
, arglist
, target
);
13246 case IX86_BUILTIN_PFRSQRT
:
13247 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2
, arglist
, target
, 0);
13249 case IX86_BUILTIN_PFSUB
:
13250 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3
, arglist
, target
);
13252 case IX86_BUILTIN_PFSUBR
:
13253 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3
, arglist
, target
);
13255 case IX86_BUILTIN_PI2FD
:
13256 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2
, arglist
, target
, 0);
13258 case IX86_BUILTIN_PMULHRW
:
13259 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3
, arglist
, target
);
13261 case IX86_BUILTIN_PF2IW
:
13262 return ix86_expand_unop_builtin (CODE_FOR_pf2iw
, arglist
, target
, 0);
13264 case IX86_BUILTIN_PFNACC
:
13265 return ix86_expand_binop_builtin (CODE_FOR_pfnacc
, arglist
, target
);
13267 case IX86_BUILTIN_PFPNACC
:
13268 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc
, arglist
, target
);
13270 case IX86_BUILTIN_PI2FW
:
13271 return ix86_expand_unop_builtin (CODE_FOR_pi2fw
, arglist
, target
, 0);
13273 case IX86_BUILTIN_PSWAPDSI
:
13274 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2
, arglist
, target
, 0);
13276 case IX86_BUILTIN_PSWAPDSF
:
13277 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2
, arglist
, target
, 0);
13279 case IX86_BUILTIN_SSE_ZERO
:
13280 target
= gen_reg_rtx (V4SFmode
);
13281 emit_insn (gen_sse_clrv4sf (target
));
13284 case IX86_BUILTIN_MMX_ZERO
:
13285 target
= gen_reg_rtx (DImode
);
13286 emit_insn (gen_mmx_clrdi (target
));
13289 case IX86_BUILTIN_SQRTSD
:
13290 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2
, arglist
, target
);
13291 case IX86_BUILTIN_LOADAPD
:
13292 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
, target
, 1);
13293 case IX86_BUILTIN_LOADUPD
:
13294 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
13296 case IX86_BUILTIN_STOREAPD
:
13297 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13298 case IX86_BUILTIN_STOREUPD
:
13299 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
13301 case IX86_BUILTIN_LOADSD
:
13302 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
, target
, 1);
13304 case IX86_BUILTIN_STORESD
:
13305 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd
, arglist
);
13307 case IX86_BUILTIN_SETPD1
:
13308 target
= assign_386_stack_local (DFmode
, 0);
13309 arg0
= TREE_VALUE (arglist
);
13310 emit_move_insn (adjust_address (target
, DFmode
, 0),
13311 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
13312 op0
= gen_reg_rtx (V2DFmode
);
13313 emit_insn (gen_sse2_loadsd (op0
, adjust_address (target
, V2DFmode
, 0)));
13314 emit_insn (gen_sse2_shufpd (op0
, op0
, op0
, GEN_INT (0)));
13317 case IX86_BUILTIN_SETPD
:
13318 target
= assign_386_stack_local (V2DFmode
, 0);
13319 arg0
= TREE_VALUE (arglist
);
13320 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13321 emit_move_insn (adjust_address (target
, DFmode
, 0),
13322 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
13323 emit_move_insn (adjust_address (target
, DFmode
, 8),
13324 expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0));
13325 op0
= gen_reg_rtx (V2DFmode
);
13326 emit_insn (gen_sse2_movapd (op0
, target
));
13329 case IX86_BUILTIN_LOADRPD
:
13330 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
,
13331 gen_reg_rtx (V2DFmode
), 1);
13332 emit_insn (gen_sse2_shufpd (target
, target
, target
, GEN_INT (1)));
13335 case IX86_BUILTIN_LOADPD1
:
13336 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
,
13337 gen_reg_rtx (V2DFmode
), 1);
13338 emit_insn (gen_sse2_shufpd (target
, target
, target
, const0_rtx
));
13341 case IX86_BUILTIN_STOREPD1
:
13342 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13343 case IX86_BUILTIN_STORERPD
:
13344 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13346 case IX86_BUILTIN_MFENCE
:
13347 emit_insn (gen_sse2_mfence ());
13349 case IX86_BUILTIN_LFENCE
:
13350 emit_insn (gen_sse2_lfence ());
13353 case IX86_BUILTIN_CLFLUSH
:
13354 arg0
= TREE_VALUE (arglist
);
13355 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13356 icode
= CODE_FOR_sse2_clflush
;
13357 mode0
= insn_data
[icode
].operand
[0].mode
;
13358 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
13359 op0
= copy_to_mode_reg (mode0
, op0
);
13361 emit_insn (gen_sse2_clflush (op0
));
13364 case IX86_BUILTIN_MOVNTPD
:
13365 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
13366 case IX86_BUILTIN_MOVNTDQ
:
13367 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
13368 case IX86_BUILTIN_MOVNTI
:
13369 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
13375 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
13376 if (d
->code
== fcode
)
13378 /* Compares are treated specially. */
13379 if (d
->icode
== CODE_FOR_maskcmpv4sf3
13380 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
13381 || d
->icode
== CODE_FOR_maskncmpv4sf3
13382 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
13383 || d
->icode
== CODE_FOR_maskcmpv2df3
13384 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
13385 || d
->icode
== CODE_FOR_maskncmpv2df3
13386 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
13387 return ix86_expand_sse_compare (d
, arglist
, target
);
13389 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
13392 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
13393 if (d
->code
== fcode
)
13394 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
13396 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
13397 if (d
->code
== fcode
)
13398 return ix86_expand_sse_comi (d
, arglist
, target
);
13400 /* @@@ Should really do something sensible here. */
13404 /* Store OPERAND to the memory after reload is completed. This means
13405 that we can't easily use assign_stack_local. */
13407 ix86_force_to_memory (mode
, operand
)
13408 enum machine_mode mode
;
13412 if (!reload_completed
)
13414 if (TARGET_64BIT
&& TARGET_RED_ZONE
)
13416 result
= gen_rtx_MEM (mode
,
13417 gen_rtx_PLUS (Pmode
,
13419 GEN_INT (-RED_ZONE_SIZE
)));
13420 emit_move_insn (result
, operand
);
13422 else if (TARGET_64BIT
&& !TARGET_RED_ZONE
)
13428 operand
= gen_lowpart (DImode
, operand
);
13432 gen_rtx_SET (VOIDmode
,
13433 gen_rtx_MEM (DImode
,
13434 gen_rtx_PRE_DEC (DImode
,
13435 stack_pointer_rtx
)),
13441 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
13450 split_di (&operand
, 1, operands
, operands
+ 1);
13452 gen_rtx_SET (VOIDmode
,
13453 gen_rtx_MEM (SImode
,
13454 gen_rtx_PRE_DEC (Pmode
,
13455 stack_pointer_rtx
)),
13458 gen_rtx_SET (VOIDmode
,
13459 gen_rtx_MEM (SImode
,
13460 gen_rtx_PRE_DEC (Pmode
,
13461 stack_pointer_rtx
)),
13466 /* It is better to store HImodes as SImodes. */
13467 if (!TARGET_PARTIAL_REG_STALL
)
13468 operand
= gen_lowpart (SImode
, operand
);
13472 gen_rtx_SET (VOIDmode
,
13473 gen_rtx_MEM (GET_MODE (operand
),
13474 gen_rtx_PRE_DEC (SImode
,
13475 stack_pointer_rtx
)),
13481 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
13486 /* Free operand from the memory. */
13488 ix86_free_from_memory (mode
)
13489 enum machine_mode mode
;
13491 if (!TARGET_64BIT
|| !TARGET_RED_ZONE
)
13495 if (mode
== DImode
|| TARGET_64BIT
)
13497 else if (mode
== HImode
&& TARGET_PARTIAL_REG_STALL
)
13501 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13502 to pop or add instruction if registers are available. */
13503 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
13504 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
13509 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13510 QImode must go into class Q_REGS.
13511 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13512 movdf to do mem-to-mem moves through integer regs. */
13514 ix86_preferred_reload_class (x
, class)
13516 enum reg_class
class;
13518 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
13520 /* SSE can't load any constant directly yet. */
13521 if (SSE_CLASS_P (class))
13523 /* Floats can load 0 and 1. */
13524 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x
))
13526 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13527 if (MAYBE_SSE_CLASS_P (class))
13528 return (reg_class_subset_p (class, GENERAL_REGS
)
13529 ? GENERAL_REGS
: FLOAT_REGS
);
13533 /* General regs can load everything. */
13534 if (reg_class_subset_p (class, GENERAL_REGS
))
13535 return GENERAL_REGS
;
13536 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13537 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13540 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x
))
13542 if (GET_MODE (x
) == QImode
&& ! reg_class_subset_p (class, Q_REGS
))
13547 /* If we are copying between general and FP registers, we need a memory
13548 location. The same is true for SSE and MMX registers.
13550 The macro can't work reliably when one of the CLASSES is class containing
13551 registers from multiple units (SSE, MMX, integer). We avoid this by never
13552 combining those units in single alternative in the machine description.
13553 Ensure that this constraint holds to avoid unexpected surprises.
13555 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13556 enforce these sanity checks. */
13558 ix86_secondary_memory_needed (class1
, class2
, mode
, strict
)
13559 enum reg_class class1
, class2
;
13560 enum machine_mode mode
;
13563 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
13564 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
13565 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
13566 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
13567 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
13568 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
13575 return (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
)
13576 || (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
)
13577 && (mode
) != SImode
)
13578 || (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
13579 && (mode
) != SImode
));
13581 /* Return the cost of moving data from a register in class CLASS1 to
13582 one in class CLASS2.
13584 It is not required that the cost always equal 2 when FROM is the same as TO;
13585 on some machines it is expensive to move between registers if they are not
13586 general registers. */
13588 ix86_register_move_cost (mode
, class1
, class2
)
13589 enum machine_mode mode
;
13590 enum reg_class class1
, class2
;
13592 /* In case we require secondary memory, compute cost of the store followed
13593 by load. In case of copying from general_purpose_register we may emit
13594 multiple stores followed by single load causing memory size mismatch
13595 stall. Count this as arbitarily high cost of 20. */
13596 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
13599 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
13601 return (MEMORY_MOVE_COST (mode
, class1
, 0)
13602 + MEMORY_MOVE_COST (mode
, class2
, 1) + add_cost
);
13604 /* Moves between SSE/MMX and integer unit are expensive. */
13605 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
13606 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
13607 return ix86_cost
->mmxsse_to_integer
;
13608 if (MAYBE_FLOAT_CLASS_P (class1
))
13609 return ix86_cost
->fp_move
;
13610 if (MAYBE_SSE_CLASS_P (class1
))
13611 return ix86_cost
->sse_move
;
13612 if (MAYBE_MMX_CLASS_P (class1
))
13613 return ix86_cost
->mmx_move
;
13617 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13619 ix86_hard_regno_mode_ok (regno
, mode
)
13621 enum machine_mode mode
;
13623 /* Flags and only flags can only hold CCmode values. */
13624 if (CC_REGNO_P (regno
))
13625 return GET_MODE_CLASS (mode
) == MODE_CC
;
13626 if (GET_MODE_CLASS (mode
) == MODE_CC
13627 || GET_MODE_CLASS (mode
) == MODE_RANDOM
13628 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
13630 if (FP_REGNO_P (regno
))
13631 return VALID_FP_MODE_P (mode
);
13632 if (SSE_REGNO_P (regno
))
13633 return VALID_SSE_REG_MODE (mode
);
13634 if (MMX_REGNO_P (regno
))
13635 return VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
);
13636 /* We handle both integer and floats in the general purpose registers.
13637 In future we should be able to handle vector modes as well. */
13638 if (!VALID_INT_MODE_P (mode
) && !VALID_FP_MODE_P (mode
))
13640 /* Take care for QImode values - they can be in non-QI regs, but then
13641 they do cause partial register stalls. */
13642 if (regno
< 4 || mode
!= QImode
|| TARGET_64BIT
)
13644 return reload_in_progress
|| reload_completed
|| !TARGET_PARTIAL_REG_STALL
;
13647 /* Return the cost of moving data of mode M between a
13648 register and memory. A value of 2 is the default; this cost is
13649 relative to those in `REGISTER_MOVE_COST'.
13651 If moving between registers and memory is more expensive than
13652 between two registers, you should define this macro to express the
13655 Model also increased moving costs of QImode registers in non
13659 ix86_memory_move_cost (mode
, class, in
)
13660 enum machine_mode mode
;
13661 enum reg_class
class;
13664 if (FLOAT_CLASS_P (class))
13682 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
13684 if (SSE_CLASS_P (class))
13687 switch (GET_MODE_SIZE (mode
))
13701 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
13703 if (MMX_CLASS_P (class))
13706 switch (GET_MODE_SIZE (mode
))
13717 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
13719 switch (GET_MODE_SIZE (mode
))
13723 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
13724 : ix86_cost
->movzbl_load
);
13726 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
13727 : ix86_cost
->int_store
[0] + 4);
13730 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
13732 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
13733 if (mode
== TFmode
)
13735 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
13736 * (int) GET_MODE_SIZE (mode
) / 4);
13740 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
13742 ix86_svr3_asm_out_constructor (symbol
, priority
)
13744 int priority ATTRIBUTE_UNUSED
;
13747 fputs ("\tpushl $", asm_out_file
);
13748 assemble_name (asm_out_file
, XSTR (symbol
, 0));
13749 fputc ('\n', asm_out_file
);
13755 static int current_machopic_label_num
;
13757 /* Given a symbol name and its associated stub, write out the
13758 definition of the stub. */
13761 machopic_output_stub (file
, symb
, stub
)
13763 const char *symb
, *stub
;
13765 unsigned int length
;
13766 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
13767 int label
= ++current_machopic_label_num
;
13769 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
13770 symb
= (*targetm
.strip_name_encoding
) (symb
);
13772 length
= strlen (stub
);
13773 binder_name
= alloca (length
+ 32);
13774 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
13776 length
= strlen (symb
);
13777 symbol_name
= alloca (length
+ 32);
13778 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
13780 sprintf (lazy_ptr_name
, "L%d$lz", label
);
13783 machopic_picsymbol_stub_section ();
13785 machopic_symbol_stub_section ();
13787 fprintf (file
, "%s:\n", stub
);
13788 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
13792 fprintf (file
, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label
, label
);
13793 fprintf (file
, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
13794 fprintf (file
, "\tjmp %%edx\n");
13797 fprintf (file
, "\tjmp *%s\n", lazy_ptr_name
);
13799 fprintf (file
, "%s:\n", binder_name
);
13803 fprintf (file
, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
13804 fprintf (file
, "\tpushl %%eax\n");
13807 fprintf (file
, "\t pushl $%s\n", lazy_ptr_name
);
13809 fprintf (file
, "\tjmp dyld_stub_binding_helper\n");
13811 machopic_lazy_symbol_ptr_section ();
13812 fprintf (file
, "%s:\n", lazy_ptr_name
);
13813 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
13814 fprintf (file
, "\t.long %s\n", binder_name
);
13816 #endif /* TARGET_MACHO */
13818 /* Order the registers for register allocator. */
13821 x86_order_regs_for_local_alloc ()
13826 /* First allocate the local general purpose registers. */
13827 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
13828 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
13829 reg_alloc_order
[pos
++] = i
;
13831 /* Global general purpose registers. */
13832 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
13833 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
13834 reg_alloc_order
[pos
++] = i
;
13836 /* x87 registers come first in case we are doing FP math
13838 if (!TARGET_SSE_MATH
)
13839 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
13840 reg_alloc_order
[pos
++] = i
;
13842 /* SSE registers. */
13843 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
13844 reg_alloc_order
[pos
++] = i
;
13845 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
13846 reg_alloc_order
[pos
++] = i
;
13848 /* x87 registerts. */
13849 if (TARGET_SSE_MATH
)
13850 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
13851 reg_alloc_order
[pos
++] = i
;
13853 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
13854 reg_alloc_order
[pos
++] = i
;
13856 /* Initialize the rest of array as we do not allocate some registers
13858 while (pos
< FIRST_PSEUDO_REGISTER
)
13859 reg_alloc_order
[pos
++] = 0;
13863 x86_output_mi_thunk (file
, delta
, function
)
13871 if (ix86_regparm
> 0)
13872 parm
= TYPE_ARG_TYPES (TREE_TYPE (function
));
13875 for (; parm
; parm
= TREE_CHAIN (parm
))
13876 if (TREE_VALUE (parm
) == void_type_node
)
13879 xops
[0] = GEN_INT (delta
);
13882 int n
= aggregate_value_p (TREE_TYPE (TREE_TYPE (function
))) != 0;
13883 xops
[1] = gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
13884 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops
);
13887 fprintf (file
, "\tjmp *");
13888 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
13889 fprintf (file
, "@GOTPCREL(%%rip)\n");
13893 fprintf (file
, "\tjmp ");
13894 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
13895 fprintf (file
, "\n");
13901 xops
[1] = gen_rtx_REG (SImode
, 0);
13902 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
))))
13903 xops
[1] = gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
13905 xops
[1] = gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
13906 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops
);
13910 xops
[0] = pic_offset_table_rtx
;
13911 xops
[1] = gen_label_rtx ();
13912 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
13914 if (ix86_regparm
> 2)
13916 output_asm_insn ("push{l}\t%0", xops
);
13917 output_asm_insn ("call\t%P1", xops
);
13918 ASM_OUTPUT_INTERNAL_LABEL (file
, "L", CODE_LABEL_NUMBER (xops
[1]));
13919 output_asm_insn ("pop{l}\t%0", xops
);
13921 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops
);
13922 xops
[0] = gen_rtx_MEM (SImode
, XEXP (DECL_RTL (function
), 0));
13924 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops
);
13925 asm_fprintf (file
, "\tpop{l\t%%ebx|\t%%ebx}\n");
13926 asm_fprintf (file
, "\tjmp\t{*%%ecx|%%ecx}\n");
13930 fprintf (file
, "\tjmp ");
13931 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
13932 fprintf (file
, "\n");
13938 x86_field_alignment (field
, computed
)
13942 enum machine_mode mode
;
13943 tree type
= TREE_TYPE (field
);
13945 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
13947 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
13948 ? get_inner_array_type (type
) : type
);
13949 if (mode
== DFmode
|| mode
== DCmode
13950 || GET_MODE_CLASS (mode
) == MODE_INT
13951 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
13952 return MIN (32, computed
);
13956 /* Implement machine specific optimizations.
13957 At the moment we implement single transformation: AMD Athlon works faster
13958 when RET is not destination of conditional jump or directly preceeded
13959 by other jump instruction. We avoid the penalty by inserting NOP just
13960 before the RET instructions in such cases. */
13962 x86_machine_dependent_reorg (first
)
13963 rtx first ATTRIBUTE_UNUSED
;
13967 if (!TARGET_ATHLON
|| !optimize
|| optimize_size
)
13969 for (e
= EXIT_BLOCK_PTR
->pred
; e
; e
= e
->pred_next
)
13971 basic_block bb
= e
->src
;
13974 bool insert
= false;
13976 if (!returnjump_p (ret
) || !maybe_hot_bb_p (bb
))
13978 prev
= prev_nonnote_insn (ret
);
13979 if (prev
&& GET_CODE (prev
) == CODE_LABEL
)
13982 for (e
= bb
->pred
; e
; e
= e
->pred_next
)
13983 if (EDGE_FREQUENCY (e
) && e
->src
->index
> 0
13984 && !(e
->flags
& EDGE_FALLTHRU
))
13989 prev
= prev_real_insn (ret
);
13990 if (prev
&& GET_CODE (prev
) == JUMP_INSN
13991 && any_condjump_p (prev
))
13995 emit_insn_before (gen_nop (), ret
);
13999 #include "gt-i386.h"