/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "target-def.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
/* Processor costs (relative to an add) */
struct processor_costs size_cost = {    /* costs for tuning for size */
53 2, /* cost of an add instruction */
54 3, /* cost of a lea instruction */
55 2, /* variable shift costs */
56 3, /* constant shift costs */
57 3, /* cost of starting a multiply */
58 0, /* cost of multiply per each bit set */
59 3, /* cost of a divide/mod */
60 3, /* cost of movsx */
61 3, /* cost of movzx */
64 2, /* cost for loading QImode using movzbl */
65 {2, 2, 2}, /* cost of loading integer registers
66 in QImode, HImode and SImode.
67 Relative to reg-reg move (2). */
68 {2, 2, 2}, /* cost of storing integer registers */
69 2, /* cost of reg,reg fld/fst */
70 {2, 2, 2}, /* cost of loading fp registers
71 in SFmode, DFmode and XFmode */
72 {2, 2, 2}, /* cost of loading integer registers */
73 3, /* cost of moving MMX register */
74 {3, 3}, /* cost of loading MMX registers
75 in SImode and DImode */
76 {3, 3}, /* cost of storing MMX registers
77 in SImode and DImode */
78 3, /* cost of moving SSE register */
79 {3, 3, 3}, /* cost of loading SSE registers
80 in SImode, DImode and TImode */
81 {3, 3, 3}, /* cost of storing SSE registers
82 in SImode, DImode and TImode */
83 3, /* MMX or SSE register to integer */
84 0, /* size of prefetch block */
85 0, /* number of parallel prefetches */
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {    /* 386 specific costs */
90 1, /* cost of an add instruction */
91 1, /* cost of a lea instruction */
92 3, /* variable shift costs */
93 2, /* constant shift costs */
94 6, /* cost of starting a multiply */
95 1, /* cost of multiply per each bit set */
96 23, /* cost of a divide/mod */
97 3, /* cost of movsx */
98 2, /* cost of movzx */
99 15, /* "large" insn */
101 4, /* cost for loading QImode using movzbl */
102 {2, 4, 2}, /* cost of loading integer registers
103 in QImode, HImode and SImode.
104 Relative to reg-reg move (2). */
105 {2, 4, 2}, /* cost of storing integer registers */
106 2, /* cost of reg,reg fld/fst */
107 {8, 8, 8}, /* cost of loading fp registers
108 in SFmode, DFmode and XFmode */
109 {8, 8, 8}, /* cost of loading integer registers */
110 2, /* cost of moving MMX register */
111 {4, 8}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {4, 8}, /* cost of storing MMX registers
114 in SImode and DImode */
115 2, /* cost of moving SSE register */
116 {4, 8, 16}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {4, 8, 16}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of prefetch block */
122 0, /* number of parallel prefetches */
struct processor_costs i486_cost = {    /* 486 specific costs */
127 1, /* cost of an add instruction */
128 1, /* cost of a lea instruction */
129 3, /* variable shift costs */
130 2, /* constant shift costs */
131 12, /* cost of starting a multiply */
132 1, /* cost of multiply per each bit set */
133 40, /* cost of a divide/mod */
134 3, /* cost of movsx */
135 2, /* cost of movzx */
136 15, /* "large" insn */
138 4, /* cost for loading QImode using movzbl */
139 {2, 4, 2}, /* cost of loading integer registers
140 in QImode, HImode and SImode.
141 Relative to reg-reg move (2). */
142 {2, 4, 2}, /* cost of storing integer registers */
143 2, /* cost of reg,reg fld/fst */
144 {8, 8, 8}, /* cost of loading fp registers
145 in SFmode, DFmode and XFmode */
146 {8, 8, 8}, /* cost of loading integer registers */
147 2, /* cost of moving MMX register */
148 {4, 8}, /* cost of loading MMX registers
149 in SImode and DImode */
150 {4, 8}, /* cost of storing MMX registers
151 in SImode and DImode */
152 2, /* cost of moving SSE register */
153 {4, 8, 16}, /* cost of loading SSE registers
154 in SImode, DImode and TImode */
155 {4, 8, 16}, /* cost of storing SSE registers
156 in SImode, DImode and TImode */
157 3, /* MMX or SSE register to integer */
158 0, /* size of prefetch block */
159 0, /* number of parallel prefetches */
struct processor_costs pentium_cost = {
164 1, /* cost of an add instruction */
165 1, /* cost of a lea instruction */
166 4, /* variable shift costs */
167 1, /* constant shift costs */
168 11, /* cost of starting a multiply */
169 0, /* cost of multiply per each bit set */
170 25, /* cost of a divide/mod */
171 3, /* cost of movsx */
172 2, /* cost of movzx */
173 8, /* "large" insn */
175 6, /* cost for loading QImode using movzbl */
176 {2, 4, 2}, /* cost of loading integer registers
177 in QImode, HImode and SImode.
178 Relative to reg-reg move (2). */
179 {2, 4, 2}, /* cost of storing integer registers */
180 2, /* cost of reg,reg fld/fst */
181 {2, 2, 6}, /* cost of loading fp registers
182 in SFmode, DFmode and XFmode */
183 {4, 4, 6}, /* cost of loading integer registers */
184 8, /* cost of moving MMX register */
185 {8, 8}, /* cost of loading MMX registers
186 in SImode and DImode */
187 {8, 8}, /* cost of storing MMX registers
188 in SImode and DImode */
189 2, /* cost of moving SSE register */
190 {4, 8, 16}, /* cost of loading SSE registers
191 in SImode, DImode and TImode */
192 {4, 8, 16}, /* cost of storing SSE registers
193 in SImode, DImode and TImode */
194 3, /* MMX or SSE register to integer */
195 0, /* size of prefetch block */
196 0, /* number of parallel prefetches */
struct processor_costs pentiumpro_cost = {
201 1, /* cost of an add instruction */
202 1, /* cost of a lea instruction */
203 1, /* variable shift costs */
204 1, /* constant shift costs */
205 4, /* cost of starting a multiply */
206 0, /* cost of multiply per each bit set */
207 17, /* cost of a divide/mod */
208 1, /* cost of movsx */
209 1, /* cost of movzx */
210 8, /* "large" insn */
212 2, /* cost for loading QImode using movzbl */
213 {4, 4, 4}, /* cost of loading integer registers
214 in QImode, HImode and SImode.
215 Relative to reg-reg move (2). */
216 {2, 2, 2}, /* cost of storing integer registers */
217 2, /* cost of reg,reg fld/fst */
218 {2, 2, 6}, /* cost of loading fp registers
219 in SFmode, DFmode and XFmode */
220 {4, 4, 6}, /* cost of loading integer registers */
221 2, /* cost of moving MMX register */
222 {2, 2}, /* cost of loading MMX registers
223 in SImode and DImode */
224 {2, 2}, /* cost of storing MMX registers
225 in SImode and DImode */
226 2, /* cost of moving SSE register */
227 {2, 2, 8}, /* cost of loading SSE registers
228 in SImode, DImode and TImode */
229 {2, 2, 8}, /* cost of storing SSE registers
230 in SImode, DImode and TImode */
231 3, /* MMX or SSE register to integer */
232 32, /* size of prefetch block */
233 6, /* number of parallel prefetches */
struct processor_costs k6_cost = {
238 1, /* cost of an add instruction */
239 2, /* cost of a lea instruction */
240 1, /* variable shift costs */
241 1, /* constant shift costs */
242 3, /* cost of starting a multiply */
243 0, /* cost of multiply per each bit set */
244 18, /* cost of a divide/mod */
245 2, /* cost of movsx */
246 2, /* cost of movzx */
247 8, /* "large" insn */
249 3, /* cost for loading QImode using movzbl */
250 {4, 5, 4}, /* cost of loading integer registers
251 in QImode, HImode and SImode.
252 Relative to reg-reg move (2). */
253 {2, 3, 2}, /* cost of storing integer registers */
254 4, /* cost of reg,reg fld/fst */
255 {6, 6, 6}, /* cost of loading fp registers
256 in SFmode, DFmode and XFmode */
257 {4, 4, 4}, /* cost of loading integer registers */
258 2, /* cost of moving MMX register */
259 {2, 2}, /* cost of loading MMX registers
260 in SImode and DImode */
261 {2, 2}, /* cost of storing MMX registers
262 in SImode and DImode */
263 2, /* cost of moving SSE register */
264 {2, 2, 8}, /* cost of loading SSE registers
265 in SImode, DImode and TImode */
266 {2, 2, 8}, /* cost of storing SSE registers
267 in SImode, DImode and TImode */
268 6, /* MMX or SSE register to integer */
269 32, /* size of prefetch block */
270 1, /* number of parallel prefetches */
struct processor_costs athlon_cost = {
275 1, /* cost of an add instruction */
276 2, /* cost of a lea instruction */
277 1, /* variable shift costs */
278 1, /* constant shift costs */
279 5, /* cost of starting a multiply */
280 0, /* cost of multiply per each bit set */
281 42, /* cost of a divide/mod */
282 1, /* cost of movsx */
283 1, /* cost of movzx */
284 8, /* "large" insn */
286 4, /* cost for loading QImode using movzbl */
287 {4, 5, 4}, /* cost of loading integer registers
288 in QImode, HImode and SImode.
289 Relative to reg-reg move (2). */
290 {2, 3, 2}, /* cost of storing integer registers */
291 4, /* cost of reg,reg fld/fst */
292 {6, 6, 20}, /* cost of loading fp registers
293 in SFmode, DFmode and XFmode */
294 {4, 4, 16}, /* cost of loading integer registers */
295 2, /* cost of moving MMX register */
296 {2, 2}, /* cost of loading MMX registers
297 in SImode and DImode */
298 {2, 2}, /* cost of storing MMX registers
299 in SImode and DImode */
300 2, /* cost of moving SSE register */
301 {2, 2, 8}, /* cost of loading SSE registers
302 in SImode, DImode and TImode */
303 {2, 2, 8}, /* cost of storing SSE registers
304 in SImode, DImode and TImode */
305 6, /* MMX or SSE register to integer */
306 64, /* size of prefetch block */
307 6, /* number of parallel prefetches */
struct processor_costs pentium4_cost = {
312 1, /* cost of an add instruction */
313 1, /* cost of a lea instruction */
314 8, /* variable shift costs */
315 8, /* constant shift costs */
316 30, /* cost of starting a multiply */
317 0, /* cost of multiply per each bit set */
318 112, /* cost of a divide/mod */
319 1, /* cost of movsx */
320 1, /* cost of movzx */
321 16, /* "large" insn */
323 2, /* cost for loading QImode using movzbl */
324 {4, 5, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 3, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of loading integer registers */
332 2, /* cost of moving MMX register */
333 {2, 2}, /* cost of loading MMX registers
334 in SImode and DImode */
335 {2, 2}, /* cost of storing MMX registers
336 in SImode and DImode */
337 12, /* cost of moving SSE register */
338 {12, 12, 12}, /* cost of loading SSE registers
339 in SImode, DImode and TImode */
340 {2, 2, 8}, /* cost of storing SSE registers
341 in SImode, DImode and TImode */
342 10, /* MMX or SSE register to integer */
343 64, /* size of prefetch block */
344 6, /* number of parallel prefetches */
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
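/* A note for readers (summarizing how these are used, not a new interface):
   each x86_* flag below is a bitmask over the PROCESSOR_* values above.
   A feature is in effect when the flag is ANDed with the bit of a particular
   processor, e.g. (1 << ix86_cpu) for tuning decisions (the CPUMASK test on
   x86_accumulate_outgoing_args later in this file) or (1 << ix86_arch) for
   architecture decisions (the x86_3dnow_a check).  */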
const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_3dnow_a = m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;
#define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))

static const char *const hi_reg_name[] = HI_REGISTER_NAMES;	/* names for 16 bit regs */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;	/* names for 8 bit regs (low) */
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;	/* names for 8 bit regs (high) */
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
  AREG, DREG, CREG, BREG,
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
static int const x86_64_int_parameter_registers[6] =
  {5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
   FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */};
static int const x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDX*/, 5, 4};
/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
467 /* Define the register numbers to be used in Dwarf debugging information.
468 The SVR4 reference port C compiler uses the following register numbers
469 in its Dwarf output code:
470 0 for %eax (gcc regno = 0)
471 1 for %ecx (gcc regno = 2)
472 2 for %edx (gcc regno = 1)
473 3 for %ebx (gcc regno = 3)
474 4 for %esp (gcc regno = 7)
475 5 for %ebp (gcc regno = 6)
476 6 for %esi (gcc regno = 4)
477 7 for %edi (gcc regno = 5)
478 The following three DWARF register numbers are never generated by
479 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
480 believes these numbers have these meanings.
481 8 for %eip (no gcc equivalent)
482 9 for %eflags (gcc regno = 17)
483 10 for %trapno (no gcc equivalent)
484 It is not at all clear how we should number the FP stack registers
485 for the x86 architecture. If the version of SDB on x86/svr4 were
486 a bit less brain dead with respect to floating-point then we would
487 have a precedent to follow with respect to DWARF register numbers
488 for x86 FP registers, but the SDB on x86/svr4 is so completely
489 broken with respect to FP registers that it is hardly worth thinking
490 of it as something to strive for compatibility with.
491 The version of x86/svr4 SDB I have at the moment does (partially)
492 seem to believe that DWARF register number 11 is associated with
493 the x86 register %st(0), but that's about all. Higher DWARF
494 register numbers don't seem to be associated with anything in
495 particular, and even for DWARF regno 11, SDB only seems to under-
496 stand that it should say that a variable lives in %st(0) (when
497 asked via an `=' command) if we said it was in DWARF regno 11,
498 but SDB still prints garbage when asked for the value of the
499 variable in question (via a `/' command).
500 (Also note that the labels SDB prints for various FP stack regs
501 when doing an `x' command are all wrong.)
502 Note that these problems generally don't affect the native SVR4
503 C compiler because it doesn't allow the use of -O with -g and
504 because when it is *not* optimizing, it allocates a memory
505 location for each floating-point variable, and the memory
506 location is what gets described in the DWARF AT_location
507 attribute for the variable in question.
508 Regardless of the severe mental illness of the x86/svr4 SDB, we
509 do something sensible here and we use the following DWARF
510 register numbers. Note that these are all stack-top-relative
512 11 for %st(0) (gcc regno = 8)
513 12 for %st(1) (gcc regno = 9)
514 13 for %st(2) (gcc regno = 10)
515 14 for %st(3) (gcc regno = 11)
516 15 for %st(4) (gcc regno = 12)
517 16 for %st(5) (gcc regno = 13)
518 17 for %st(6) (gcc regno = 14)
519 18 for %st(7) (gcc regno = 15)
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
#define MAX_386_STACK_LOCALS 3

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
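/* With the usual x86-64 parameter-register counts (REGPARM_MAX of 6
   integer registers, SSE_REGPARM_MAX of 8 SSE registers, and an 8 byte
   UNITS_PER_WORD), this evaluates to 6*8 + 8*16 = 176 bytes, the size of
   the register save area the psABI prescribes for varargs functions.  */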
/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
/* Structure describing stack frame layout.
   Stack grows downward:
     saved frame pointer if frame_pointer_needed
					<- HARD_FRAME_POINTER
     > to_allocate			<- FRAME_POINTER  */
  int outgoing_arguments_size;
  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;

/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
enum cmodel ix86_cmodel;

const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;

/* which cpu are we scheduling for */
enum processor_type ix86_cpu;

/* which unit we are generating floating point math for */
enum fpmath_unit ix86_fpmath;

/* which instruction set architecture to use.  */

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;			/* for -mcpu=<xxx> */
const char *ix86_arch_string;			/* for -march=<xxx> */
const char *ix86_fpmath_string;			/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_free_machine_status PARAMS ((struct function *));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
static void ix86_init_mmx_sse_builtins PARAMS ((void));
  rtx base, index, disp;

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static int ix86_save_reg PARAMS ((int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
const struct attribute_spec ix86_attribute_table[];
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));

#ifdef DO_GLOBAL_CTORS_BODY
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
/* Register class used for passing a given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class, except
   that gcc will use SF or DFmode moves instead of DImode to avoid
   reformatting penalties.

   Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
                                      enum x86_64_reg_class [MAX_CLASSES],
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
                                                    enum x86_64_reg_class));
766 /* Initialize the GCC target structure. */
767 #undef TARGET_ATTRIBUTE_TABLE
768 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
769 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
770 # undef TARGET_MERGE_DECL_ATTRIBUTES
771 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
774 #undef TARGET_COMP_TYPE_ATTRIBUTES
775 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
777 #undef TARGET_INIT_BUILTINS
778 #define TARGET_INIT_BUILTINS ix86_init_builtins
780 #undef TARGET_EXPAND_BUILTIN
781 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
783 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
784 static void ix86_osf_output_function_prologue
PARAMS ((FILE *,
786 # undef TARGET_ASM_FUNCTION_PROLOGUE
787 # define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
790 #undef TARGET_ASM_OPEN_PAREN
791 #define TARGET_ASM_OPEN_PAREN ""
792 #undef TARGET_ASM_CLOSE_PAREN
793 #define TARGET_ASM_CLOSE_PAREN ""
795 #undef TARGET_ASM_ALIGNED_HI_OP
796 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
797 #undef TARGET_ASM_ALIGNED_SI_OP
798 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
800 #undef TARGET_ASM_ALIGNED_DI_OP
801 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
804 #undef TARGET_ASM_UNALIGNED_HI_OP
805 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
806 #undef TARGET_ASM_UNALIGNED_SI_OP
807 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
808 #undef TARGET_ASM_UNALIGNED_DI_OP
809 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
811 #undef TARGET_SCHED_ADJUST_COST
812 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
813 #undef TARGET_SCHED_ISSUE_RATE
814 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
815 #undef TARGET_SCHED_VARIABLE_ISSUE
816 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
817 #undef TARGET_SCHED_INIT
818 #define TARGET_SCHED_INIT ix86_sched_init
819 #undef TARGET_SCHED_REORDER
820 #define TARGET_SCHED_REORDER ix86_sched_reorder
struct gcc_target targetm = TARGET_INITIALIZER;
824 /* Sometimes certain combinations of command options do not make
825 sense on a particular target machine. You can define a macro
826 `OVERRIDE_OPTIONS' to take account of this. This macro, if
827 defined, is executed once just after all the command options have
830 Don't use this macro to turn on various extra optimizations for
831 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
/* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  const struct processor_costs *cost;	/* Processor costs */
  const int target_enable;		/* Target flags to enable.  */
  const int target_disable;		/* Target flags to disable.  */
  const int align_loop;			/* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
  const int branch_cost;
const processor_target_table[PROCESSOR_max] =
  {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
  {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
  {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
  {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
  {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
  {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
  {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}

static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
	PTA_PREFETCH_SSE = 8,
    const processor_alias_table[] =
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
				       PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
				       | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				       | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				       | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				       | PTA_3DNOW_A | PTA_SSE},

  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
908 #ifdef SUBTARGET_OVERRIDE_OPTIONS
909 SUBTARGET_OVERRIDE_OPTIONS
;
912 if (!ix86_cpu_string
&& ix86_arch_string
)
913 ix86_cpu_string
= ix86_arch_string
;
914 if (!ix86_cpu_string
)
915 ix86_cpu_string
= cpu_names
[TARGET_CPU_DEFAULT
];
916 if (!ix86_arch_string
)
917 ix86_arch_string
= TARGET_64BIT
? "athlon-4" : "i386";
919 if (ix86_cmodel_string
!= 0)
921 if (!strcmp (ix86_cmodel_string
, "small"))
922 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
924 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
925 else if (!strcmp (ix86_cmodel_string
, "32"))
927 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
928 ix86_cmodel
= CM_KERNEL
;
929 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
930 ix86_cmodel
= CM_MEDIUM
;
931 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
932 ix86_cmodel
= CM_LARGE
;
934 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
940 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
942 if (ix86_asm_string
!= 0)
944 if (!strcmp (ix86_asm_string
, "intel"))
945 ix86_asm_dialect
= ASM_INTEL
;
946 else if (!strcmp (ix86_asm_string
, "att"))
947 ix86_asm_dialect
= ASM_ATT
;
949 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
951 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
952 error ("code model `%s' not supported in the %s bit mode",
953 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
954 if (ix86_cmodel
== CM_LARGE
)
955 sorry ("code model `large' not supported yet");
956 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
957 sorry ("%i-bit mode not compiled in",
958 (target_flags
& MASK_64BIT
) ? 64 : 32);
960 for (i
= 0; i
< pta_size
; i
++)
961 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
963 ix86_arch
= processor_alias_table
[i
].processor
;
964 /* Default cpu tuning to the architecture. */
965 ix86_cpu
= ix86_arch
;
966 if (processor_alias_table
[i
].flags
& PTA_MMX
967 && !(target_flags
& MASK_MMX_SET
))
968 target_flags
|= MASK_MMX
;
969 if (processor_alias_table
[i
].flags
& PTA_3DNOW
970 && !(target_flags
& MASK_3DNOW_SET
))
971 target_flags
|= MASK_3DNOW
;
972 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
973 && !(target_flags
& MASK_3DNOW_A_SET
))
974 target_flags
|= MASK_3DNOW_A
;
975 if (processor_alias_table
[i
].flags
& PTA_SSE
976 && !(target_flags
& MASK_SSE_SET
))
977 target_flags
|= MASK_SSE
;
978 if (processor_alias_table
[i
].flags
& PTA_SSE2
979 && !(target_flags
& MASK_SSE2_SET
))
980 target_flags
|= MASK_SSE2
;
981 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
982 x86_prefetch_sse
= true;
987 error ("bad value (%s) for -march= switch", ix86_arch_string
);
989 for (i
= 0; i
< pta_size
; i
++)
990 if (! strcmp (ix86_cpu_string
, processor_alias_table
[i
].name
))
992 ix86_cpu
= processor_alias_table
[i
].processor
;
995 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
996 x86_prefetch_sse
= true;
998 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string
);
1001 ix86_cost
= &size_cost
;
1003 ix86_cost
= processor_target_table
[ix86_cpu
].cost
;
1004 target_flags
|= processor_target_table
[ix86_cpu
].target_enable
;
1005 target_flags
&= ~processor_target_table
[ix86_cpu
].target_disable
;
1007 /* Arrange to set up i386_stack_locals for all functions. */
1008 init_machine_status
= ix86_init_machine_status
;
1009 mark_machine_status
= ix86_mark_machine_status
;
1010 free_machine_status
= ix86_free_machine_status
;
1012 /* Validate -mregparm= value. */
1013 if (ix86_regparm_string
)
1015 i
= atoi (ix86_regparm_string
);
1016 if (i
< 0 || i
> REGPARM_MAX
)
1017 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1023 ix86_regparm
= REGPARM_MAX
;
1025 /* If the user has provided any of the -malign-* options,
1026 warn and use that value only if -falign-* is not set.
1027 Remove this code in GCC 3.2 or later. */
1028 if (ix86_align_loops_string
)
1030 warning ("-malign-loops is obsolete, use -falign-loops");
1031 if (align_loops
== 0)
1033 i
= atoi (ix86_align_loops_string
);
1034 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1035 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1037 align_loops
= 1 << i
;
1041 if (ix86_align_jumps_string
)
1043 warning ("-malign-jumps is obsolete, use -falign-jumps");
1044 if (align_jumps
== 0)
1046 i
= atoi (ix86_align_jumps_string
);
1047 if (i
< 0 || i
> MAX_CODE_ALIGN
)
error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1050 align_jumps
= 1 << i
;
1054 if (ix86_align_funcs_string
)
1056 warning ("-malign-functions is obsolete, use -falign-functions");
1057 if (align_functions
== 0)
1059 i
= atoi (ix86_align_funcs_string
);
1060 if (i
< 0 || i
> MAX_CODE_ALIGN
)
error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1063 align_functions
= 1 << i
;
1067 /* Default align_* from the processor table. */
1068 if (align_loops
== 0)
1070 align_loops
= processor_target_table
[ix86_cpu
].align_loop
;
1071 align_loops_max_skip
= processor_target_table
[ix86_cpu
].align_loop_max_skip
;
1073 if (align_jumps
== 0)
1075 align_jumps
= processor_target_table
[ix86_cpu
].align_jump
;
1076 align_jumps_max_skip
= processor_target_table
[ix86_cpu
].align_jump_max_skip
;
1078 if (align_functions
== 0)
1080 align_functions
= processor_target_table
[ix86_cpu
].align_func
;
1083 /* Validate -mpreferred-stack-boundary= value, or provide default.
1084 The default of 128 bits is for Pentium III's SSE __m128, but we
1085 don't want additional code to keep the stack aligned when
1086 optimizing for code size. */
1087 ix86_preferred_stack_boundary
= (optimize_size
1088 ? TARGET_64BIT
? 64 : 32
1090 if (ix86_preferred_stack_boundary_string
)
1092 i
= atoi (ix86_preferred_stack_boundary_string
);
1093 if (i
< (TARGET_64BIT
? 3 : 2) || i
> 12)
1094 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1095 TARGET_64BIT
? 3 : 2);
1097 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
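      /* Worked example: -mpreferred-stack-boundary=4 gives
	 (1 << 4) * 8 = 128 bits, i.e. the 16 byte alignment that SSE
	 __m128 values want.  */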
1100 /* Validate -mbranch-cost= value, or provide default. */
1101 ix86_branch_cost
= processor_target_table
[ix86_cpu
].branch_cost
;
1102 if (ix86_branch_cost_string
)
1104 i
= atoi (ix86_branch_cost_string
);
1106 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1108 ix86_branch_cost
= i
;
1111 /* Keep nonleaf frame pointers. */
1112 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1113 flag_omit_frame_pointer
= 1;
1115 /* If we're doing fast math, we don't care about comparison order
1116 wrt NaNs. This lets us use a shorter comparison sequence. */
1117 if (flag_unsafe_math_optimizations
)
1118 target_flags
&= ~MASK_IEEE_FP
;
1122 if (TARGET_ALIGN_DOUBLE
)
1123 error ("-malign-double makes no sense in the 64bit mode");
1125 error ("-mrtd calling convention not supported in the 64bit mode");
1126 /* Enable by default the SSE and MMX builtins. */
1127 target_flags
|= (MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
);
1128 ix86_fpmath
= FPMATH_SSE
;
1131 ix86_fpmath
= FPMATH_387
;
1133 if (ix86_fpmath_string
!= 0)
1135 if (! strcmp (ix86_fpmath_string
, "387"))
1136 ix86_fpmath
= FPMATH_387
;
1137 else if (! strcmp (ix86_fpmath_string
, "sse"))
1141 warning ("SSE instruction set disabled, using 387 arithmetics");
1142 ix86_fpmath
= FPMATH_387
;
1145 ix86_fpmath
= FPMATH_SSE
;
1147 else if (! strcmp (ix86_fpmath_string
, "387,sse")
1148 || ! strcmp (ix86_fpmath_string
, "sse,387"))
1152 warning ("SSE instruction set disabled, using 387 arithmetics");
1153 ix86_fpmath
= FPMATH_387
;
1155 else if (!TARGET_80387
)
1157 warning ("387 instruction set disabled, using SSE arithmetics");
1158 ix86_fpmath
= FPMATH_SSE
;
1161 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
1164 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
1167 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1171 target_flags
|= MASK_MMX
;
1172 x86_prefetch_sse
= true;
1175 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1178 target_flags
|= MASK_MMX
;
/* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
   extensions it adds.  */
1181 if (x86_3dnow_a
& (1 << ix86_arch
))
1182 target_flags
|= MASK_3DNOW_A
;
1184 if ((x86_accumulate_outgoing_args
& CPUMASK
)
1185 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS_SET
)
1187 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1189 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1192 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1193 p
= strchr (internal_label_prefix
, 'X');
1194 internal_label_prefix_len
= p
- internal_label_prefix
;
1200 optimization_options (level
, size
)
1202 int size ATTRIBUTE_UNUSED
;
1204 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1205 make the problem with not enough registers even worse. */
1206 #ifdef INSN_SCHEDULING
1208 flag_schedule_insns
= 0;
1210 if (TARGET_64BIT
&& optimize
>= 1)
1211 flag_omit_frame_pointer
= 1;
1214 flag_pcc_struct_return
= 0;
1215 flag_asynchronous_unwind_tables
= 1;
1219 /* Table of valid machine attributes. */
1220 const struct attribute_spec ix86_attribute_table
[] =
1222 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1223 /* Stdcall attribute says callee is responsible for popping arguments
1224 if they are not variable. */
1225 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1226 /* Cdecl attribute says the callee is a normal C declaration */
1227 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1228 /* Regparm attribute specifies how many integer arguments are to be
1229 passed in registers. */
1230 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute
},
1231 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1232 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1233 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1234 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
1236 { NULL
, 0, 0, false, false, false, NULL
}
1239 /* Handle a "cdecl" or "stdcall" attribute;
1240 arguments as in struct attribute_spec.handler. */
1242 ix86_handle_cdecl_attribute (node
, name
, args
, flags
, no_add_attrs
)
1245 tree args ATTRIBUTE_UNUSED
;
1246 int flags ATTRIBUTE_UNUSED
;
1249 if (TREE_CODE (*node
) != FUNCTION_TYPE
1250 && TREE_CODE (*node
) != METHOD_TYPE
1251 && TREE_CODE (*node
) != FIELD_DECL
1252 && TREE_CODE (*node
) != TYPE_DECL
)
1254 warning ("`%s' attribute only applies to functions",
1255 IDENTIFIER_POINTER (name
));
1256 *no_add_attrs
= true;
1261 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
1262 *no_add_attrs
= true;
1268 /* Handle a "regparm" attribute;
1269 arguments as in struct attribute_spec.handler. */
1271 ix86_handle_regparm_attribute (node
, name
, args
, flags
, no_add_attrs
)
1275 int flags ATTRIBUTE_UNUSED
;
1278 if (TREE_CODE (*node
) != FUNCTION_TYPE
1279 && TREE_CODE (*node
) != METHOD_TYPE
1280 && TREE_CODE (*node
) != FIELD_DECL
1281 && TREE_CODE (*node
) != TYPE_DECL
)
1283 warning ("`%s' attribute only applies to functions",
1284 IDENTIFIER_POINTER (name
));
1285 *no_add_attrs
= true;
1291 cst
= TREE_VALUE (args
);
1292 if (TREE_CODE (cst
) != INTEGER_CST
)
1294 warning ("`%s' attribute requires an integer constant argument",
1295 IDENTIFIER_POINTER (name
));
1296 *no_add_attrs
= true;
1298 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
1300 warning ("argument to `%s' attribute larger than %d",
1301 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
1302 *no_add_attrs
= true;
1309 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
1311 /* Generate the assembly code for function entry. FILE is a stdio
1312 stream to output the code to. SIZE is an int: how many units of
1313 temporary storage to allocate.
1315 Refer to the array `regs_ever_live' to determine which registers to
1316 save; `regs_ever_live[I]' is nonzero if register number I is ever
1317 used in the function. This function is responsible for knowing
1318 which registers should not be saved even if used.
1320 We override it here to allow for the new profiling code to go before
1321 the prologue and the old mcount code to go after the prologue (and
1322 after %ebx has been set up for ELF shared library support). */
1325 ix86_osf_output_function_prologue (file
, size
)
1329 const char *prefix
= "";
1330 const char *const lprefix
= LPREFIX
;
1331 int labelno
= profile_label_no
;
1335 if (TARGET_UNDERSCORES
)
1338 if (current_function_profile
&& OSF_PROFILE_BEFORE_PROLOGUE
)
1340 if (!flag_pic
&& !HALF_PIC_P ())
1342 fprintf (file
, "\tmovl $%sP%d,%%edx\n", lprefix
, labelno
);
1343 fprintf (file
, "\tcall *%s_mcount_ptr\n", prefix
);
1346 else if (HALF_PIC_P ())
1350 HALF_PIC_EXTERNAL ("_mcount_ptr");
1351 symref
= HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode
,
1354 fprintf (file
, "\tmovl $%sP%d,%%edx\n", lprefix
, labelno
);
1355 fprintf (file
, "\tmovl %s%s,%%eax\n", prefix
,
1357 fprintf (file
, "\tcall *(%%eax)\n");
1362 static int call_no
= 0;
1364 fprintf (file
, "\tcall %sPc%d\n", lprefix
, call_no
);
1365 fprintf (file
, "%sPc%d:\tpopl %%eax\n", lprefix
, call_no
);
1366 fprintf (file
, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1367 lprefix
, call_no
++);
1368 fprintf (file
, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1370 fprintf (file
, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1372 fprintf (file
, "\tcall *(%%eax)\n");
1378 if (current_function_profile
&& OSF_PROFILE_BEFORE_PROLOGUE
)
1382 fprintf (file
, "\tmovl $%sP%d,%%edx\n", lprefix
, labelno
);
1383 fprintf (file
, "\tcall *%s_mcount_ptr\n", prefix
);
1388 static int call_no
= 0;
1390 fprintf (file
, "\tcall %sPc%d\n", lprefix
, call_no
);
1391 fprintf (file
, "%sPc%d:\tpopl %%eax\n", lprefix
, call_no
);
1392 fprintf (file
, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1393 lprefix
, call_no
++);
1394 fprintf (file
, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1396 fprintf (file
, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1398 fprintf (file
, "\tcall *(%%eax)\n");
1401 #endif /* !OSF_OS */
1403 function_prologue (file
, size
);
1406 #endif /* OSF_OS || TARGET_OSF1ELF */
1408 /* Return 0 if the attributes for two types are incompatible, 1 if they
1409 are compatible, and 2 if they are nearly compatible (which causes a
1410 warning to be generated). */
1413 ix86_comp_type_attributes (type1
, type2
)
1417 /* Check for mismatch of non-default calling convention. */
1418 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
1420 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
1423 /* Check for mismatched return types (cdecl vs stdcall). */
1424 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
1425 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
1430 /* Value is the number of bytes of arguments automatically
1431 popped when returning from a subroutine call.
1432 FUNDECL is the declaration node of the function (as a tree),
1433 FUNTYPE is the data type of the function (as a tree),
1434 or for a library call it is an identifier node for the subroutine name.
1435 SIZE is the number of bytes of arguments passed on the stack.
1437 On the 80386, the RTD insn may be used to pop them if the number
1438 of args is fixed, but if the number is variable then the caller
1439 must pop them all. RTD can't be used for library calls now
1440 because the library is compiled with the Unix compiler.
1441 Use of RTD is a selectable option, since it is incompatible with
1442 standard Unix calling sequences. If the option is not selected,
1443 the caller must always pop the args.
1445 The attribute stdcall is equivalent to RTD on a per module basis. */
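/* For example (illustrative): a function declared
     int __attribute__ ((stdcall)) f (int x);
   pops its own four bytes of arguments on return (via "ret $4"), whereas
   under the default cdecl convention the caller removes them.  */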
1448 ix86_return_pops_args (fundecl
, funtype
, size
)
1453 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
1455 /* Cdecl functions override -mrtd, and never pop the stack. */
1456 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
1458 /* Stdcall functions will pop the stack if not variable args. */
1459 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
)))
1463 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1464 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
1465 == void_type_node
)))
1469 /* Lose any fake structure return argument. */
1470 if (aggregate_value_p (TREE_TYPE (funtype
))
1472 return GET_MODE_SIZE (Pmode
);
1477 /* Argument support functions. */
1479 /* Return true when register may be used to pass function parameters. */
1481 ix86_function_arg_regno_p (regno
)
1486 return (regno
< REGPARM_MAX
1487 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
1488 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
1490 /* RAX is used as hidden argument to va_arg functions. */
1493 for (i
= 0; i
< REGPARM_MAX
; i
++)
1494 if (regno
== x86_64_int_parameter_registers
[i
])
1499 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1500 for a call to a function whose data type is FNTYPE.
1501 For a library call, FNTYPE is 0. */
1504 init_cumulative_args (cum
, fntype
, libname
)
1505 CUMULATIVE_ARGS
*cum
; /* Argument info to initialize */
1506 tree fntype
; /* tree ptr for function decl */
1507 rtx libname
; /* SYMBOL_REF of library name or 0 */
1509 static CUMULATIVE_ARGS zero_cum
;
1510 tree param
, next_param
;
1512 if (TARGET_DEBUG_ARG
)
1514 fprintf (stderr
, "\ninit_cumulative_args (");
1516 fprintf (stderr
, "fntype code = %s, ret code = %s",
1517 tree_code_name
[(int) TREE_CODE (fntype
)],
1518 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
1520 fprintf (stderr
, "no fntype");
1523 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
1528 /* Set up the number of registers to use for passing arguments. */
1529 cum
->nregs
= ix86_regparm
;
1530 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1531 if (fntype
&& !TARGET_64BIT
)
1533 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype
));
1536 cum
->nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1538 cum
->maybe_vaarg
= false;
/* Determine if this function has variable arguments.  This is
   indicated by the last argument being 'void_type_node' if there
   are no variable arguments.  If there are variable arguments, then
   we won't pass anything in registers */
1547 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
1548 param
!= 0; param
= next_param
)
1550 next_param
= TREE_CHAIN (param
);
1551 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
1555 cum
->maybe_vaarg
= true;
1559 if ((!fntype
&& !libname
)
1560 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
1561 cum
->maybe_vaarg
= 1;
1563 if (TARGET_DEBUG_ARG
)
1564 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
/* x86-64 register passing implementation.  See the x86-64 ABI for details.
   The goal of this code is to classify each 8 bytes of an incoming argument
   by register class and assign registers accordingly.  */
1573 /* Return the union class of CLASS1 and CLASS2.
1574 See the x86-64 PS ABI for details. */
1576 static enum x86_64_reg_class
1577 merge_classes (class1
, class2
)
1578 enum x86_64_reg_class class1
, class2
;
1580 /* Rule #1: If both classes are equal, this is the resulting class. */
1581 if (class1
== class2
)
1584 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1586 if (class1
== X86_64_NO_CLASS
)
1588 if (class2
== X86_64_NO_CLASS
)
1591 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1592 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
1593 return X86_64_MEMORY_CLASS
;
1595 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1596 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
1597 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
1598 return X86_64_INTEGERSI_CLASS
;
1599 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
1600 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
1601 return X86_64_INTEGER_CLASS
;
1603 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1604 if (class1
== X86_64_X87_CLASS
|| class1
== X86_64_X87UP_CLASS
1605 || class2
== X86_64_X87_CLASS
|| class2
== X86_64_X87UP_CLASS
)
1606 return X86_64_MEMORY_CLASS
;
1608 /* Rule #6: Otherwise class SSE is used. */
1609 return X86_64_SSE_CLASS
;
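/* For example, by rule #4 merging INTEGERSI with SSESF yields INTEGERSI,
   while by rule #5 merging any SSE variant with X87 forces the whole
   argument into MEMORY, i.e. onto the stack.  */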
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies the
   offset in bits, modulo 256, to avoid overflow cases.

   See the x86-64 PS ABI for details.  */
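/* An illustrative case (not taken from the ABI text): a 16 byte
   struct { double d; int a; int b; } is classified one eightbyte at a
   time -- the first eightbyte holds the double and gets an SSE class,
   the second holds the two ints and gets INTEGER -- so the whole struct
   is passed in one SSE register and one general purpose register.  */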
1625 classify_argument (mode
, type
, classes
, bit_offset
)
1626 enum machine_mode mode
;
1628 enum x86_64_reg_class classes
[MAX_CLASSES
];
1632 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1633 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1635 if (type
&& AGGREGATE_TYPE_P (type
))
1639 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
1641 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1645 for (i
= 0; i
< words
; i
++)
1646 classes
[i
] = X86_64_NO_CLASS
;
/* Zero sized arrays or structures are NO_CLASS.  We return 0 to
   signal the memory class, so handle it as a special case.  */
1652 classes
[0] = X86_64_NO_CLASS
;
1656 /* Classify each field of record and merge classes. */
1657 if (TREE_CODE (type
) == RECORD_TYPE
)
1659 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1661 if (TREE_CODE (field
) == FIELD_DECL
)
1665 /* Bitfields are always classified as integer. Handle them
1666 early, since later code would consider them to be
1667 misaligned integers. */
1668 if (DECL_BIT_FIELD (field
))
1670 for (i
= int_bit_position (field
) / 8 / 8;
1671 i
< (int_bit_position (field
)
1672 + tree_low_cst (DECL_SIZE (field
), 0)
1675 merge_classes (X86_64_INTEGER_CLASS
,
1680 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1681 TREE_TYPE (field
), subclasses
,
1682 (int_bit_position (field
)
1683 + bit_offset
) % 256);
1686 for (i
= 0; i
< num
; i
++)
1689 (int_bit_position (field
) + bit_offset
) / 8 / 8;
1691 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1697 /* Arrays are handled as small records. */
1698 else if (TREE_CODE (type
) == ARRAY_TYPE
)
1701 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
1702 TREE_TYPE (type
), subclasses
, bit_offset
);
1706 /* The partial classes are now full classes. */
1707 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
1708 subclasses
[0] = X86_64_SSE_CLASS
;
1709 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
1710 subclasses
[0] = X86_64_INTEGER_CLASS
;
1712 for (i
= 0; i
< words
; i
++)
1713 classes
[i
] = subclasses
[i
% num
];
1715 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1716 else if (TREE_CODE (type
) == UNION_TYPE
)
1718 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1720 if (TREE_CODE (field
) == FIELD_DECL
)
1723 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1724 TREE_TYPE (field
), subclasses
,
1728 for (i
= 0; i
< num
; i
++)
1729 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
1736 /* Final merger cleanup. */
1737 for (i
= 0; i
< words
; i
++)
1739 /* If one class is MEMORY, everything should be passed in
1741 if (classes
[i
] == X86_64_MEMORY_CLASS
)
/* The X86_64_SSEUP_CLASS should always be preceded by
   X86_64_SSE_CLASS.  */
1746 if (classes
[i
] == X86_64_SSEUP_CLASS
1747 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
1748 classes
[i
] = X86_64_SSE_CLASS
;
1750 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1751 if (classes
[i
] == X86_64_X87UP_CLASS
1752 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
1753 classes
[i
] = X86_64_SSE_CLASS
;
1758 /* Compute the alignment needed. We align all types to their natural boundaries,
1759 with the exception of XFmode, which is aligned to 64 bits. */
1760 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
1762 int mode_alignment
= GET_MODE_BITSIZE (mode
);
1765 mode_alignment
= 128;
1766 else if (mode
== XCmode
)
1767 mode_alignment
= 256;
1768 /* Misaligned fields are always returned in memory. */
1769 if (bit_offset
% mode_alignment
)
1773 /* Classification of atomic types. */
1783 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
1784 classes
[0] = X86_64_INTEGERSI_CLASS
;
1786 classes
[0] = X86_64_INTEGER_CLASS
;
1790 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1793 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1794 classes
[2] = classes
[3] = X86_64_INTEGER_CLASS
;
1797 if (!(bit_offset
% 64))
1798 classes
[0] = X86_64_SSESF_CLASS
;
1800 classes
[0] = X86_64_SSE_CLASS
;
1803 classes
[0] = X86_64_SSEDF_CLASS
;
1806 classes
[0] = X86_64_X87_CLASS
;
1807 classes
[1] = X86_64_X87UP_CLASS
;
1810 classes
[0] = X86_64_X87_CLASS
;
1811 classes
[1] = X86_64_X87UP_CLASS
;
1812 classes
[2] = X86_64_X87_CLASS
;
1813 classes
[3] = X86_64_X87UP_CLASS
;
1816 classes
[0] = X86_64_SSEDF_CLASS
;
1817 classes
[1] = X86_64_SSEDF_CLASS
;
1820 classes
[0] = X86_64_SSE_CLASS
;
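/* Worked example (editor's sketch, not part of the original sources):
   for a hypothetical parameter

     struct s { double d; long l; };

   classify_argument would be expected to return 2 with
   classes[0] == X86_64_SSEDF_CLASS (the double) and
   classes[1] == X86_64_INTEGER_CLASS (the long), so the struct travels
   in one SSE register and one general purpose register.  A struct
   larger than 16 bytes, or one with a misaligned field, is classified
   as memory and the function returns 0 instead.  */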
1829 /* Examine the argument and return the number of registers required in each
1830 class. Return 0 iff the parameter should be passed in memory. */
1832 examine_argument (mode
, type
, in_return
, int_nregs
, sse_nregs
)
1833 enum machine_mode mode
;
1835 int *int_nregs
, *sse_nregs
;
1838 enum x86_64_reg_class
class[MAX_CLASSES
];
1839 int n
= classify_argument (mode
, type
, class, 0);
1845   for (n--; n >= 0; n--)
1848 case X86_64_INTEGER_CLASS
:
1849 case X86_64_INTEGERSI_CLASS
:
1852 case X86_64_SSE_CLASS
:
1853 case X86_64_SSESF_CLASS
:
1854 case X86_64_SSEDF_CLASS
:
1857 case X86_64_NO_CLASS
:
1858 case X86_64_SSEUP_CLASS
:
1860 case X86_64_X87_CLASS
:
1861 case X86_64_X87UP_CLASS
:
1865 case X86_64_MEMORY_CLASS
:
1870 /* Construct container for the argument used by GCC interface. See
1871 FUNCTION_ARG for the detailed description. */
1873 construct_container (mode
, type
, in_return
, nintregs
, nsseregs
, intreg
, sse_regno
)
1874 enum machine_mode mode
;
1877 int nintregs
, nsseregs
;
1881 enum machine_mode tmpmode
;
1883 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1884 enum x86_64_reg_class
class[MAX_CLASSES
];
1888 int needed_sseregs
, needed_intregs
;
1889 rtx exp
[MAX_CLASSES
];
1892 n
= classify_argument (mode
, type
, class, 0);
1893 if (TARGET_DEBUG_ARG
)
1896 fprintf (stderr
, "Memory class\n");
1899 fprintf (stderr
, "Classes:");
1900 for (i
= 0; i
< n
; i
++)
1902 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
1904 fprintf (stderr
, "\n");
1909 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
, &needed_sseregs
))
1911 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
1914 /* First construct the simple cases. Avoid SCmode, since we want to use
1915 a single register to pass this type. */
1916 if (n
== 1 && mode
!= SCmode
)
1919 case X86_64_INTEGER_CLASS
:
1920 case X86_64_INTEGERSI_CLASS
:
1921 return gen_rtx_REG (mode
, intreg
[0]);
1922 case X86_64_SSE_CLASS
:
1923 case X86_64_SSESF_CLASS
:
1924 case X86_64_SSEDF_CLASS
:
1925 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
1926 case X86_64_X87_CLASS
:
1927 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
1928 case X86_64_NO_CLASS
:
1929 /* Zero sized array, struct or class. */
1934 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
)
1935 return gen_rtx_REG (TImode
, SSE_REGNO (sse_regno
));
1937 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
1938 return gen_rtx_REG (TFmode
, FIRST_STACK_REG
);
1939 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
1940 && class[1] == X86_64_INTEGER_CLASS
1941 && (mode
== CDImode
|| mode
== TImode
)
1942 && intreg
[0] + 1 == intreg
[1])
1943 return gen_rtx_REG (mode
, intreg
[0]);
1945 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
1946 && class[2] == X86_64_X87_CLASS
&& class[3] == X86_64_X87UP_CLASS
)
1947 return gen_rtx_REG (TCmode
, FIRST_STACK_REG
);
1949 /* Otherwise figure out the entries of the PARALLEL. */
1950 for (i
= 0; i
< n
; i
++)
1954 case X86_64_NO_CLASS
:
1956 case X86_64_INTEGER_CLASS
:
1957 case X86_64_INTEGERSI_CLASS
:
1958 /* Merge TImodes on aligned occasions here too. */
1959 if (i
* 8 + 8 > bytes
)
1960 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
1961 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
1965 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
1966 if (tmpmode
== BLKmode
)
1968 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
1969 gen_rtx_REG (tmpmode
, *intreg
),
1973 case X86_64_SSESF_CLASS
:
1974 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
1975 gen_rtx_REG (SFmode
,
1976 SSE_REGNO (sse_regno
)),
1980 case X86_64_SSEDF_CLASS
:
1981 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
1982 gen_rtx_REG (DFmode
,
1983 SSE_REGNO (sse_regno
)),
1987 case X86_64_SSE_CLASS
:
1988 if (i
< n
&& class[i
+ 1] == X86_64_SSEUP_CLASS
)
1989 tmpmode
= TImode
, i
++;
1992 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
1993 gen_rtx_REG (tmpmode
,
1994 SSE_REGNO (sse_regno
)),
2002   ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2003   for (i = 0; i < nexps; i++)
2004     XVECEXP (ret, 0, i) = exp[i];
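/* Illustrative note (editor's sketch, not part of the original sources):
   for the struct { double d; long l; } example above, the PARALLEL built
   here would be expected to look roughly like

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI rdi)  (const_int 8))])

   i.e. each EXPR_LIST pairs the hard register carrying one eightbyte
   with the byte offset of that eightbyte inside the argument.  The
   register names here are only for illustration; the actual registers
   come from the INTREG array and SSE_REGNO.  */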
2008 /* Update the data in CUM to advance over an argument
2009 of mode MODE and data type TYPE.
2010 (TYPE is null for libcalls where that information may not be available.) */
2013 function_arg_advance (cum
, mode
, type
, named
)
2014 CUMULATIVE_ARGS
*cum
; /* current arg information */
2015 enum machine_mode mode
; /* current arg mode */
2016 tree type
; /* type of the argument or 0 if lib support */
2017 int named
; /* whether or not the argument was named */
2020     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2021   int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2023 if (TARGET_DEBUG_ARG
)
2025 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2026 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2029 int int_nregs
, sse_nregs
;
2030 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
2031 cum
->words
+= words
;
2032 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2034 cum
->nregs
-= int_nregs
;
2035 cum
->sse_nregs
-= sse_nregs
;
2036 cum
->regno
+= int_nregs
;
2037 cum
->sse_regno
+= sse_nregs
;
2040 cum
->words
+= words
;
2044 if (TARGET_SSE
&& mode
== TImode
)
2046 cum
->sse_words
+= words
;
2047 cum
->sse_nregs
-= 1;
2048 cum
->sse_regno
+= 1;
2049 if (cum
->sse_nregs
<= 0)
2057 cum
->words
+= words
;
2058 cum
->nregs
-= words
;
2059 cum
->regno
+= words
;
2061 if (cum
->nregs
<= 0)
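/* Illustrative note (editor's sketch, not part of the original sources):
   for a 64-bit call such as f (1, 2.5, 3) the cumulative state would be
   expected to evolve roughly as

     start:            nregs = 6, sse_nregs = 8, regno = 0, sse_regno = 0
     after int arg:    nregs = 5, regno = 1
     after double arg: sse_nregs = 7, sse_regno = 1
     after int arg:    nregs = 4, regno = 2

   An argument that does not fit in the remaining registers only bumps
   cum->words and is passed on the stack.  */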
2071 /* Define where to put the arguments to a function.
2072 Value is zero to push the argument on the stack,
2073 or a hard register in which to store the argument.
2075 MODE is the argument's machine mode.
2076 TYPE is the data type of the argument (as a tree).
2077 This is null for libcalls where that information may
2079 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2080 the preceding args and about the function being called.
2081 NAMED is nonzero if this argument is a named parameter
2082 (otherwise it is an extra parameter matching an ellipsis). */
2085 function_arg (cum
, mode
, type
, named
)
2086 CUMULATIVE_ARGS
*cum
; /* current arg information */
2087 enum machine_mode mode
; /* current arg mode */
2088 tree type
; /* type of the argument or 0 if lib support */
2089 int named
; /* != 0 for normal args, == 0 for ... args */
2093     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2094   int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2096 /* Handle a hidden AL argument containing the number of SSE registers used, for varargs
2097 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2099 if (mode
== VOIDmode
)
2102 return GEN_INT (cum
->maybe_vaarg
2103 ? (cum
->sse_nregs
< 0
2111 ret
= construct_container (mode
, type
, 0, cum
->nregs
, cum
->sse_nregs
,
2112 &x86_64_int_parameter_registers
[cum
->regno
],
2117 /* For now, pass fp/complex values on the stack. */
2126 if (words
<= cum
->nregs
)
2127 ret
= gen_rtx_REG (mode
, cum
->regno
);
2131 ret
= gen_rtx_REG (mode
, cum
->sse_regno
);
2135 if (TARGET_DEBUG_ARG
)
2138 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
2139 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2142 fprintf (stderr
, ", reg=%%e%s", reg_names
[ REGNO (ret
) ]);
2144 fprintf (stderr
, ", stack");
2146 fprintf (stderr
, " )\n");
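/* Illustrative note (editor's sketch, not part of the original sources):
   the VOIDmode case above implements the x86-64 convention that a call
   to a varargs function carries in %al the number of SSE registers
   actually used for arguments.  For example, for

     printf ("%f %d\n", 3.14, 7);

   the caller would be expected to emit something like "movl $1, %eax"
   before the call, because exactly one SSE register carries an
   argument.  On i386 the hook just returns constm1_rtx.  */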
2152 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2156 ix86_function_arg_boundary (mode
, type
)
2157 enum machine_mode mode
;
2162 return PARM_BOUNDARY
;
2164 align
= TYPE_ALIGN (type
);
2166 align
= GET_MODE_ALIGNMENT (mode
);
2167 if (align
< PARM_BOUNDARY
)
2168 align
= PARM_BOUNDARY
;
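/* Illustrative note (editor's sketch, not part of the original sources):
   with the logic above, a plain int stays at PARM_BOUNDARY (32 bits on
   i386, 64 bits on x86-64), while a type with 16-byte alignment such as
   a 128-bit vector would be expected to report a 128-bit boundary,
   forcing the caller to pad the stack before pushing it.  */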
2174 /* Return true if N is a possible register number of function value. */
2176 ix86_function_value_regno_p (regno
)
2181 return ((regno
) == 0
2182 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
2183 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
2185 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
2186 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
2187 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
2190 /* Define how to find the value returned by a function.
2191 VALTYPE is the data type of the value (as a tree).
2192 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2193 otherwise, FUNC is 0. */
2195 ix86_function_value (valtype
)
2200 rtx ret
= construct_container (TYPE_MODE (valtype
), valtype
, 1,
2201 REGPARM_MAX
, SSE_REGPARM_MAX
,
2202 x86_64_int_return_registers
, 0);
2203 /* For zero-sized structures, construct_container returns NULL, but we need
2204 to keep the rest of the compiler happy by returning a meaningful value. */
2206 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
2210 return gen_rtx_REG (TYPE_MODE (valtype
), VALUE_REGNO (TYPE_MODE (valtype
)));
2213 /* Return false iff type is returned in memory. */
2215 ix86_return_in_memory (type
)
2218 int needed_intregs
, needed_sseregs
;
2221 return !examine_argument (TYPE_MODE (type
), type
, 1,
2222 &needed_intregs
, &needed_sseregs
);
2226 if (TYPE_MODE (type
) == BLKmode
2227 || (VECTOR_MODE_P (TYPE_MODE (type
))
2228 && int_size_in_bytes (type
) == 8)
2229 || (int_size_in_bytes (type
) > 12 && TYPE_MODE (type
) != TImode
2230 && TYPE_MODE (type
) != TFmode
2231 && !VECTOR_MODE_P (TYPE_MODE (type
))))
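/* Illustrative note (editor's sketch, not part of the original sources):
   on x86-64 this simply asks examine_argument whether the type has a
   register container, so e.g. struct { long a, b; } would be expected
   to come back in registers while a 24-byte struct is returned in
   memory.  The i386 branch above instead keys off size and mode:
   BLKmode aggregates, 8-byte vectors and most things larger than 12
   bytes go to memory.  */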
2237 /* Define how to find the value returned by a library function
2238 assuming the value has mode MODE. */
2240 ix86_libcall_value (mode
)
2241 enum machine_mode mode
;
2251 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
2254 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
2256 return gen_rtx_REG (mode
, 0);
2260 return gen_rtx_REG (mode
, VALUE_REGNO (mode
));
2263 /* Create the va_list data type. */
2266 ix86_build_va_list ()
2268 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
2270 /* For i386 we use plain pointer to argument area. */
2272 return build_pointer_type (char_type_node
);
2274 record
= make_lang_type (RECORD_TYPE
);
2275 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
2277 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
2278 unsigned_type_node
);
2279 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
2280 unsigned_type_node
);
2281 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
2283 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
2286 DECL_FIELD_CONTEXT (f_gpr
) = record
;
2287 DECL_FIELD_CONTEXT (f_fpr
) = record
;
2288 DECL_FIELD_CONTEXT (f_ovf
) = record
;
2289 DECL_FIELD_CONTEXT (f_sav
) = record
;
2291 TREE_CHAIN (record
) = type_decl
;
2292 TYPE_NAME (record
) = type_decl
;
2293 TYPE_FIELDS (record
) = f_gpr
;
2294 TREE_CHAIN (f_gpr
) = f_fpr
;
2295 TREE_CHAIN (f_fpr
) = f_ovf
;
2296 TREE_CHAIN (f_ovf
) = f_sav
;
2298 layout_type (record
);
2300 /* The correct type is an array type of one element. */
2301 return build_array_type (record
, build_index_type (size_zero_node
));
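/* Illustrative note (editor's sketch, not part of the original sources):
   the record built above corresponds to the psABI va_list definition,
   roughly

     typedef struct __va_list_tag {
       unsigned int gp_offset;      -- byte offset of next GPR slot in reg_save_area
       unsigned int fp_offset;      -- byte offset of next SSE slot in reg_save_area
       void *overflow_arg_area;     -- next stack-passed argument
       void *reg_save_area;         -- start of the register save area
     } va_list[1];

   (an array of one element so that va_list decays to a pointer when
   passed to another function).  */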
2304 /* Perform any actions needed for a function that is receiving a
2305 variable number of arguments.
2309 MODE and TYPE are the mode and type of the current parameter.
2311 PRETEND_SIZE is a variable that should be set to the amount of stack
2312 that must be pushed by the prolog to pretend that our caller pushed
2315 Normally, this macro will push all remaining incoming registers on the
2316 stack and set PRETEND_SIZE to the length of the registers pushed. */
2319 ix86_setup_incoming_varargs (cum
, mode
, type
, pretend_size
, no_rtl
)
2320 CUMULATIVE_ARGS
*cum
;
2321 enum machine_mode mode
;
2323 int *pretend_size ATTRIBUTE_UNUSED
;
2327 CUMULATIVE_ARGS next_cum
;
2328 rtx save_area
= NULL_RTX
, mem
;
2341 /* Indicate that we need to allocate space on the stack for the varargs save area. */
2342 ix86_save_varrargs_registers
= 1;
2344 fntype
= TREE_TYPE (current_function_decl
);
2345 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
2346 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
2347 != void_type_node
));
2349 /* For varargs, we do not want to skip the dummy va_dcl argument.
2350 For stdargs, we do want to skip the last named argument. */
2353 function_arg_advance (&next_cum
, mode
, type
, 1);
2356 save_area
= frame_pointer_rtx
;
2358 set
= get_varargs_alias_set ();
2360 for (i
= next_cum
.regno
; i
< ix86_regparm
; i
++)
2362 mem
= gen_rtx_MEM (Pmode
,
2363 plus_constant (save_area
, i
* UNITS_PER_WORD
));
2364 set_mem_alias_set (mem
, set
);
2365 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
2366 x86_64_int_parameter_registers
[i
]));
2369 if (next_cum
.sse_nregs
)
2371 /* Now emit code to save SSE registers. The AX parameter contains the number
2372 of SSE parameter registers used to call this function. We use the
2373 sse_prologue_save insn template, which produces a computed jump across
2374 the SSE saves. We need some preparation work to get this working. */
2376 label
= gen_label_rtx ();
2377 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
2379 /* Compute address to jump to :
2380 label - 5*eax + nnamed_sse_arguments*5 */
2381 tmp_reg
= gen_reg_rtx (Pmode
);
2382 nsse_reg
= gen_reg_rtx (Pmode
);
2383 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
2384 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2385 gen_rtx_MULT (Pmode
, nsse_reg
,
2387 if (next_cum
.sse_regno
)
2390 gen_rtx_CONST (DImode
,
2391 gen_rtx_PLUS (DImode
,
2393 GEN_INT (next_cum
.sse_regno
* 4))));
2395 emit_move_insn (nsse_reg
, label_ref
);
2396 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
2398 /* Compute the address of the memory block we save into. We always use a pointer
2399 pointing 127 bytes after the first byte to store - this is needed to keep
2400 the instruction size limited to 4 bytes. */
2401 tmp_reg
= gen_reg_rtx (Pmode
);
2402 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2403 plus_constant (save_area
,
2404 8 * REGPARM_MAX
+ 127)));
2405 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
2406 set_mem_alias_set (mem
, set
);
2407 set_mem_align (mem
, BITS_PER_WORD
);
2409 /* And finally do the dirty job! */
2410 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
2411 GEN_INT (next_cum
.sse_regno
), label
));
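/* Illustrative note (editor's sketch, not part of the original sources):
   the sse_prologue_save pattern emitted above expands to a block of
   16-byte stores, one per SSE argument register, reached through the
   computed jump prepared earlier.  Because %al holds the number of SSE
   registers the caller actually used (see the hidden AL argument in
   function_arg), the jump executes only as many stores as are needed,
   so an int-only varargs call skips them all.  The -127 bias on the
   base address keeps each store within a short displacement form.  */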
2416 /* Implement va_start. */
2419 ix86_va_start (stdarg_p
, valist
, nextarg
)
2424 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
2425 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2426 tree gpr
, fpr
, ovf
, sav
, t
;
2428 /* Only the 64-bit target needs something special. */
2431 std_expand_builtin_va_start (stdarg_p
, valist
, nextarg
);
2435 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2436 f_fpr
= TREE_CHAIN (f_gpr
);
2437 f_ovf
= TREE_CHAIN (f_fpr
);
2438 f_sav
= TREE_CHAIN (f_ovf
);
2440 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2441 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2442 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2443 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2444 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2446 /* Count number of gp and fp argument registers used. */
2447 words
= current_function_args_info
.words
;
2448 n_gpr
= current_function_args_info
.regno
;
2449 n_fpr
= current_function_args_info
.sse_regno
;
2451 if (TARGET_DEBUG_ARG
)
2452 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2453 (int) words
, (int) n_gpr
, (int) n_fpr
);
2455 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
2456 build_int_2 (n_gpr
* 8, 0));
2457 TREE_SIDE_EFFECTS (t
) = 1;
2458 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2460 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
2461 build_int_2 (n_fpr
* 16 + 8*REGPARM_MAX
, 0));
2462 TREE_SIDE_EFFECTS (t
) = 1;
2463 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2465 /* Find the overflow area. */
2466 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
2468 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
2469 build_int_2 (words
* UNITS_PER_WORD
, 0));
2470 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2471 TREE_SIDE_EFFECTS (t
) = 1;
2472 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2474 /* Find the register save area.
2475 The function prologue saves it right above the stack frame. */
2476 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
2477 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
2478 TREE_SIDE_EFFECTS (t
) = 1;
2479 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
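/* Worked example (editor's sketch, not part of the original sources):
   for a function such as

     int f (int a, double b, ...)

   the code above would be expected to initialise the va_list roughly as

     gp_offset         = 1 * 8                       -- one named GPR argument
     fp_offset         = 8 * REGPARM_MAX + 1 * 16    -- one named SSE argument
     overflow_arg_area = incoming args + words * UNITS_PER_WORD
     reg_save_area     = the block the prologue stored for varargs

   so that va_arg can later decide, per argument, whether to read from
   the register save area or from the overflow area.  */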
2482 /* Implement va_arg. */
2484 ix86_va_arg (valist
, type
)
2487 static int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
2488 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2489 tree gpr
, fpr
, ovf
, sav
, t
;
2491 rtx lab_false
, lab_over
= NULL_RTX
;
2495 /* Only the 64-bit target needs something special. */
2498 return std_expand_builtin_va_arg (valist
, type
);
2501 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2502 f_fpr
= TREE_CHAIN (f_gpr
);
2503 f_ovf
= TREE_CHAIN (f_fpr
);
2504 f_sav
= TREE_CHAIN (f_ovf
);
2506 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2507 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2508 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2509 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2510 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2512   size = int_size_in_bytes (type);
2513   rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2515 container
= construct_container (TYPE_MODE (type
), type
, 0,
2516 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
2518 * Pull the value out of the saved registers ...
2521 addr_rtx
= gen_reg_rtx (Pmode
);
2525 rtx int_addr_rtx
, sse_addr_rtx
;
2526 int needed_intregs
, needed_sseregs
;
2529 lab_over
= gen_label_rtx ();
2530 lab_false
= gen_label_rtx ();
2532 examine_argument (TYPE_MODE (type
), type
, 0,
2533 &needed_intregs
, &needed_sseregs
);
2536 need_temp
= ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
2537 || TYPE_ALIGN (type
) > 128);
2539 /* In case we are passing a structure, verify that it is a consecutive block
2540 in the register save area. If not, we need to do moves. */
2541 if (!need_temp
&& !REG_P (container
))
2543 /* Verify that all registers are strictly consecutive. */
2544 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
2548 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2550 rtx slot
= XVECEXP (container
, 0, i
);
2551 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
2552 || INTVAL (XEXP (slot
, 1)) != i
* 16)
2560 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2562 rtx slot
= XVECEXP (container
, 0, i
);
2563 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
2564 || INTVAL (XEXP (slot
, 1)) != i
* 8)
2571 int_addr_rtx
= addr_rtx
;
2572 sse_addr_rtx
= addr_rtx
;
2576 int_addr_rtx
= gen_reg_rtx (Pmode
);
2577 sse_addr_rtx
= gen_reg_rtx (Pmode
);
2579 /* First ensure that we fit completely in registers. */
2582 emit_cmp_and_jump_insns (expand_expr
2583 (gpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2584 GEN_INT ((REGPARM_MAX
- needed_intregs
+
2585 1) * 8), GE
, const1_rtx
, SImode
,
2590 emit_cmp_and_jump_insns (expand_expr
2591 (fpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2592 GEN_INT ((SSE_REGPARM_MAX
-
2593 needed_sseregs
+ 1) * 16 +
2594 REGPARM_MAX
* 8), GE
, const1_rtx
,
2595 SImode
, 1, lab_false
);
2598 /* Compute index to start of area used for integer regs. */
2601 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, gpr
);
2602 r
= expand_expr (t
, int_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2603 if (r
!= int_addr_rtx
)
2604 emit_move_insn (int_addr_rtx
, r
);
2608 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, fpr
);
2609 r
= expand_expr (t
, sse_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2610 if (r
!= sse_addr_rtx
)
2611 emit_move_insn (sse_addr_rtx
, r
);
2618 /* Never use the memory itself, as it has the alias set. */
2619 addr_rtx
= XEXP (assign_temp (type
, 0, 1, 0), 0);
2620 mem
= gen_rtx_MEM (BLKmode
, addr_rtx
);
2621 set_mem_alias_set (mem
, get_varargs_alias_set ());
2622 set_mem_align (mem
, BITS_PER_UNIT
);
2624 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
2626 rtx slot
= XVECEXP (container
, 0, i
);
2627 rtx reg
= XEXP (slot
, 0);
2628 enum machine_mode mode
= GET_MODE (reg
);
2634 if (SSE_REGNO_P (REGNO (reg
)))
2636 src_addr
= sse_addr_rtx
;
2637 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
2641 src_addr
= int_addr_rtx
;
2642 src_offset
= REGNO (reg
) * 8;
2644 src_mem
= gen_rtx_MEM (mode
, src_addr
);
2645 set_mem_alias_set (src_mem
, get_varargs_alias_set ());
2646 src_mem
= adjust_address (src_mem
, mode
, src_offset
);
2647 dest_mem
= adjust_address (mem
, mode
, INTVAL (XEXP (slot
, 1)));
2648 emit_move_insn (dest_mem
, src_mem
);
2655 build (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
2656 build_int_2 (needed_intregs
* 8, 0));
2657 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
2658 TREE_SIDE_EFFECTS (t
) = 1;
2659 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2664 build (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
2665 build_int_2 (needed_sseregs
* 16, 0));
2666 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
2667 TREE_SIDE_EFFECTS (t
) = 1;
2668 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2671 emit_jump_insn (gen_jump (lab_over
));
2673 emit_label (lab_false
);
2676 /* ... otherwise out of the overflow area. */
2678 /* Care for on-stack alignment if needed. */
2679 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
2683 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
2684 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
, build_int_2 (align
- 1, 0));
2685 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
, build_int_2 (-align
, -1));
2689 r
= expand_expr (t
, addr_rtx
, Pmode
, EXPAND_NORMAL
);
2691 emit_move_insn (addr_rtx
, r
);
2694 build (PLUS_EXPR
, TREE_TYPE (t
), t
,
2695 build_int_2 (rsize
* UNITS_PER_WORD
, 0));
2696 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2697 TREE_SIDE_EFFECTS (t
) = 1;
2698 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2701 emit_label (lab_over
);
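/* Illustrative note (editor's sketch, not part of the original sources):
   conceptually the code expanded above behaves like

     if (enough register slots remain for this argument)
       {
         addr = reg_save_area + gp_offset (and/or + fp_offset);
         gp_offset += 8 * needed_intregs;
         fp_offset += 16 * needed_sseregs;
       }
     else
       {
         addr = align (overflow_arg_area);
         overflow_arg_area += rsize * UNITS_PER_WORD;
       }

   with the extra temporary-copy path used when the eightbytes of an
   aggregate are not contiguous in the register save area.  */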
2706 /* Return nonzero if OP is general operand representable on x86_64. */
2709 x86_64_general_operand (op
, mode
)
2711 enum machine_mode mode
;
2714 return general_operand (op
, mode
);
2715 if (nonimmediate_operand (op
, mode
))
2717 return x86_64_sign_extended_value (op
);
2720 /* Return nonzero if OP is general operand representable on x86_64
2721 as either sign extended or zero extended constant. */
2724 x86_64_szext_general_operand (op
, mode
)
2726 enum machine_mode mode
;
2729 return general_operand (op
, mode
);
2730 if (nonimmediate_operand (op
, mode
))
2732 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
2735 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2738 x86_64_nonmemory_operand (op
, mode
)
2740 enum machine_mode mode
;
2743 return nonmemory_operand (op
, mode
);
2744 if (register_operand (op
, mode
))
2746 return x86_64_sign_extended_value (op
);
2749 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2752 x86_64_movabs_operand (op
, mode
)
2754 enum machine_mode mode
;
2756 if (!TARGET_64BIT
|| !flag_pic
)
2757 return nonmemory_operand (op
, mode
);
2758 if (register_operand (op
, mode
) || x86_64_sign_extended_value (op
))
2760 if (CONSTANT_P (op
) && !symbolic_reference_mentioned_p (op
))
2765 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2768 x86_64_szext_nonmemory_operand (op
, mode
)
2770 enum machine_mode mode
;
2773 return nonmemory_operand (op
, mode
);
2774 if (register_operand (op
, mode
))
2776 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
2779 /* Return nonzero if OP is immediate operand representable on x86_64. */
2782 x86_64_immediate_operand (op
, mode
)
2784 enum machine_mode mode
;
2787 return immediate_operand (op
, mode
);
2788 return x86_64_sign_extended_value (op
);
2791 /* Return nonzero if OP is immediate operand representable on x86_64. */
2794 x86_64_zext_immediate_operand (op
, mode
)
2796 enum machine_mode mode ATTRIBUTE_UNUSED
;
2798 return x86_64_zero_extended_value (op
);
2801 /* Return nonzero if OP is (const_int 1), else return zero. */
2804 const_int_1_operand (op
, mode
)
2806 enum machine_mode mode ATTRIBUTE_UNUSED
;
2808 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) == 1);
2811 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2812 reference and a constant. */
2815 symbolic_operand (op
, mode
)
2817 enum machine_mode mode ATTRIBUTE_UNUSED
;
2819 switch (GET_CODE (op
))
2827 if (GET_CODE (op
) == SYMBOL_REF
2828 || GET_CODE (op
) == LABEL_REF
2829 || (GET_CODE (op
) == UNSPEC
2830 && (XINT (op
, 1) == 6
2831 || XINT (op
, 1) == 7
2832 || XINT (op
, 1) == 15)))
2834 if (GET_CODE (op
) != PLUS
2835 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
2839 if (GET_CODE (op
) == SYMBOL_REF
2840 || GET_CODE (op
) == LABEL_REF
)
2842 /* Only @GOTOFF gets offsets. */
2843 if (GET_CODE (op
) != UNSPEC
2844 || XINT (op
, 1) != 7)
2847 op
= XVECEXP (op
, 0, 0);
2848 if (GET_CODE (op
) == SYMBOL_REF
2849 || GET_CODE (op
) == LABEL_REF
)
2858 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2861 pic_symbolic_operand (op
, mode
)
2863 enum machine_mode mode ATTRIBUTE_UNUSED
;
2865 if (GET_CODE (op
) != CONST
)
2870 if (GET_CODE (XEXP (op
, 0)) == UNSPEC
)
2875 if (GET_CODE (op
) == UNSPEC
)
2877 if (GET_CODE (op
) != PLUS
2878 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
2881 if (GET_CODE (op
) == UNSPEC
)
2887 /* Return true if OP is a symbolic operand that resolves locally. */
2890 local_symbolic_operand (op
, mode
)
2892 enum machine_mode mode ATTRIBUTE_UNUSED
;
2894 if (GET_CODE (op
) == LABEL_REF
)
2897 if (GET_CODE (op
) == CONST
2898 && GET_CODE (XEXP (op
, 0)) == PLUS
2899 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
2900 op
= XEXP (XEXP (op
, 0), 0);
2902 if (GET_CODE (op
) != SYMBOL_REF
)
2905 /* These we've been told are local by varasm and encode_section_info
2907 if (CONSTANT_POOL_ADDRESS_P (op
) || SYMBOL_REF_FLAG (op
))
2910 /* There is, however, a not insubstantial body of code in the rest of
2911 the compiler that assumes it can just stick the results of
2912 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2913 /* ??? This is a hack. Should update the body of the compiler to
2914 always create a DECL and invoke ENCODE_SECTION_INFO. */
2915 if (strncmp (XSTR (op
, 0), internal_label_prefix
,
2916 internal_label_prefix_len
) == 0)
2922 /* Test for a valid operand for a call instruction. Don't allow the
2923 arg pointer register or virtual regs since they may decay into
2924 reg + const, which the patterns can't handle. */
2927 call_insn_operand (op
, mode
)
2929 enum machine_mode mode ATTRIBUTE_UNUSED
;
2931 /* Disallow indirect through a virtual register. This leads to
2932 compiler aborts when trying to eliminate them. */
2933 if (GET_CODE (op
) == REG
2934 && (op
== arg_pointer_rtx
2935 || op
== frame_pointer_rtx
2936 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
2937 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
2940 /* Disallow `call 1234'. Due to varying assembler lameness this
2941 gets either rejected or translated to `call .+1234'. */
2942 if (GET_CODE (op
) == CONST_INT
)
2945 /* Explicitly allow SYMBOL_REF even if pic. */
2946 if (GET_CODE (op
) == SYMBOL_REF
)
2949 /* Half-pic doesn't allow anything but registers and constants.
2950 We've just taken care of the latter. */
2952 return register_operand (op
, Pmode
);
2954 /* Otherwise we can allow any general_operand in the address. */
2955 return general_operand (op
, Pmode
);
2959 constant_call_address_operand (op
, mode
)
2961 enum machine_mode mode ATTRIBUTE_UNUSED
;
2963 if (GET_CODE (op
) == CONST
2964 && GET_CODE (XEXP (op
, 0)) == PLUS
2965 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
2966 op
= XEXP (XEXP (op
, 0), 0);
2967 return GET_CODE (op
) == SYMBOL_REF
;
2970 /* Match exactly zero and one. */
2973 const0_operand (op
, mode
)
2975 enum machine_mode mode
;
2977 return op
== CONST0_RTX (mode
);
2981 const1_operand (op
, mode
)
2983 enum machine_mode mode ATTRIBUTE_UNUSED
;
2985 return op
== const1_rtx
;
2988 /* Match 2, 4, or 8. Used for leal multiplicands. */
2991 const248_operand (op
, mode
)
2993 enum machine_mode mode ATTRIBUTE_UNUSED
;
2995 return (GET_CODE (op
) == CONST_INT
2996 && (INTVAL (op
) == 2 || INTVAL (op
) == 4 || INTVAL (op
) == 8));
2999 /* True if this is a constant appropriate for an increment or decrement. */
3002 incdec_operand (op
, mode
)
3004 enum machine_mode mode ATTRIBUTE_UNUSED
;
3006 /* On the Pentium 4, the inc and dec operations cause an extra dependency on the
3007 flags register, since the carry flag is not set. */
3008 if (TARGET_PENTIUM4
&& !optimize_size
)
3010 return op
== const1_rtx
|| op
== constm1_rtx
;
3013 /* Return nonzero if OP is acceptable as operand of DImode shift
3017 shiftdi_operand (op
, mode
)
3019 enum machine_mode mode ATTRIBUTE_UNUSED
;
3022 return nonimmediate_operand (op
, mode
);
3024 return register_operand (op
, mode
);
3027 /* Return false if this is the stack pointer, or any other fake
3028 register eliminable to the stack pointer. Otherwise, this is
3031 This is used to prevent esp from being used as an index reg,
3032 which would only happen in pathological cases. */
3035 reg_no_sp_operand (op
, mode
)
3037 enum machine_mode mode
;
3040 if (GET_CODE (t
) == SUBREG
)
3042 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
)
3045 return register_operand (op
, mode
);
3049 mmx_reg_operand (op
, mode
)
3051 enum machine_mode mode ATTRIBUTE_UNUSED
;
3053 return MMX_REG_P (op
);
3056 /* Return false if this is any eliminable register. Otherwise
3060 general_no_elim_operand (op
, mode
)
3062 enum machine_mode mode
;
3065 if (GET_CODE (t
) == SUBREG
)
3067 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3068 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3069 || t
== virtual_stack_dynamic_rtx
)
3072 && REGNO (t
) >= FIRST_VIRTUAL_REGISTER
3073 && REGNO (t
) <= LAST_VIRTUAL_REGISTER
)
3076 return general_operand (op
, mode
);
3079 /* Return false if this is any eliminable register. Otherwise
3080 register_operand or const_int. */
3083 nonmemory_no_elim_operand (op
, mode
)
3085 enum machine_mode mode
;
3088 if (GET_CODE (t
) == SUBREG
)
3090 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3091 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3092 || t
== virtual_stack_dynamic_rtx
)
3095 return GET_CODE (op
) == CONST_INT
|| register_operand (op
, mode
);
3098 /* Return true if op is a Q_REGS class register. */
3101 q_regs_operand (op
, mode
)
3103 enum machine_mode mode
;
3105 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3107 if (GET_CODE (op
) == SUBREG
)
3108 op
= SUBREG_REG (op
);
3109 return QI_REG_P (op
);
3112 /* Return true if op is a NON_Q_REGS class register. */
3115 non_q_regs_operand (op
, mode
)
3117 enum machine_mode mode
;
3119 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3121 if (GET_CODE (op
) == SUBREG
)
3122 op
= SUBREG_REG (op
);
3123 return NON_QI_REG_P (op
);
3126 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3129 sse_comparison_operator (op
, mode
)
3131 enum machine_mode mode ATTRIBUTE_UNUSED
;
3133 enum rtx_code code
= GET_CODE (op
);
3136 /* Operations supported directly. */
3146 /* These are equivalent to ones above in non-IEEE comparisons. */
3153 return !TARGET_IEEE_FP
;
3158 /* Return 1 if OP is a valid comparison operator in valid mode. */
3160 ix86_comparison_operator (op
, mode
)
3162 enum machine_mode mode
;
3164 enum machine_mode inmode
;
3165 enum rtx_code code
= GET_CODE (op
);
3166 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3168 if (GET_RTX_CLASS (code
) != '<')
3170 inmode
= GET_MODE (XEXP (op
, 0));
3172 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3174 enum rtx_code second_code
, bypass_code
;
3175 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3176 return (bypass_code
== NIL
&& second_code
== NIL
);
3183 if (inmode
== CCmode
|| inmode
== CCGCmode
3184 || inmode
== CCGOCmode
|| inmode
== CCNOmode
)
3187 case LTU
: case GTU
: case LEU
: case ORDERED
: case UNORDERED
: case GEU
:
3188 if (inmode
== CCmode
)
3192 if (inmode
== CCmode
|| inmode
== CCGCmode
|| inmode
== CCNOmode
)
3200 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3203 fcmov_comparison_operator (op
, mode
)
3205 enum machine_mode mode
;
3207 enum machine_mode inmode
;
3208 enum rtx_code code
= GET_CODE (op
);
3209 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3211 if (GET_RTX_CLASS (code
) != '<')
3213 inmode
= GET_MODE (XEXP (op
, 0));
3214 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3216 enum rtx_code second_code
, bypass_code
;
3217 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3218 if (bypass_code
!= NIL
|| second_code
!= NIL
)
3220 code
= ix86_fp_compare_code_to_integer (code
);
3222 /* i387 supports just limited amount of conditional codes. */
3225 case LTU
: case GTU
: case LEU
: case GEU
:
3226 if (inmode
== CCmode
|| inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3229 case ORDERED
: case UNORDERED
:
3237 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3240 promotable_binary_operator (op
, mode
)
3242 enum machine_mode mode ATTRIBUTE_UNUSED
;
3244 switch (GET_CODE (op
))
3247 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3248 but the 386 and 486 do HImode multiplies faster. */
3249 return ix86_cpu
> PROCESSOR_I486
;
3261 /* Nearly general operand, but accept any const_double, since we wish
3262 to be able to drop them into memory rather than have them get pulled
3266 cmp_fp_expander_operand (op
, mode
)
3268 enum machine_mode mode
;
3270 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3272 if (GET_CODE (op
) == CONST_DOUBLE
)
3274 return general_operand (op
, mode
);
3277 /* Match an SI or HImode register for a zero_extract. */
3280 ext_register_operand (op
, mode
)
3282 enum machine_mode mode ATTRIBUTE_UNUSED
;
3285 if ((!TARGET_64BIT
|| GET_MODE (op
) != DImode
)
3286 && GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
3289 if (!register_operand (op
, VOIDmode
))
3292 /* Be careful to accept only registers having upper parts. */
3293 regno
= REG_P (op
) ? REGNO (op
) : REGNO (SUBREG_REG (op
));
3294 return (regno
> LAST_VIRTUAL_REGISTER
|| regno
< 4);
3297 /* Return 1 if this is a valid binary floating-point operation.
3298 OP is the expression matched, and MODE is its mode. */
3301 binary_fp_operator (op
, mode
)
3303 enum machine_mode mode
;
3305 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3308 switch (GET_CODE (op
))
3314 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
3322 mult_operator (op
, mode
)
3324 enum machine_mode mode ATTRIBUTE_UNUSED
;
3326 return GET_CODE (op
) == MULT
;
3330 div_operator (op
, mode
)
3332 enum machine_mode mode ATTRIBUTE_UNUSED
;
3334 return GET_CODE (op
) == DIV
;
3338 arith_or_logical_operator (op
, mode
)
3340 enum machine_mode mode
;
3342 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
3343 && (GET_RTX_CLASS (GET_CODE (op
)) == 'c'
3344 || GET_RTX_CLASS (GET_CODE (op
)) == '2'));
3347 /* Returns 1 if OP is memory operand with a displacement. */
3350 memory_displacement_operand (op
, mode
)
3352 enum machine_mode mode
;
3354 struct ix86_address parts
;
3356 if (! memory_operand (op
, mode
))
3359 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
3362 return parts
.disp
!= NULL_RTX
;
3365 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3366 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3368 ??? It seems likely that this will only work because cmpsi is an
3369 expander, and no actual insns use this. */
3372 cmpsi_operand (op
, mode
)
3374 enum machine_mode mode
;
3376 if (nonimmediate_operand (op
, mode
))
3379 if (GET_CODE (op
) == AND
3380 && GET_MODE (op
) == SImode
3381 && GET_CODE (XEXP (op
, 0)) == ZERO_EXTRACT
3382 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
3383 && GET_CODE (XEXP (XEXP (op
, 0), 2)) == CONST_INT
3384 && INTVAL (XEXP (XEXP (op
, 0), 1)) == 8
3385 && INTVAL (XEXP (XEXP (op
, 0), 2)) == 8
3386 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
3392 /* Returns 1 if OP is a memory operand that cannot be represented by the
3396 long_memory_operand (op
, mode
)
3398 enum machine_mode mode
;
3400 if (! memory_operand (op
, mode
))
3403 return memory_address_length (op
) != 0;
3406 /* Return nonzero if the rtx is known aligned. */
3409 aligned_operand (op
, mode
)
3411 enum machine_mode mode
;
3413 struct ix86_address parts
;
3415 if (!general_operand (op
, mode
))
3418 /* Registers and immediate operands are always "aligned". */
3419 if (GET_CODE (op
) != MEM
)
3422 /* Don't even try to do any aligned optimizations with volatiles. */
3423 if (MEM_VOLATILE_P (op
))
3428 /* Pushes and pops are only valid on the stack pointer. */
3429 if (GET_CODE (op
) == PRE_DEC
3430 || GET_CODE (op
) == POST_INC
)
3433 /* Decode the address. */
3434 if (! ix86_decompose_address (op
, &parts
))
3437 /* Look for some component that isn't known to be aligned. */
3441 && REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 32)
3446 if (REGNO_POINTER_ALIGN (REGNO (parts
.base
)) < 32)
3451 if (GET_CODE (parts
.disp
) != CONST_INT
3452 || (INTVAL (parts
.disp
) & 3) != 0)
3456 /* Didn't find one -- this must be an aligned address. */
3460 /* Return true if the constant is something that can be loaded with
3461 a special instruction. Only handle 0.0 and 1.0; others are less
3465 standard_80387_constant_p (x
)
3468 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
3470 /* Note that the 80387 has other constants, such as pi, that we should support
3471 too. On some machines, these are much slower to load as a standard constant
3472 than to load from doubles in memory. */
3473 if (x
== CONST0_RTX (GET_MODE (x
)))
3475 if (x
== CONST1_RTX (GET_MODE (x
)))
3480 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3483 standard_sse_constant_p (x
)
3486 if (GET_CODE (x
) != CONST_DOUBLE
)
3488 return (x
== CONST0_RTX (GET_MODE (x
)));
3491 /* Returns 1 if OP contains a symbol reference */
3494 symbolic_reference_mentioned_p (op
)
3497 register const char *fmt
;
3500 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
3503 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
3504 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
3510 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
3511 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
3515 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
3522 /* Return 1 if it is appropriate to emit `ret' instructions in the
3523 body of a function. Do this only if the epilogue is simple, needing a
3524 couple of insns. Prior to reloading, we can't tell how many registers
3525 must be saved, so return 0 then. Return 0 if there is no frame
3526 marker to de-allocate.
3528 If NON_SAVING_SETJMP is defined and true, then it is not possible
3529 for the epilogue to be simple, so return 0. This is a special case
3530 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3531 until final, but jump_optimize may need to know sooner if a
3535 ix86_can_use_return_insn_p ()
3537 struct ix86_frame frame
;
3539 #ifdef NON_SAVING_SETJMP
3540 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
3544 if (! reload_completed
|| frame_pointer_needed
)
3547 /* Don't allow more than 32 pop, since that's all we can do
3548 with one instruction. */
3549 if (current_function_pops_args
3550 && current_function_args_size
>= 32768)
3553 ix86_compute_frame_layout (&frame
);
3554 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
3557 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3559 x86_64_sign_extended_value (value
)
3562 switch (GET_CODE (value
))
3564 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3565 to be at least 32, so all acceptable constants are
3566 represented as CONST_INTs. */
3568 if (HOST_BITS_PER_WIDE_INT
== 32)
3572 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (value
), DImode
);
3573 return trunc_int_for_mode (val
, SImode
) == val
;
3577 /* For certain code models, the symbolic references are known to fit. */
3579 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_KERNEL
;
3581 /* For certain code models, the code is near as well. */
3583 return ix86_cmodel
!= CM_LARGE
&& ix86_cmodel
!= CM_SMALL_PIC
;
3585 /* We also may accept the offsetted memory references in certain special
3588 if (GET_CODE (XEXP (value
, 0)) == UNSPEC
3589 && XVECLEN (XEXP (value
, 0), 0) == 1
3590 && XINT (XEXP (value
, 0), 1) == 15)
3592 else if (GET_CODE (XEXP (value
, 0)) == PLUS
)
3594 rtx op1
= XEXP (XEXP (value
, 0), 0);
3595 rtx op2
= XEXP (XEXP (value
, 0), 1);
3596 HOST_WIDE_INT offset
;
3598 if (ix86_cmodel
== CM_LARGE
)
3600 if (GET_CODE (op2
) != CONST_INT
)
3602 offset
= trunc_int_for_mode (INTVAL (op2
), DImode
);
3603 switch (GET_CODE (op1
))
3606 /* For CM_SMALL assume that the latest object is 1MB before
3607 the end of the 31-bit boundary. We may also accept pretty
3608 large negative constants, knowing that all objects are
3609 in the positive half of the address space. */
3610 if (ix86_cmodel
== CM_SMALL
3611 && offset
< 1024*1024*1024
3612 && trunc_int_for_mode (offset
, SImode
) == offset
)
3614 /* For CM_KERNEL we know that all objects reside in the
3615 negative half of the 32-bit address space. We may not
3616 accept negative offsets, since they may be just off,
3617 and we may accept pretty large positive ones. */
3618 if (ix86_cmodel
== CM_KERNEL
3620 && trunc_int_for_mode (offset
, SImode
) == offset
)
3624 /* These conditions are similar to SYMBOL_REF ones, just the
3625 constraints for code models differ. */
3626 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
3627 && offset
< 1024*1024*1024
3628 && trunc_int_for_mode (offset
, SImode
) == offset
)
3630 if (ix86_cmodel
== CM_KERNEL
3632 && trunc_int_for_mode (offset
, SImode
) == offset
)
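/* Illustrative note (editor's sketch, not part of the original sources):
   the predicate above accepts exactly the constants that an x86-64
   instruction can carry in a 32-bit sign-extended immediate, e.g.

     0x7fffffff                  accepted
     -1 (0xffffffffffffffff)     accepted (sign-extends from -1)
     0x80000000                  rejected (would zero-extend, not sign-extend)

   plus the symbolic cases whose final value is known to fit for the
   selected code model.  */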
3645 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3647 x86_64_zero_extended_value (value
)
3650 switch (GET_CODE (value
))
3653 if (HOST_BITS_PER_WIDE_INT
== 32)
3654 return (GET_MODE (value
) == VOIDmode
3655 && !CONST_DOUBLE_HIGH (value
));
3659 if (HOST_BITS_PER_WIDE_INT
== 32)
3660 return INTVAL (value
) >= 0;
3662 return !(INTVAL (value
) & ~(HOST_WIDE_INT
) 0xffffffff);
3665 /* For certain code models, the symbolic references are known to fit. */
3667 return ix86_cmodel
== CM_SMALL
;
3669 /* For certain code models, the code is near as well. */
3671 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
;
3673 /* We also may accept the offsetted memory references in certain special
3676 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
3678 rtx op1
= XEXP (XEXP (value
, 0), 0);
3679 rtx op2
= XEXP (XEXP (value
, 0), 1);
3681 if (ix86_cmodel
== CM_LARGE
)
3683 switch (GET_CODE (op1
))
3687 /* For small code model we may accept pretty large positive
3688 offsets, since one bit is available for free. Negative
3689 offsets are limited by the size of NULL pointer area
3690 specified by the ABI. */
3691 if (ix86_cmodel
== CM_SMALL
3692 && GET_CODE (op2
) == CONST_INT
3693 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
3694 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
3697 /* ??? For the kernel, we may accept adjustment of
3698 -0x10000000, since we know that it will just convert
3699 negative address space to positive, but perhaps this
3700 is not worthwhile. */
3703 /* These conditions are similar to SYMBOL_REF ones, just the
3704 constraints for code models differ. */
3705 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
3706 && GET_CODE (op2
) == CONST_INT
3707 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
3708 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
3722 /* Value should be nonzero if functions must have frame pointers.
3723 Zero means the frame pointer need not be set up (and parms may
3724 be accessed via the stack pointer) in functions that seem suitable. */
3727 ix86_frame_pointer_required ()
3729 /* If we accessed previous frames, then the generated code expects
3730 to be able to access the saved ebp value in our frame. */
3731 if (cfun
->machine
->accesses_prev_frame
)
3734 /* Several x86 os'es need a frame pointer for other reasons,
3735 usually pertaining to setjmp. */
3736 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
3739 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3740 the frame pointer by default. Turn it back on now if we've not
3741 got a leaf function. */
3742 if (TARGET_OMIT_LEAF_FRAME_POINTER
&& ! leaf_function_p ())
3748 /* Record that the current function accesses previous call frames. */
3751 ix86_setup_frame_addresses ()
3753 cfun
->machine
->accesses_prev_frame
= 1;
3756 static char pic_label_name
[32];
3758 /* This function generates code for -fpic that loads %ebx with
3759 the return address of the caller and then returns. */
3762 ix86_asm_file_end (file
)
3767 if (! TARGET_DEEP_BRANCH_PREDICTION
|| pic_label_name
[0] == 0)
3770 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
3771 to updating relocations to a section being discarded such that this
3772 doesn't work. Ought to detect this at configure time. */
3774 /* The trick here is to create a linkonce section containing the
3775 pic label thunk, but to refer to it with an internal label.
3776 Because the label is internal, we don't have inter-dso name
3777 binding issues on hosts that don't support ".hidden".
3779 In order to use these macros, however, we must create a fake
3781 if (targetm
.have_named_sections
)
3783 tree decl
= build_decl (FUNCTION_DECL
,
3784 get_identifier ("i686.get_pc_thunk"),
3786 DECL_ONE_ONLY (decl
) = 1;
3787 UNIQUE_SECTION (decl
, 0);
3788 named_section (decl
, NULL
);
3795 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3796 internal (non-global) label that's being emitted, it didn't make
3797 sense to have .type information for local labels. This caused
3798 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3799 me debug info for a label that you're declaring non-global?) this
3800 was changed to call ASM_OUTPUT_LABEL() instead. */
3802 ASM_OUTPUT_LABEL (file
, pic_label_name
);
3804 xops
[0] = pic_offset_table_rtx
;
3805 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
3806 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
3807 output_asm_insn ("ret", xops
);
3811 load_pic_register ()
3818 gotsym
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
3820 if (TARGET_DEEP_BRANCH_PREDICTION
)
3822 if (! pic_label_name
[0])
3823 ASM_GENERATE_INTERNAL_LABEL (pic_label_name
, "LPR", 0);
3824 pclab
= gen_rtx_MEM (QImode
, gen_rtx_SYMBOL_REF (Pmode
, pic_label_name
));
3828 pclab
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
3831 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx
, pclab
));
3833 if (! TARGET_DEEP_BRANCH_PREDICTION
)
3834 emit_insn (gen_popsi1 (pic_offset_table_rtx
));
3836 emit_insn (gen_prologue_set_got (pic_offset_table_rtx
, gotsym
, pclab
));
3839 /* Generate an "push" pattern for input ARG. */
3845 return gen_rtx_SET (VOIDmode
,
3847 gen_rtx_PRE_DEC (Pmode
,
3848 stack_pointer_rtx
)),
3852 /* Return 1 if we need to save REGNO. */
3854 ix86_save_reg (regno
, maybe_eh_return
)
3856 int maybe_eh_return
;
3860 && regno
== PIC_OFFSET_TABLE_REGNUM
3861 && (current_function_uses_pic_offset_table
3862 || current_function_uses_const_pool
3863 || current_function_calls_eh_return
))
3866 if (current_function_calls_eh_return
&& maybe_eh_return
)
3871 unsigned test
= EH_RETURN_DATA_REGNO (i
);
3872 if (test
== INVALID_REGNUM
)
3874 if (test
== (unsigned) regno
)
3879 return (regs_ever_live
[regno
]
3880 && !call_used_regs
[regno
]
3881 && !fixed_regs
[regno
]
3882 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
3885 /* Return number of registers to be saved on the stack. */
3893 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
3894 if (ix86_save_reg (regno
, true))
3899 /* Return the offset between two registers, one to be eliminated, and the other
3900 its replacement, at the start of a routine. */
3903 ix86_initial_elimination_offset (from
, to
)
3907 struct ix86_frame frame
;
3908 ix86_compute_frame_layout (&frame
);
3910 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3911 return frame
.hard_frame_pointer_offset
;
3912 else if (from
== FRAME_POINTER_REGNUM
3913 && to
== HARD_FRAME_POINTER_REGNUM
)
3914 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
3917 if (to
!= STACK_POINTER_REGNUM
)
3919 else if (from
== ARG_POINTER_REGNUM
)
3920 return frame
.stack_pointer_offset
;
3921 else if (from
!= FRAME_POINTER_REGNUM
)
3924 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
3928 /* Fill structure ix86_frame about frame of currently computed function. */
3931 ix86_compute_frame_layout (frame
)
3932 struct ix86_frame
*frame
;
3934 HOST_WIDE_INT total_size
;
3935 int stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
3937 int preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
3938 HOST_WIDE_INT size
= get_frame_size ();
3940 frame
->nregs
= ix86_nsaved_regs ();
3943 /* Skip return value and save base pointer. */
3944 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
3946 frame
->hard_frame_pointer_offset
= offset
;
3948 /* Do some sanity checking of stack_alignment_needed and
3949 preferred_alignment, since the i386 port is the only one using those
3950 features, and they may break easily. */
3952 if (size
&& !stack_alignment_needed
)
3954 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
3956 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
3958 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
3961 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
3962 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
3964 /* Register save area */
3965 offset
+= frame
->nregs
* UNITS_PER_WORD
;
3968 if (ix86_save_varrargs_registers
)
3970 offset
+= X86_64_VARARGS_SIZE
;
3971 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
3974 frame
->va_arg_size
= 0;
3976 /* Align start of frame for local function. */
3977 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
3978 & -stack_alignment_needed
) - offset
;
3980 offset
+= frame
->padding1
;
3982 /* Frame pointer points here. */
3983 frame
->frame_pointer_offset
= offset
;
3987 /* Add outgoing arguments area. */
3988 if (ACCUMULATE_OUTGOING_ARGS
)
3990 offset
+= current_function_outgoing_args_size
;
3991 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
3994 frame
->outgoing_arguments_size
= 0;
3996 /* Align stack boundary. */
3997 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
3998 & -preferred_alignment
) - offset
;
4000 offset
+= frame
->padding2
;
4002 /* We've reached end of stack frame. */
4003 frame
->stack_pointer_offset
= offset
;
4005 /* Size prologue needs to allocate. */
4006 frame
->to_allocate
=
4007 (size
+ frame
->padding1
+ frame
->padding2
4008 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
4010 if (TARGET_64BIT
&& TARGET_RED_ZONE
&& current_function_sp_is_unchanging
4011 && current_function_is_leaf
)
4013 frame
->red_zone_size
= frame
->to_allocate
;
4014 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
4015 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
4018 frame
->red_zone_size
= 0;
4019 frame
->to_allocate
-= frame
->red_zone_size
;
4020 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
4022 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
4023 fprintf (stderr
, "size: %i\n", size
);
4024 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
4025 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
4026 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
4027 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
4028 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
4029 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
4030 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
4031 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
4032 frame
->hard_frame_pointer_offset
);
4033 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
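/* Illustrative note (editor's sketch, not part of the original sources):
   after this function runs, the frame layout (growing downwards) is
   roughly

     [ return address        ]
     [ saved %ebp            ]  <- hard_frame_pointer_offset
     [ saved registers       ]  nregs * UNITS_PER_WORD
     [ va_arg register save  ]  va_arg_size (x86-64 varargs only)
     [ padding1              ]
     [ local variables       ]  <- frame_pointer_offset (locals start here)
     [ outgoing arguments    ]
     [ padding2              ]  <- stack_pointer_offset

   with to_allocate covering everything the prologue must subtract from
   the stack pointer, minus whatever fits in the x86-64 red zone.  */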
4037 /* Emit code to save registers in the prologue. */
4040 ix86_emit_save_regs ()
4045 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4046 if (ix86_save_reg (regno
, true))
4048 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
4049 RTX_FRAME_RELATED_P (insn
) = 1;
4053 /* Emit code to save registers using MOV insns. First register
4054 is restored from POINTER + OFFSET. */
4056 ix86_emit_save_regs_using_mov (pointer
, offset
)
4058 HOST_WIDE_INT offset
;
4063 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4064 if (ix86_save_reg (regno
, true))
4066 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
4068 gen_rtx_REG (Pmode
, regno
));
4069 RTX_FRAME_RELATED_P (insn
) = 1;
4070 offset
+= UNITS_PER_WORD
;
4074 /* Expand the prologue into a bunch of separate insns. */
4077 ix86_expand_prologue ()
4080 int pic_reg_used
= (flag_pic
&& (current_function_uses_pic_offset_table
4081 || current_function_uses_const_pool
)
4083 struct ix86_frame frame
;
4085 HOST_WIDE_INT allocate
;
4089 use_fast_prologue_epilogue
4090 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT
);
4091 if (TARGET_PROLOGUE_USING_MOVE
)
4092 use_mov
= use_fast_prologue_epilogue
;
4094 ix86_compute_frame_layout (&frame
);
4096 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4097 slower on all targets. Also sdb doesn't like it. */
4099 if (frame_pointer_needed
)
4101 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
4102 RTX_FRAME_RELATED_P (insn
) = 1;
4104 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
4105 RTX_FRAME_RELATED_P (insn
) = 1;
4108 allocate
= frame
.to_allocate
;
4109 /* In case we are dealing with only a single register and an empty frame,
4110 push is equivalent to the mov+add sequence. */
4111 if (allocate
== 0 && frame
.nregs
<= 1)
4115 ix86_emit_save_regs ();
4117 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
4121 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
4123 insn
= emit_insn (gen_pro_epilogue_adjust_stack
4124 (stack_pointer_rtx
, stack_pointer_rtx
,
4125 GEN_INT (-allocate
)));
4126 RTX_FRAME_RELATED_P (insn
) = 1;
4130 /* ??? Is this only valid for Win32? */
4137 arg0
= gen_rtx_REG (SImode
, 0);
4138 emit_move_insn (arg0
, GEN_INT (allocate
));
4140 sym
= gen_rtx_MEM (FUNCTION_MODE
,
4141 gen_rtx_SYMBOL_REF (Pmode
, "_alloca"));
4142 insn
= emit_call_insn (gen_call (sym
, const0_rtx
, constm1_rtx
));
4144 CALL_INSN_FUNCTION_USAGE (insn
)
4145 = gen_rtx_EXPR_LIST (VOIDmode
, gen_rtx_USE (VOIDmode
, arg0
),
4146 CALL_INSN_FUNCTION_USAGE (insn
));
4150 if (!frame_pointer_needed
|| !frame
.to_allocate
)
4151 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
4153 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
4154 -frame
.nregs
* UNITS_PER_WORD
);
4157 #ifdef SUBTARGET_PROLOGUE
4162 load_pic_register ();
4164 /* If we are profiling, make sure no instructions are scheduled before
4165 the call to mcount. However, if -fpic, the above call will have
4167 if (current_function_profile
&& ! pic_reg_used
)
4168 emit_insn (gen_blockage ());
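/* For reference, the typical code produced by the expander above for a
   32-bit function with a frame pointer is roughly:

	pushl	%ebp
	movl	%esp, %ebp
	subl	$SIZE, %esp		# frame.to_allocate
	pushl	%ebx			# or movl %ebx, OFS(%esp) when the
					# mov-based save path is chosen

   The exact sequence depends on use_fast_prologue_epilogue and the target
   tuning flags tested above.  */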
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */

static void
ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
     rtx pointer;
     int offset;
     int maybe_eh_return;
{
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
	emit_move_insn (gen_rtx_REG (Pmode, regno),
			adjust_address (gen_rtx_MEM (Pmode, pointer),
					Pmode, offset));
	offset += UNITS_PER_WORD;
      }
}
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (style)
     int style;
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register is cheaper,
     since it's less work than reloading sp and popping the register.

     The default code results in a stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well.  Especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && use_fast_prologue_epilogue && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to
	 the end of block of saved registers, where we may simplify
	 addressing mode.  */
      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      emit_insn (gen_pro_epilogue_adjust_stack
			 (stack_pointer_rtx, sa, const0_rtx));
	    }
	  else
	    {
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate
			     + frame.nregs * UNITS_PER_WORD)));
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx));
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset)));
	}
      else if (frame.to_allocate)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate)));

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */
      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in 64bit ABI.  */
	  if (TARGET_64BIT)
	    abort ();

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
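/* For reference, the two epilogue shapes produced above are roughly:

	movl	%ebp, %esp		# or "leave" when TARGET_USE_LEAVE
	popl	%ebp
	ret

   versus the pop-based form:

	addl	$SIZE, %esp
	popl	%ebx
	popl	%ebp
	ret

   The choice between the two is driven by the heuristic at the top of the
   function.  */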
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;			/* index*scale + base */
	  else
	    disp = op1;			/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return 0;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
	  || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return retval;
}
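/* Example: the canonical address
     (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8))
   decomposes into index = A, scale = 4, base = B, disp = 8, i.e. the
   operand printed as 8(%B,%A,4) in AT&T syntax.  */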
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */

int
ix86_address_cost (x)
     rtx x;
{
  struct ix86_address parts;
  int cost = 1;

  if (!ix86_decompose_address (x, &parts))
    abort ();

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
	[base+scale*index]
	[scale*index+disp]
	[base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (x)
     rtx x;
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || XVECLEN (term, 0) != 1
	  || XINT (term, 1) != 15)
	return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
	  && GET_CODE (term) != LABEL_REF)
	return x;

      return term;
    }

  if (GET_CODE (x) != PLUS
      || XEXP (x, 0) != pic_offset_table_rtx
      || GET_CODE (XEXP (x, 1)) != CONST)
    return x;

  term = XEXP (XEXP (x, 1), 0);

  if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
    term = XEXP (term, 0);

  if (GET_CODE (term) != UNSPEC
      || XVECLEN (term, 0) != 1
      || XINT (term, 1) != 7)
    return x;

  term = XVECEXP (term, 0, 0);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (disp)
     register rtx disp;
{
  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx x = disp;

      if (GET_CODE (disp) == CONST)
	x = XEXP (disp, 0);
      /* ??? Handle PIC code models */
      if (GET_CODE (x) == PLUS
	  && (GET_CODE (XEXP (x, 1)) == CONST_INT
	      && ix86_cmodel == CM_SMALL_PIC
	      && INTVAL (XEXP (x, 1)) < 1024*1024*1024
	      && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
	x = XEXP (x, 0);
      if (local_symbolic_operand (x, Pmode))
	return 1;
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is not safe to allow PLUS expressions here; that would limit
	 the allowed distance from the GOT table.  We should not need
	 these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || XVECLEN (disp, 0) != 1
	  || XINT (disp, 1) != 15)
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
    }

  if (GET_CODE (disp) != UNSPEC
      || XVECLEN (disp, 0) != 1)
    return 0;

  /* Must be @GOT or @GOTOFF.  */
  switch (XINT (disp, 1))
    {
    case 6: /* @GOT */
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;

    case 7: /* @GOTOFF */
      return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (!CONSTANT_ADDRESS_P (disp))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}

      if (TARGET_64BIT)
	{
	  if (!x86_64_sign_extended_value (disp))
	    {
	      reason = "displacement is out of range";
	      goto report_error;
	    }
	}
      else
	{
	  if (GET_CODE (disp) == CONST_DOUBLE)
	    {
	      reason = "displacement is a const_double";
	      goto report_error;
	    }
	}

      if (flag_pic && SYMBOLIC_CONST (disp))
	{
	  if (TARGET_64BIT && (index || base))
	    {
	      reason = "non-constant pic memory reference";
	      goto report_error;
	    }
	  if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code.

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (HALF_PIC_P ())
	{
	  if (! HALF_PIC_ADDRESS_P (disp)
	      || (base != NULL_RTX || index != NULL_RTX))
	    {
	      reason = "displacement is an invalid half-pic reference";
	      goto report_error;
	    }
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
/* Return a unique alias set for the GOT.  */

static HOST_WIDE_INT
ix86_GOT_alias_set ()
{
  static HOST_WIDE_INT set = -1;

  if (set == -1)
    set = new_alias_set ();
  return set;
}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

  if (local_symbolic_operand (addr, Pmode))
    {
      /* In 64bit mode we can address such objects directly.  */
      if (TARGET_64BIT)
	new = addr;
      else
	{
	  /* This symbol may be referenced via a displacement from the PIC
	     base address (@GOTOFF).  */

	  current_function_uses_pic_offset_table = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

	  if (reg != 0)
	    {
	      emit_move_insn (reg, new);
	      new = reg;
	    }
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
	{
	  current_function_uses_pic_offset_table = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use gen_movsi directly, otherwise the address is loaded
	     into a register for CSE.  We don't want to CSE these
	     addresses; instead we CSE addresses from the GOT table,
	     so skip this.  */
	  emit_insn (gen_movsi (reg, new));
	  new = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  current_function_uses_pic_offset_table = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == UNSPEC)
	    {
	      /* Check that the unspec is one of the ones we generate?  */
	    }
	  else if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (local_symbolic_operand (op0, Pmode)
	      && GET_CODE (op1) == CONST_INT)
	    {
	      if (!TARGET_64BIT)
		{
		  current_function_uses_pic_offset_table = 1;
		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
		  new = gen_rtx_PLUS (Pmode, new, op1);
		  new = gen_rtx_CONST (Pmode, new);
		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new);
		      new = reg;
		    }
		}
	      else
		{
		  /* ??? We need to limit offsets here.  */
		}
	    }
	  else
	    {
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new  = legitimize_pic_address (XEXP (addr, 1),
					     base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}
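/* Example: with -fpic on ia32, a reference to a global symbol FOO is
   rewritten above into a load through the GOT, conceptually

     (mem (plus (reg %ebx) (const (unspec [(symbol_ref "FOO")] 6))))

   which assembles to a "movl FOO@GOT(%ebx), reg" style load, while a local
   symbol becomes the PIC register plus a @GOTOFF displacement (unspec 7)
   and needs no load at all.  */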
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (x, oldx, mode)
     register rtx x;
     register rtx oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val  = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val  = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
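/* Example: the non-canonical address
     (plus (ashift (reg A) (const_int 2)) (reg B))
   is rewritten above into
     (plus (mult (reg A) (const_int 4)) (reg B)),
   which GO_IF_LEGITIMATE_ADDRESS accepts as a scaled-index address.  */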
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      if (code == 'P' && ! SYMBOL_REF_FLAG (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case 6:
	  fputs ("@GOT", file);
	  break;
	case 7:
	  fputs ("@GOTOFF", file);
	  break;
	case 8:
	  fputs ("@PLT", file);
	  break;
	case 15:
	  fputs ("@GOTPCREL(%RIP)", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
   We need to handle our special PIC relocations.  */

void
i386_dwarf_output_addr_const (file, x)
     FILE *file;
     rtx x;
{
#ifdef ASM_QUAD
  fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
#else
  if (TARGET_64BIT)
    abort ();
  fprintf (file, "%s", ASM_LONG);
#endif
  if (flag_pic)
    output_pic_addr_const (file, x, '\0');
  else
    output_addr_const (file, x);
  fputc ('\n', file);
}
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.  */

rtx
i386_simplify_dwarf_addr (orig_x)
     rtx orig_x;
{
  rtx x = orig_x, y;

  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || XINT (XEXP (x, 0), 1) != 15)
	return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    y = NULL;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      y = XEXP (x, 0);
      if (GET_CODE (XEXP (y, 0)) == REG
	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 1);
      else if (GET_CODE (XEXP (y, 1)) == REG
	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 0);
      else
	return orig_x;
      if (GET_CODE (y) != REG
	  && GET_CODE (y) != MULT
	  && GET_CODE (y) != ASHIFT)
	return orig_x;
    }
  else
    return orig_x;

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == UNSPEC
      && (XINT (x, 1) == 6
	  || XINT (x, 1) == 7))
    {
      if (y)
	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
      return XVECEXP (x, 0, 0);
    }

  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (XINT (XEXP (x, 0), 1) == 6
	  || XINT (XEXP (x, 0), 1) == 7))
    {
      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
      if (y)
	return gen_rtx_PLUS (Pmode, y, x);
      return x;
    }

  return orig_x;
}
static void
put_condition_code (code, mode, reverse, fp, file)
     enum rtx_code code;
     enum machine_mode mode;
     int reverse;
     int fp;
     FILE *file;
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;

      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	abort ();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case LTU:
      if (mode != CCmode)
	abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
	abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
void
print_reg (x, code, file)
     rtx x;
     int code;
     FILE *file;
{
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      if (!TARGET_64BIT)
	abort ();
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
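/* Example: for the first REX register, size codes 1, 2, 4 and 8 select the
   names r8b, r8w, r8d and r8 respectively, whereas the classic registers
   take their al/ax/eax style names from the qi/hi register name tables.  */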
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   X -- don't print any sort of PIC '@' suffix for a symbol.  */

void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  if (code)
    {
      switch (code)
	{
	case '*':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  return;

	case 'A':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  else if (ASSEMBLER_DIALECT == ASM_INTEL)
	    {
	      /* Intel syntax.  For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (GET_CODE (x) != REG)
		{
		  putc ('[', file);
		  PRINT_OPERAND (file, x, 0);
		  putc (']', file);
		  return;
		}
	    }
	  else
	    abort ();

	  PRINT_OPERAND (file, x, 0);
	  return;

	case 'L':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('t', file);
	  return;

	case 'z':
	  /* 387 opcodes don't get size suffixes if the operands are
	     registers.  */
	  if (STACK_REG_P (x))
	    return;

	  /* this is the size of op from size of operand */
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 2:
#ifdef HAVE_GAS_FILDS_FISTS
	      putc ('s', file);
#endif
	      return;

	    case 4:
	      if (GET_MODE (x) == SFmode)
		{
		  putc ('s', file);
		  return;
		}
	      else
		putc ('l', file);
	      return;

	    case 12:
	    case 16:
	      putc ('t', file);
	      return;

	    case 8:
	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
		{
#ifdef GAS_MNEMONICS
		  putc ('q', file);
#else
		  putc ('l', file);
		  putc ('l', file);
#endif
		}
	      else
		putc ('l', file);
	      return;

	    default:
	      abort ();
	    }

	case 'b':
	case 'w':
	case 'k':
	case 'q':
	case 'h':
	case 'y':
	case 'X':
	case 'P':
	  break;

	case 's':
	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      PRINT_OPERAND (file, x, 0);
	      putc (',', file);
	    }
	  return;

	case 'D':
	  /* Little bit of braindamage here.  The SSE compare instructions
	     use completely different names for the comparisons than the
	     fp conditional moves do.  */
	  switch (GET_CODE (x))
	    {
	    case EQ:
	    case UNEQ:
	      fputs ("eq", file);
	      break;
	    case LT:
	    case UNLT:
	      fputs ("lt", file);
	      break;
	    case LE:
	    case UNLE:
	      fputs ("le", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case NE:
	    case LTGT:
	      fputs ("neq", file);
	      break;
	    case UNGE:
	    case GE:
	      fputs ("nlt", file);
	      break;
	    case UNGT:
	    case GT:
	      fputs ("nle", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    default:
	      abort ();
	      break;
	    }
	  return;

	case 'C':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
	  return;
	case 'F':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
	  return;

	  /* Like above, but reverse condition */
	case 'c':
	  /* Check to see if argument to %c is really a constant
	     and not a condition code which needs to be reversed.  */
	  if (GET_RTX_CLASS (GET_CODE (x)) != '<')
	    {
	      output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
	      return;
	    }
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
	  return;
	case 'f':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
	  return;

	case '+':
	  {
	    rtx x;

	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
	      return;

	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	    if (x)
	      {
		int pred_val = INTVAL (XEXP (x, 0));

		if (pred_val < REG_BR_PROB_BASE * 45 / 100
		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  {
		    int taken = pred_val > REG_BR_PROB_BASE / 2;
		    int cputaken
		      = final_forward_branch_p (current_output_insn) == 0;

		    /* Emit hints only in the case default branch prediction
		       heuristics would fail.  */
		    if (taken != cputaken)
		      {
			/* We use 3e (DS) prefix for taken branches and
			   2e (CS) prefix for not taken branches.  */
			if (taken)
			  fputs ("ds ; ", file);
			else
			  fputs ("cs ; ", file);
		      }
		  }
	      }
	    return;
	  }

	default:
	  {
	    char str[50];

	    sprintf (str, "invalid operand code `%c'", code);
	    output_operand_lossage (str);
	  }
	}
    }

  if (GET_CODE (x) == REG)
    {
      PRINT_REG (x, code, file);
    }

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
	{
	  const char *size;

	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "XWORD"; break;
	    case 16: size = "XMMWORD"; break;
	    default:
	      abort ();
	    }

	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";

	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      if (flag_pic && CONSTANT_ADDRESS_P (x))
	output_pic_addr_const (file, x, code);
      /* Avoid (%rip) for call operands.  */
      else if (CONSTANT_ADDRESS_P (x) && code == 'P'
	       && GET_CODE (x) != CONST_INT)
	output_addr_const (file, x);
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      fprintf (file, "0x%lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
	   && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }
  else
    {
      if (code != 'P')
	{
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
/* Print a memory operand whose address is ADDR.  */

void
print_operand_address (file, addr)
     FILE *file;
     register rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    {
	      if (USER_LABEL_PREFIX[0] == 0)
		putc ('%', file);
	      fputs ("ds:", file);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
	}
      else if (flag_pic)
	output_pic_addr_const (file, addr, 0);
      else
	output_addr_const (file, addr);

      /* Use one byte shorter RIP relative addressing for 64bit mode.  */
      if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
	fputs ("(%rip)", file);
    }
  else
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    PRINT_REG (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      PRINT_REG (base, 0, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));

	  if (index)
	    {
	      putc ('+', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
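/* Example: the same base + index*scale + disp operand is printed as
   "8(%ebx,%eax,4)" by the AT&T branch above and as "[ebx+8+eax*4]" by the
   Intel-syntax branch.  */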
/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_di (operands, num, lo_half, hi_half)
     rtx operands[];
     int num;
     rtx lo_half[], hi_half[];
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  lo_half[num] = adjust_address (op, SImode, 0);
	  hi_half[num] = adjust_address (op, SImode, 4);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 4);
	}
    }
}
/* Split one or more TImode RTL references into pairs of DImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_ti (operands, num, lo_half, hi_half)
     rtx operands[];
     int num;
     rtx lo_half[], hi_half[];
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses, but we
	 still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  lo_half[num] = adjust_address (op, DImode, 0);
	  hi_half[num] = adjust_address (op, DImode, 8);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
	}
    }
}
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (insn, operands)
     rtx insn;
     rtx *operands;
{
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else if (!is_sse)
    abort ();
#endif

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "div";
      break;

    default:
      abort ();
    }

  if (is_sse)
    {
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
	strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
	strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}
/* Output code to initialize control word copies used by
   trunc?f?i patterns.  NORMAL is set to current control word, while ROUND_DOWN
   is set to control word rounding downwards.  */

void
emit_i387_cw_initialization (normal, round_down)
     rtx normal, round_down;
{
  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (normal));
  emit_move_insn (reg, normal);
  if (!TARGET_PARTIAL_REG_STALL && !optimize_size)
    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
  else
    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
  emit_move_insn (round_down, reg);
}
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (insn, operands)
     rtx insn;
     rtx *operands;
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (!STACK_TOP_P (operands[1]))
    abort ();

  if (GET_CODE (operands[0]) != MEM)
    abort ();

  output_asm_insn ("fldcw\t%3", operands);
  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z0\t%0", operands);
  else
    output_asm_insn ("fist%z0\t%0", operands);
  output_asm_insn ("fldcw\t%2", operands);

  return "";
}
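/* The fldcw/fist{p}/fldcw sequence emitted above implements C-style
   truncation: operand 3 holds a control word copy with the rounding field
   forced to round-toward-zero (prepared by emit_i387_cw_initialization
   above), operand 2 holds the original control word, and the store is done
   between the two loads so only the conversion itself uses truncating
   rounding.  */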
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.  */

const char *
output_fp_compare (insn, operands, eflags_p, unordered_p)
     rtx insn;
     rtx *operands;
     int eflags_p, unordered_p;
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);

  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }
  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	{
	  if (unordered_p)
	    return "ucomiss\t{%1, %0|%0, %1}";
	  else
	    return "comiss\t{%1, %0|%0, %1}";
	}
      else
	{
	  if (unordered_p)
	    return "ucomisd\t{%1, %0|%0, %1}";
	  else
	    return "comisd\t{%1, %0|%0, %1}";
	}
    }

  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare.  */

      if (eflags_p == 1)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return "fstp\t%y0";
	}
      else
	{
	  if (eflags_p == 2)
	    {
	      if (unordered_p)
		return "fucompp\n\tfnstsw\t%0";
	      else
		return "fcompp\n\tfnstsw\t%0";
	    }
	  else
	    {
	      if (unordered_p)
		return "fucompp";
	      else
		return "fcompp";
	    }
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[24] =
      {
	"fcom%z2\t%y2",
	"fcomp%z2\t%y2",
	"fucom%z2\t%y2",
	"fucomp%z2\t%y2",

	"ficom%z2\t%y2",
	"ficomp%z2\t%y2",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL,

	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 24)
	abort ();
      ret = alt[mask];
      if (ret == NULL)
	abort ();

      return ret;
    }
}
void
ix86_output_addr_vec_elt (file, value)
     FILE *file;
     int value;
{
  const char *directive = ASM_LONG;

  if (TARGET_64BIT)
    {
#ifdef ASM_QUAD
      directive = ASM_QUAD;
#else
      abort ();
#endif
    }

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

void
ix86_output_addr_diff_elt (file, value, rel)
     FILE *file;
     int value, rel;
{
  if (TARGET_64BIT)
    fprintf (file, "%s%s%d-.+4+(.-%s%d)\n",
	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
  else
    asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
		 ASM_LONG, LPREFIX, value);
}
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (dest)
     rtx dest;
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  if (!reload_completed)
    abort ();

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
void
ix86_expand_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  int strict = (reload_in_progress || reload_completed);
  rtx insn;

  if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
    {
      /* Emit insns to move operands[1] into operands[0].  */

      if (GET_CODE (operands[0]) == MEM)
	operands[1] = force_reg (Pmode, operands[1]);
      else
	{
	  rtx temp = operands[0];
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (operands[1], temp);
	  if (temp == operands[0])
	    return;
	  operands[1] = temp;
	}
    }
  else
    {
      if (GET_CODE (operands[0]) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (operands[0], mode))
	  && GET_CODE (operands[1]) == MEM)
	operands[1] = force_reg (mode, operands[1]);

      if (push_operand (operands[0], mode)
	  && ! general_no_elim_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (operands[1], mode)
	  && !x86_64_zero_extended_value (operands[1])
	  && !register_operand (operands[0], mode)
	  && optimize && !reload_completed && !reload_in_progress)
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (operands[1]) == CONST_DOUBLE
		   && register_operand (operands[0], mode))
	    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
	}
    }

  insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);

  emit_insn (insn);
}
void
ix86_expand_vector_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (operands[0], mode)
      && CONSTANT_P (operands[1]))
    {
      rtx addr = gen_reg_rtx (Pmode);

      emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
      operands[1] = gen_rtx_MEM (mode, addr);
    }

  /* Make operand1 a register if it isn't already.  */
  if ((reload_in_progress | reload_completed) == 0
      && !register_operand (operands[0], mode)
      && !register_operand (operands[1], mode)
      && operands[1] != CONST0_RTX (mode))
    {
      rtx temp = force_reg (TImode, operands[1]);

      emit_move_insn (operands[0], temp);
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
}
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators.  */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE.  */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[3];
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutable, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
	    || (GET_RTX_CLASS (code) == 'c'
		&& rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutable and the source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != 'c'
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;
  return 1;
}
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */
void
ix86_expand_unary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (code, mode, operands)
     enum rtx_code code ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[2] ATTRIBUTE_UNUSED;
{
  /* If one of operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}
6934 /* Return TRUE or FALSE depending on whether the first SET in INSN
6935 has source and destination with matching CC modes, and that the
6936 CC mode is at least as constrained as REQ_MODE. */
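/* For illustration: this is what lets combine and the peepholes reuse an
   existing flags-setting instruction.  A user that only needs a subset of
   the flags (say, just ZF for an equality test) can match a compare that
   computed the full flags, but a user that needs the full flags cannot match
   a compare that only guaranteed a subset; the checks below encode that
   ordering of the CC modes.  */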
int
ix86_match_ccmode (insn, req_mode)
     rtx insn;
     enum machine_mode req_mode;
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (code)
     enum rtx_code code ATTRIBUTE_UNUSED;
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
7024 ix86_cc_mode (code
, op0
, op1
)
7028 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
7029 return ix86_fp_compare_mode (code
);
7032 /* Only zero flag is needed. */
7034 case NE
: /* ZF!=0 */
7036 /* Codes needing carry flag. */
7037 case GEU
: /* CF=0 */
7038 case GTU
: /* CF=0 & ZF=0 */
7039 case LTU
: /* CF=1 */
7040 case LEU
: /* CF=1 | ZF=1 */
7042 /* Codes possibly doable only with sign flag when
7043 comparing against zero. */
7044 case GE
: /* SF=OF or SF=0 */
7045 case LT
: /* SF<>OF or SF=1 */
7046 if (op1
== const0_rtx
)
7049 /* For other cases Carry flag is not required. */
      /* Codes doable only with sign flag when comparing
         against zero, but we miss the jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
7055 case GT
: /* ZF=0 & SF=OF */
7056 case LE
: /* ZF=1 | SF<>OF */
7057 if (op1
== const0_rtx
)
      /* strcmp patterns do (use flags) and combine may ask us for a proper
         comparison.  */
7070 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7073 ix86_use_fcomi_compare (code
)
7074 enum rtx_code code ATTRIBUTE_UNUSED
;
7076 enum rtx_code swapped_code
= swap_condition (code
);
7077 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
7078 || (ix86_fp_comparison_cost (swapped_code
)
7079 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */
7086 static enum rtx_code
7087 ix86_prepare_fp_compare_args (code
, pop0
, pop1
)
7091 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
7092 rtx op0
= *pop0
, op1
= *pop1
;
7093 enum machine_mode op_mode
= GET_MODE (op0
);
7094 int is_sse
= SSE_REG_P (op0
) | SSE_REG_P (op1
);
7096 /* All of the unordered compare instructions only work on registers.
7097 The same is true of the XFmode compare instructions. The same is
7098 true of the fcomi compare instructions. */
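/* For instance, the fcomi/fucomi forms and the XFmode/TFmode compares have
   no memory operand form at all - only the plain fcom/fcomp-style compares
   accept a memory operand - which is why both operands are forced into
   registers below before such a compare is emitted.  */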
7101 && (fpcmp_mode
== CCFPUmode
7102 || op_mode
== XFmode
7103 || op_mode
== TFmode
7104 || ix86_use_fcomi_compare (code
)))
7106 op0
= force_reg (op_mode
, op0
);
7107 op1
= force_reg (op_mode
, op1
);
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
         things around if they appear profitable, otherwise force op0
         into a register.  */
7115 if (standard_80387_constant_p (op0
) == 0
7116 || (GET_CODE (op0
) == MEM
7117 && ! (standard_80387_constant_p (op1
) == 0
7118 || GET_CODE (op1
) == MEM
)))
7121 tmp
= op0
, op0
= op1
, op1
= tmp
;
7122 code
= swap_condition (code
);
7125 if (GET_CODE (op0
) != REG
)
7126 op0
= force_reg (op_mode
, op0
);
7128 if (CONSTANT_P (op1
))
7130 if (standard_80387_constant_p (op1
))
7131 op1
= force_reg (op_mode
, op1
);
7133 op1
= validize_mem (force_const_mem (op_mode
, op1
));
7137 /* Try to rearrange the comparison to make it cheaper. */
7138 if (ix86_fp_comparison_cost (code
)
7139 > ix86_fp_comparison_cost (swap_condition (code
))
7140 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
7143 tmp
= op0
, op0
= op1
, op1
= tmp
;
7144 code
= swap_condition (code
);
7145 if (GET_CODE (op0
) != REG
)
7146 op0
= force_reg (op_mode
, op0
);
/* Convert the comparison codes we use to represent FP comparisons to the
   integer code that will result in a proper branch.  Return UNKNOWN if no
   such code is available.  */
7157 static enum rtx_code
7158 ix86_fp_compare_code_to_integer (code
)
/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for the branch that will
   branch around FIRST_CODE and SECOND_CODE.  If one of the branches is not
   required, set its value to NIL.
   We never require more than two branches.  */
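/* A worked example, following the table below: under TARGET_IEEE_FP a LT
   test cannot be done with a single branch because the carry-based test
   also fires on unordered operands, so it is split into a bypass branch on
   UNORDERED (taken around the whole test) followed by the plain carry test,
   while NE instead needs a SECOND branch on UNORDERED taken to the same
   target.  */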
7194 ix86_fp_comparison_codes (code
, bypass_code
, first_code
, second_code
)
7195 enum rtx_code code
, *bypass_code
, *first_code
, *second_code
;
7201 /* The fcomi comparison sets flags as follows:
7211 case GT
: /* GTU - CF=0 & ZF=0 */
7212 case GE
: /* GEU - CF=0 */
7213 case ORDERED
: /* PF=0 */
7214 case UNORDERED
: /* PF=1 */
7215 case UNEQ
: /* EQ - ZF=1 */
7216 case UNLT
: /* LTU - CF=1 */
7217 case UNLE
: /* LEU - CF=1 | ZF=1 */
7218 case LTGT
: /* EQ - ZF=0 */
7220 case LT
: /* LTU - CF=1 - fails on unordered */
7222 *bypass_code
= UNORDERED
;
7224 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
7226 *bypass_code
= UNORDERED
;
7228 case EQ
: /* EQ - ZF=1 - fails on unordered */
7230 *bypass_code
= UNORDERED
;
7232 case NE
: /* NE - ZF=0 - fails on unordered */
7234 *second_code
= UNORDERED
;
7236 case UNGE
: /* GEU - CF=0 - fails on unordered */
7238 *second_code
= UNORDERED
;
7240 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
7242 *second_code
= UNORDERED
;
7247 if (!TARGET_IEEE_FP
)
/* Return the cost of a comparison done using fcom + arithmetic operations on AX.
   All following functions use the number of instructions as a cost metric.
   In the future this should be tweaked to compute bytes for optimize_size and
   take into account the performance of various instructions on various CPUs.  */
7259 ix86_fp_comparison_arithmetics_cost (code
)
7262 if (!TARGET_IEEE_FP
)
7264 /* The cost of code output by ix86_expand_fp_compare. */
7292 /* Return cost of comparison done using fcomi operation.
7293 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7295 ix86_fp_comparison_fcomi_cost (code
)
7298 enum rtx_code bypass_code
, first_code
, second_code
;
  /* Return an arbitrarily high cost when the instruction is not supported -
     this prevents gcc from using it.  */
7303 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
7304 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 2;
7307 /* Return cost of comparison done using sahf operation.
7308 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7310 ix86_fp_comparison_sahf_cost (code
)
7313 enum rtx_code bypass_code
, first_code
, second_code
;
  /* Return an arbitrarily high cost when the instruction is not preferred -
     this prevents gcc from using it.  */
7316 if (!TARGET_USE_SAHF
&& !optimize_size
)
7318 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
7319 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 3;
7322 /* Compute cost of the comparison done using any method.
7323 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7325 ix86_fp_comparison_cost (code
)
7328 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
7331 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
7332 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
7334 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
7335 if (min
> sahf_cost
)
7337 if (min
> fcomi_cost
)
7342 /* Generate insn patterns to do a floating point compare of OPERANDS. */
7345 ix86_expand_fp_compare (code
, op0
, op1
, scratch
, second_test
, bypass_test
)
7347 rtx op0
, op1
, scratch
;
7351 enum machine_mode fpcmp_mode
, intcmp_mode
;
7353 int cost
= ix86_fp_comparison_cost (code
);
7354 enum rtx_code bypass_code
, first_code
, second_code
;
7356 fpcmp_mode
= ix86_fp_compare_mode (code
);
7357 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
7360 *second_test
= NULL_RTX
;
7362 *bypass_test
= NULL_RTX
;
7364 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
7366 /* Do fcomi/sahf based test when profitable. */
7367 if ((bypass_code
== NIL
|| bypass_test
)
7368 && (second_code
== NIL
|| second_test
)
7369 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
7373 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
7374 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
7380 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
7381 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), 9);
7383 scratch
= gen_reg_rtx (HImode
);
7384 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
7385 emit_insn (gen_x86_sahf_1 (scratch
));
7388 /* The FP codes work out to act like unsigned. */
7389 intcmp_mode
= fpcmp_mode
;
7391 if (bypass_code
!= NIL
)
7392 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
7393 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
7395 if (second_code
!= NIL
)
7396 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
7397 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
7402 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
7403 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
7404 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), 9);
7406 scratch
= gen_reg_rtx (HImode
);
7407 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
7409 /* In the unordered case, we have to check C2 for NaN's, which
7410 doesn't happen to work out to anything nice combination-wise.
7411 So do some bit twiddling on the value we've got in AH to come
7412 up with an appropriate set of condition codes. */
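      /* For reference: after fnstsw the x87 condition codes land in AH as
         C0 = 0x01, C2 = 0x04 and C3 = 0x40, so the 0x45 masks used below
         select all three of them, 0x05 selects C0|C2, and the single-bit
         tests pick out C0, C2 or C3 individually.  */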
7414 intcmp_mode
= CCNOmode
;
7419 if (code
== GT
|| !TARGET_IEEE_FP
)
7421 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
7426 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7427 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
7428 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
7429 intcmp_mode
= CCmode
;
7435 if (code
== LT
&& TARGET_IEEE_FP
)
7437 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7438 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
7439 intcmp_mode
= CCmode
;
7444 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
7450 if (code
== GE
|| !TARGET_IEEE_FP
)
7452 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
7457 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7458 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
7465 if (code
== LE
&& TARGET_IEEE_FP
)
7467 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7468 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
7469 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
7470 intcmp_mode
= CCmode
;
7475 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
7481 if (code
== EQ
&& TARGET_IEEE_FP
)
7483 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7484 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
7485 intcmp_mode
= CCmode
;
7490 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
7497 if (code
== NE
&& TARGET_IEEE_FP
)
7499 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7500 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
7506 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
7512 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
7516 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
7525 /* Return the test that should be put into the flags user, i.e.
7526 the bcc, scc, or cmov instruction. */
7527 return gen_rtx_fmt_ee (code
, VOIDmode
,
7528 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
7533 ix86_expand_compare (code
, second_test
, bypass_test
)
7535 rtx
*second_test
, *bypass_test
;
7538 op0
= ix86_compare_op0
;
7539 op1
= ix86_compare_op1
;
7542 *second_test
= NULL_RTX
;
7544 *bypass_test
= NULL_RTX
;
7546 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
7547 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
7548 second_test
, bypass_test
);
7550 ret
= ix86_expand_int_compare (code
, op0
, op1
);
7555 /* Return true if the CODE will result in nontrivial jump sequence. */
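/* In other words, the sequence is nontrivial whenever the split above needs
   either a bypass branch or a second branch (under TARGET_IEEE_FP, e.g.
   LT/LE/EQ need a bypass on UNORDERED and NE/UNGE/UNGT need a second
   branch), and trivial whenever a single jcc suffices.  */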
7557 ix86_fp_jump_nontrivial_p (code
)
7560 enum rtx_code bypass_code
, first_code
, second_code
;
7563 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
7564 return bypass_code
!= NIL
|| second_code
!= NIL
;
7568 ix86_expand_branch (code
, label
)
7574 switch (GET_MODE (ix86_compare_op0
))
7580 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
7581 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
7582 gen_rtx_LABEL_REF (VOIDmode
, label
),
7584 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
7594 enum rtx_code bypass_code
, first_code
, second_code
;
7596 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
7599 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
	/* Check whether we will use the natural sequence with one jump.  If
	   so, we can expand the jump early.  Otherwise delay expansion by
	   creating a compound insn so as not to confuse optimizers.  */
7604 if (bypass_code
== NIL
&& second_code
== NIL
7607 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
7608 gen_rtx_LABEL_REF (VOIDmode
, label
),
7613 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
7614 ix86_compare_op0
, ix86_compare_op1
);
7615 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
7616 gen_rtx_LABEL_REF (VOIDmode
, label
),
7618 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
7620 use_fcomi
= ix86_use_fcomi_compare (code
);
7621 vec
= rtvec_alloc (3 + !use_fcomi
);
7622 RTVEC_ELT (vec
, 0) = tmp
;
7624 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
7626 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
7629 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
7631 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
7639 /* Expand DImode branch into multiple compare+branch. */
7641 rtx lo
[2], hi
[2], label2
;
7642 enum rtx_code code1
, code2
, code3
;
7644 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
7646 tmp
= ix86_compare_op0
;
7647 ix86_compare_op0
= ix86_compare_op1
;
7648 ix86_compare_op1
= tmp
;
7649 code
= swap_condition (code
);
7651 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
7652 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
7654 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7655 avoid two branches. This costs one extra insn, so disable when
7656 optimizing for size. */
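      /* Sketch of what is built below for a 64-bit EQ/NE test: XOR the high
         halves, XOR the low halves (either XOR is omitted when the
         corresponding half of op1 is zero), OR the two results together, and
         branch on a single 32-bit comparison of that OR against zero.  */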
7658 if ((code
== EQ
|| code
== NE
)
7660 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
7665 if (hi
[1] != const0_rtx
)
7666 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
7667 NULL_RTX
, 0, OPTAB_WIDEN
);
7670 if (lo
[1] != const0_rtx
)
7671 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
7672 NULL_RTX
, 0, OPTAB_WIDEN
);
7674 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
7675 NULL_RTX
, 0, OPTAB_WIDEN
);
7677 ix86_compare_op0
= tmp
;
7678 ix86_compare_op1
= const0_rtx
;
7679 ix86_expand_branch (code
, label
);
7683 /* Otherwise, if we are doing less-than or greater-or-equal-than,
7684 op1 is a constant and the low word is zero, then we can just
7685 examine the high word. */
7687 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
7690 case LT
: case LTU
: case GE
: case GEU
:
7691 ix86_compare_op0
= hi
[0];
7692 ix86_compare_op1
= hi
[1];
7693 ix86_expand_branch (code
, label
);
7699 /* Otherwise, we need two or three jumps. */
7701 label2
= gen_label_rtx ();
7704 code2
= swap_condition (code
);
7705 code3
= unsigned_condition (code
);
7709 case LT
: case GT
: case LTU
: case GTU
:
7712 case LE
: code1
= LT
; code2
= GT
; break;
7713 case GE
: code1
= GT
; code2
= LT
; break;
7714 case LEU
: code1
= LTU
; code2
= GTU
; break;
7715 case GEU
: code1
= GTU
; code2
= LTU
; break;
7717 case EQ
: code1
= NIL
; code2
= NE
; break;
7718 case NE
: code2
= NIL
; break;
7726 * if (hi(a) < hi(b)) goto true;
7727 * if (hi(a) > hi(b)) goto false;
7728 * if (lo(a) < lo(b)) goto true;
7732 ix86_compare_op0
= hi
[0];
7733 ix86_compare_op1
= hi
[1];
7736 ix86_expand_branch (code1
, label
);
7738 ix86_expand_branch (code2
, label2
);
7740 ix86_compare_op0
= lo
[0];
7741 ix86_compare_op1
= lo
[1];
7742 ix86_expand_branch (code3
, label
);
7745 emit_label (label2
);
7754 /* Split branch based on floating point condition. */
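/* The expansion below may emit up to three conditional jumps: an optional
   bypass jump (around the main test, used for the unordered case), the main
   jump, and an optional second jump, each tagged with a REG_BR_PROB note
   when a branch probability is available.  */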
7756 ix86_split_fp_branch (code
, op1
, op2
, target1
, target2
, tmp
)
7758 rtx op1
, op2
, target1
, target2
, tmp
;
7761 rtx label
= NULL_RTX
;
7763 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
7766 if (target2
!= pc_rtx
)
7769 code
= reverse_condition_maybe_unordered (code
);
7774 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
7775 tmp
, &second
, &bypass
);
7777 if (split_branch_probability
>= 0)
      /* Distribute the probabilities across the jumps.
	 Assume the BYPASS and SECOND to be always test
	 for UNORDERED.  */
      probability = split_branch_probability;

      /* Value of 1 is low enough that there is no need for the probability
	 to be updated.  Later we may run some experiments and see
	 if unordered values are more frequent in practice.  */
7788 bypass_probability
= 1;
7790 second_probability
= 1;
7792 if (bypass
!= NULL_RTX
)
7794 label
= gen_label_rtx ();
7795 i
= emit_jump_insn (gen_rtx_SET
7797 gen_rtx_IF_THEN_ELSE (VOIDmode
,
7799 gen_rtx_LABEL_REF (VOIDmode
,
7802 if (bypass_probability
>= 0)
7804 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
7805 GEN_INT (bypass_probability
),
7808 i
= emit_jump_insn (gen_rtx_SET
7810 gen_rtx_IF_THEN_ELSE (VOIDmode
,
7811 condition
, target1
, target2
)));
7812 if (probability
>= 0)
7814 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
7815 GEN_INT (probability
),
7817 if (second
!= NULL_RTX
)
7819 i
= emit_jump_insn (gen_rtx_SET
7821 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
7823 if (second_probability
>= 0)
7825 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
7826 GEN_INT (second_probability
),
7829 if (label
!= NULL_RTX
)
7834 ix86_expand_setcc (code
, dest
)
7838 rtx ret
, tmp
, tmpreg
;
7839 rtx second_test
, bypass_test
;
7841 if (GET_MODE (ix86_compare_op0
) == DImode
7843 return 0; /* FAIL */
7845 if (GET_MODE (dest
) != QImode
)
7848 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
7849 PUT_MODE (ret
, QImode
);
7854 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
7855 if (bypass_test
|| second_test
)
7857 rtx test
= second_test
;
7859 rtx tmp2
= gen_reg_rtx (QImode
);
7866 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
7868 PUT_MODE (test
, QImode
);
7869 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
7872 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
7874 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
7877 return 1; /* DONE */
7881 ix86_expand_int_movcc (operands
)
7884 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
7885 rtx compare_seq
, compare_op
;
7886 rtx second_test
, bypass_test
;
7887 enum machine_mode mode
= GET_MODE (operands
[0]);
  /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
     In case the comparison is done with an immediate, we can convert it to
     LTU or GEU by altering the integer.  */
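  /* Hypothetical example of that conversion: "x <= 41" (LEU) cannot use the
     sbb trick directly, but adding one to the constant turns it into
     "x < 42" (LTU), which can; GTU is handled the same way, becoming GEU.  */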
7893 if ((code
== LEU
|| code
== GTU
)
7894 && GET_CODE (ix86_compare_op1
) == CONST_INT
7896 && (unsigned int) INTVAL (ix86_compare_op1
) != 0xffffffff
7897 && GET_CODE (operands
[2]) == CONST_INT
7898 && GET_CODE (operands
[3]) == CONST_INT
)
7904 ix86_compare_op1
= GEN_INT (INTVAL (ix86_compare_op1
) + 1);
7908 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
7909 compare_seq
= gen_sequence ();
7912 compare_code
= GET_CODE (compare_op
);
7914 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
7915 HImode insns, we'd be swallowed in word prefix ops. */
7918 && (mode
!= DImode
|| TARGET_64BIT
)
7919 && GET_CODE (operands
[2]) == CONST_INT
7920 && GET_CODE (operands
[3]) == CONST_INT
)
7922 rtx out
= operands
[0];
7923 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
7924 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
7927 if ((compare_code
== LTU
|| compare_code
== GEU
)
7928 && !second_test
&& !bypass_test
)
7931 /* Detect overlap between destination and compare sources. */
7934 /* To simplify rest of code, restrict to the GEU case. */
7935 if (compare_code
== LTU
)
7940 compare_code
= reverse_condition (compare_code
);
7941 code
= reverse_condition (code
);
7945 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
7946 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
7947 tmp
= gen_reg_rtx (mode
);
7949 emit_insn (compare_seq
);
7951 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
));
7953 emit_insn (gen_x86_movsicc_0_m1 (tmp
));
7965 tmp
= expand_simple_binop (mode
, PLUS
,
7967 tmp
, 1, OPTAB_DIRECT
);
7978 tmp
= expand_simple_binop (mode
, IOR
,
7980 tmp
, 1, OPTAB_DIRECT
);
7982 else if (diff
== -1 && ct
)
7992 tmp
= expand_simple_unop (mode
, NOT
, tmp
, tmp
, 1);
7994 tmp
= expand_simple_binop (mode
, PLUS
,
7996 tmp
, 1, OPTAB_DIRECT
);
8003 * andl cf - ct, dest
8008 tmp
= expand_simple_binop (mode
, AND
,
8010 GEN_INT (trunc_int_for_mode
8012 tmp
, 1, OPTAB_DIRECT
);
8014 tmp
= expand_simple_binop (mode
, PLUS
,
8016 tmp
, 1, OPTAB_DIRECT
);
8020 emit_move_insn (out
, tmp
);
8022 return 1; /* DONE */
8029 tmp
= ct
, ct
= cf
, cf
= tmp
;
8031 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
	  /* We may be reversing an unordered compare to a normal compare, which
	     is not valid in general (we may convert a non-trapping condition
	     into a trapping one); however, on i386 we currently emit all
	     comparisons unordered.  */
8037 compare_code
= reverse_condition_maybe_unordered (compare_code
);
8038 code
= reverse_condition_maybe_unordered (code
);
8042 compare_code
= reverse_condition (compare_code
);
8043 code
= reverse_condition (code
);
8046 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
8047 || diff
== 3 || diff
== 5 || diff
== 9)
8048 && (mode
!= DImode
|| x86_64_sign_extended_value (GEN_INT (cf
))))
8054 * lea cf(dest*(ct-cf)),dest
8058 * This also catches the degenerate setcc-only case.
8064 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
8065 ix86_compare_op1
, VOIDmode
, 0, 1);
	  /* On x86_64 the lea instruction operates on Pmode, so we need to get
	     the arithmetic done in the proper mode to match.  */
8076 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
8080 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
8086 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
8090 && (GET_CODE (tmp
) != SUBREG
|| SUBREG_REG (tmp
) != out
))
8096 clob
= gen_rtx_REG (CCmode
, FLAGS_REG
);
8097 clob
= gen_rtx_CLOBBER (VOIDmode
, clob
);
8099 tmp
= gen_rtx_SET (VOIDmode
, out
, tmp
);
8100 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
8104 emit_insn (gen_rtx_SET (VOIDmode
, out
, tmp
));
8106 if (out
!= operands
[0])
8107 emit_move_insn (operands
[0], out
);
8109 return 1; /* DONE */
	  /*
	   * General case:			Jumpful:
	   *   xorl dest,dest			cmpl op1, op2
	   *   cmpl op1, op2			movl ct, dest
	   *   setcc dest			jcc 1f
	   *   decl dest			movl cf, dest
	   *   andl (cf-ct),dest		1:
	   *   addl ct,dest
	   *
	   * This is reasonably steep, but branch mispredict costs are
	   * high on modern cpus, so consider failing only if optimizing
	   * for space.
	   *
	   * %%% Parameterize branch_cost on the tuning architecture, then
	   * use that.  The 80386 couldn't care less about mispredicts.
	   */
8131 if (!optimize_size
&& !TARGET_CMOVE
)
8137 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
	      /* We may be reversing an unordered compare to a normal compare,
		 which is not valid in general (we may convert a non-trapping
		 condition into a trapping one); however, on i386 we currently
		 emit all comparisons unordered.  */
8143 compare_code
= reverse_condition_maybe_unordered (compare_code
);
8144 code
= reverse_condition_maybe_unordered (code
);
8148 compare_code
= reverse_condition (compare_code
);
8149 code
= reverse_condition (code
);
8153 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
8154 ix86_compare_op1
, VOIDmode
, 0, 1);
8156 out
= expand_simple_binop (mode
, PLUS
,
8158 out
, 1, OPTAB_DIRECT
);
8159 out
= expand_simple_binop (mode
, AND
,
8161 GEN_INT (trunc_int_for_mode
8163 out
, 1, OPTAB_DIRECT
);
8164 out
= expand_simple_binop (mode
, PLUS
,
8166 out
, 1, OPTAB_DIRECT
);
8167 if (out
!= operands
[0])
8168 emit_move_insn (operands
[0], out
);
8170 return 1; /* DONE */
8176 /* Try a few things more with specific constants and a variable. */
8179 rtx var
, orig_out
, out
, tmp
;
8182 return 0; /* FAIL */
8184 /* If one of the two operands is an interesting constant, load a
8185 constant with the above and mask it in with a logical operation. */
8187 if (GET_CODE (operands
[2]) == CONST_INT
)
8190 if (INTVAL (operands
[2]) == 0)
8191 operands
[3] = constm1_rtx
, op
= and_optab
;
8192 else if (INTVAL (operands
[2]) == -1)
8193 operands
[3] = const0_rtx
, op
= ior_optab
;
8195 return 0; /* FAIL */
8197 else if (GET_CODE (operands
[3]) == CONST_INT
)
8200 if (INTVAL (operands
[3]) == 0)
8201 operands
[2] = constm1_rtx
, op
= and_optab
;
8202 else if (INTVAL (operands
[3]) == -1)
8203 operands
[2] = const0_rtx
, op
= ior_optab
;
8205 return 0; /* FAIL */
8208 return 0; /* FAIL */
8210 orig_out
= operands
[0];
8211 tmp
= gen_reg_rtx (mode
);
8214 /* Recurse to get the constant loaded. */
8215 if (ix86_expand_int_movcc (operands
) == 0)
8216 return 0; /* FAIL */
8218 /* Mask in the interesting variable. */
8219 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
8221 if (out
!= orig_out
)
8222 emit_move_insn (orig_out
, out
);
8224 return 1; /* DONE */
8228 * For comparison with above,
8238 if (! nonimmediate_operand (operands
[2], mode
))
8239 operands
[2] = force_reg (mode
, operands
[2]);
8240 if (! nonimmediate_operand (operands
[3], mode
))
8241 operands
[3] = force_reg (mode
, operands
[3]);
8243 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
8245 rtx tmp
= gen_reg_rtx (mode
);
8246 emit_move_insn (tmp
, operands
[3]);
8249 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
8251 rtx tmp
= gen_reg_rtx (mode
);
8252 emit_move_insn (tmp
, operands
[2]);
8255 if (! register_operand (operands
[2], VOIDmode
)
8256 && ! register_operand (operands
[3], VOIDmode
))
8257 operands
[2] = force_reg (mode
, operands
[2]);
8259 emit_insn (compare_seq
);
8260 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8261 gen_rtx_IF_THEN_ELSE (mode
,
8262 compare_op
, operands
[2],
8265 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8266 gen_rtx_IF_THEN_ELSE (mode
,
8271 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8272 gen_rtx_IF_THEN_ELSE (mode
,
8277 return 1; /* DONE */
8281 ix86_expand_fp_movcc (operands
)
8286 rtx compare_op
, second_test
, bypass_test
;
  /* For SF/DFmode conditional moves based on comparisons
     in the same mode, we may want to use SSE min/max instructions.  */
8290 if (((TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == SFmode
)
8291 || (TARGET_SSE2
&& TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == DFmode
))
8292 && GET_MODE (ix86_compare_op0
) == GET_MODE (operands
[0])
      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
8295 || (GET_CODE (operands
[1]) != LTGT
&& GET_CODE (operands
[1]) != UNEQ
))
8296 /* We may be called from the post-reload splitter. */
8297 && (!REG_P (operands
[0])
8298 || SSE_REG_P (operands
[0])
8299 || REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
))
8301 rtx op0
= ix86_compare_op0
, op1
= ix86_compare_op1
;
8302 code
= GET_CODE (operands
[1]);
8304 /* See if we have (cross) match between comparison operands and
8305 conditional move operands. */
8306 if (rtx_equal_p (operands
[2], op1
))
8311 code
= reverse_condition_maybe_unordered (code
);
8313 if (rtx_equal_p (operands
[2], op0
) && rtx_equal_p (operands
[3], op1
))
8315 /* Check for min operation. */
8318 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
8319 if (memory_operand (op0
, VOIDmode
))
8320 op0
= force_reg (GET_MODE (operands
[0]), op0
);
8321 if (GET_MODE (operands
[0]) == SFmode
)
8322 emit_insn (gen_minsf3 (operands
[0], op0
, op1
));
8324 emit_insn (gen_mindf3 (operands
[0], op0
, op1
));
8327 /* Check for max operation. */
8330 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
8331 if (memory_operand (op0
, VOIDmode
))
8332 op0
= force_reg (GET_MODE (operands
[0]), op0
);
8333 if (GET_MODE (operands
[0]) == SFmode
)
8334 emit_insn (gen_maxsf3 (operands
[0], op0
, op1
));
8336 emit_insn (gen_maxdf3 (operands
[0], op0
, op1
));
      /* Manage condition to be sse_comparison_operator.  In case we are
	 in non-ieee mode, try to canonicalize the destination operand
	 to be first in the comparison - this helps reload to avoid extra
	 moves.  */
8344 if (!sse_comparison_operator (operands
[1], VOIDmode
)
8345 || (rtx_equal_p (operands
[0], ix86_compare_op1
) && !TARGET_IEEE_FP
))
8347 rtx tmp
= ix86_compare_op0
;
8348 ix86_compare_op0
= ix86_compare_op1
;
8349 ix86_compare_op1
= tmp
;
8350 operands
[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands
[1])),
8351 VOIDmode
, ix86_compare_op0
,
      /* Similarly try to manage result to be first operand of conditional
	 move.  We also don't support the NE comparison on SSE, so try to
	 avoid it.  */
8357 if ((rtx_equal_p (operands
[0], operands
[3])
8358 && (!TARGET_IEEE_FP
|| GET_CODE (operands
[1]) != EQ
))
8359 || (GET_CODE (operands
[1]) == NE
&& TARGET_IEEE_FP
))
8361 rtx tmp
= operands
[2];
8362 operands
[2] = operands
[3];
8364 operands
[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8365 (GET_CODE (operands
[1])),
8366 VOIDmode
, ix86_compare_op0
,
8369 if (GET_MODE (operands
[0]) == SFmode
)
8370 emit_insn (gen_sse_movsfcc (operands
[0], operands
[1],
8371 operands
[2], operands
[3],
8372 ix86_compare_op0
, ix86_compare_op1
));
8374 emit_insn (gen_sse_movdfcc (operands
[0], operands
[1],
8375 operands
[2], operands
[3],
8376 ix86_compare_op0
, ix86_compare_op1
));
8380 /* The floating point conditional move instructions don't directly
8381 support conditions resulting from a signed integer comparison. */
8383 code
= GET_CODE (operands
[1]);
8384 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8386 /* The floating point conditional move instructions don't directly
8387 support signed integer comparisons. */
8389 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
8391 if (second_test
!= NULL
|| bypass_test
!= NULL
)
8393 tmp
= gen_reg_rtx (QImode
);
8394 ix86_expand_setcc (code
, tmp
);
8396 ix86_compare_op0
= tmp
;
8397 ix86_compare_op1
= const0_rtx
;
8398 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8400 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
8402 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
8403 emit_move_insn (tmp
, operands
[3]);
8406 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
8408 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
8409 emit_move_insn (tmp
, operands
[2]);
8413 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8414 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
8419 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8420 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
8425 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8426 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  At most three parts are generated.  */
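/* So, under the size computation visible below: on 32-bit targets DImode and
   DFmode values split into two SImode parts and XFmode/TFmode into three,
   while on 64-bit targets the long double formats split into two parts, a
   DImode part followed by a trailing SImode piece.  */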
8440 ix86_split_to_parts (operand
, parts
, mode
)
8443 enum machine_mode mode
;
8448 size
= mode
== TFmode
? 3 : (GET_MODE_SIZE (mode
) / 4);
8450 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
8452 if (GET_CODE (operand
) == REG
&& MMX_REGNO_P (REGNO (operand
)))
8454 if (size
< 2 || size
> 3)
  /* Optimize constant pool references to immediates.  This is used by fp moves,
     which force all constants to memory to allow combining.  */
8460 if (GET_CODE (operand
) == MEM
8461 && GET_CODE (XEXP (operand
, 0)) == SYMBOL_REF
8462 && CONSTANT_POOL_ADDRESS_P (XEXP (operand
, 0)))
8463 operand
= get_pool_constant (XEXP (operand
, 0));
8465 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
      /* The only non-offsettable memories we handle are pushes.  */
8468 if (! push_operand (operand
, VOIDmode
))
8471 operand
= copy_rtx (operand
);
8472 PUT_MODE (operand
, Pmode
);
8473 parts
[0] = parts
[1] = parts
[2] = operand
;
8475 else if (!TARGET_64BIT
)
8478 split_di (&operand
, 1, &parts
[0], &parts
[1]);
8481 if (REG_P (operand
))
8483 if (!reload_completed
)
8485 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
8486 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
8488 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
8490 else if (offsettable_memref_p (operand
))
8492 operand
= adjust_address (operand
, SImode
, 0);
8494 parts
[1] = adjust_address (operand
, SImode
, 4);
8496 parts
[2] = adjust_address (operand
, SImode
, 8);
8498 else if (GET_CODE (operand
) == CONST_DOUBLE
)
8503 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
8508 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
8509 parts
[2] = GEN_INT (trunc_int_for_mode (l
[2], SImode
));
8512 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
8517 parts
[1] = GEN_INT (trunc_int_for_mode (l
[1], SImode
));
8518 parts
[0] = GEN_INT (trunc_int_for_mode (l
[0], SImode
));
8527 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
8528 if (mode
== XFmode
|| mode
== TFmode
)
8530 if (REG_P (operand
))
8532 if (!reload_completed
)
8534 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
8535 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
8537 else if (offsettable_memref_p (operand
))
8539 operand
= adjust_address (operand
, DImode
, 0);
8541 parts
[1] = adjust_address (operand
, SImode
, 8);
8543 else if (GET_CODE (operand
) == CONST_DOUBLE
)
8548 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
8549 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
8550 /* Do not use shift by 32 to avoid warning on 32bit systems. */
8551 if (HOST_BITS_PER_WIDE_INT
>= 64)
8553 = GEN_INT (trunc_int_for_mode
8554 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
8555 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
8558 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
8559 parts
[1] = GEN_INT (trunc_int_for_mode (l
[2], SImode
));
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */
8575 ix86_split_long_move (operands
)
8582 enum machine_mode mode
= GET_MODE (operands
[0]);
  /* The DFmode expanders may ask us to move a double.
     For a 64-bit target this is a single move.  By hiding the fact
     here we simplify the i386.md splitters.  */
8587 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
      /* Optimize constant pool references to immediates.  This is used by
	 fp moves, which force all constants to memory to allow combining.  */
8592 if (GET_CODE (operands
[1]) == MEM
8593 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
8594 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
8595 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
8596 if (push_operand (operands
[0], VOIDmode
))
8598 operands
[0] = copy_rtx (operands
[0]);
8599 PUT_MODE (operands
[0], Pmode
);
8602 operands
[0] = gen_lowpart (DImode
, operands
[0]);
8603 operands
[1] = gen_lowpart (DImode
, operands
[1]);
8604 emit_move_insn (operands
[0], operands
[1]);
8608 /* The only non-offsettable memory we handle is push. */
8609 if (push_operand (operands
[0], VOIDmode
))
8611 else if (GET_CODE (operands
[0]) == MEM
8612 && ! offsettable_memref_p (operands
[0]))
8615 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
8616 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
8618 /* When emitting push, take care for source operands on the stack. */
8619 if (push
&& GET_CODE (operands
[1]) == MEM
8620 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
8623 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
8624 XEXP (part
[1][2], 0));
8625 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
8626 XEXP (part
[1][1], 0));
8629 /* We need to do copy in the right order in case an address register
8630 of the source overlaps the destination. */
8631 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
8633 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
8635 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
8638 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
8641 /* Collision in the middle part can be handled by reordering. */
8642 if (collisions
== 1 && nparts
== 3
8643 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
8646 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
8647 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
8650 /* If there are more collisions, we can't handle it by reordering.
8651 Do an lea to the last part and use only one colliding move. */
8652 else if (collisions
> 1)
8655 emit_insn (gen_rtx_SET (VOIDmode
, part
[0][nparts
- 1],
8656 XEXP (part
[1][0], 0)));
8657 part
[1][0] = change_address (part
[1][0],
8658 TARGET_64BIT
? DImode
: SImode
,
8659 part
[0][nparts
- 1]);
8660 part
[1][1] = adjust_address (part
[1][0], VOIDmode
, UNITS_PER_WORD
);
8662 part
[1][2] = adjust_address (part
[1][0], VOIDmode
, 8);
      /* We use only the first 12 bytes of a TFmode value, but for pushing we
	 are required to adjust the stack as if we were pushing a real 16-byte
	 value.  */
8675 if (mode
== TFmode
&& !TARGET_64BIT
)
8676 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
8678 emit_move_insn (part
[0][2], part
[1][2]);
      /* In 64bit mode we don't have the 32bit push available.  In case this is a
	 register, it is OK - we will just use the larger counterpart.  We also
	 retype memory - these come from an attempt to avoid the REX prefix on
	 moving the second half of a TFmode value.  */
8687 if (GET_MODE (part
[1][1]) == SImode
)
8689 if (GET_CODE (part
[1][1]) == MEM
)
8690 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
8691 else if (REG_P (part
[1][1]))
8692 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
8695 if (GET_MODE (part
[1][0]) == SImode
)
8696 part
[1][0] = part
[1][1];
8699 emit_move_insn (part
[0][1], part
[1][1]);
8700 emit_move_insn (part
[0][0], part
[1][0]);
8704 /* Choose correct order to not overwrite the source before it is copied. */
8705 if ((REG_P (part
[0][0])
8706 && REG_P (part
[1][1])
8707 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
8709 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
8711 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
8715 operands
[2] = part
[0][2];
8716 operands
[3] = part
[0][1];
8717 operands
[4] = part
[0][0];
8718 operands
[5] = part
[1][2];
8719 operands
[6] = part
[1][1];
8720 operands
[7] = part
[1][0];
8724 operands
[2] = part
[0][1];
8725 operands
[3] = part
[0][0];
8726 operands
[5] = part
[1][1];
8727 operands
[6] = part
[1][0];
8734 operands
[2] = part
[0][0];
8735 operands
[3] = part
[0][1];
8736 operands
[4] = part
[0][2];
8737 operands
[5] = part
[1][0];
8738 operands
[6] = part
[1][1];
8739 operands
[7] = part
[1][2];
8743 operands
[2] = part
[0][0];
8744 operands
[3] = part
[0][1];
8745 operands
[5] = part
[1][0];
8746 operands
[6] = part
[1][1];
8749 emit_move_insn (operands
[2], operands
[5]);
8750 emit_move_insn (operands
[3], operands
[6]);
8752 emit_move_insn (operands
[4], operands
[7]);
8758 ix86_split_ashldi (operands
, scratch
)
8759 rtx
*operands
, scratch
;
8761 rtx low
[2], high
[2];
8764 if (GET_CODE (operands
[2]) == CONST_INT
)
8766 split_di (operands
, 2, low
, high
);
8767 count
= INTVAL (operands
[2]) & 63;
8771 emit_move_insn (high
[0], low
[1]);
8772 emit_move_insn (low
[0], const0_rtx
);
8775 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
8779 if (!rtx_equal_p (operands
[0], operands
[1]))
8780 emit_move_insn (operands
[0], operands
[1]);
8781 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
8782 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
8787 if (!rtx_equal_p (operands
[0], operands
[1]))
8788 emit_move_insn (operands
[0], operands
[1]);
8790 split_di (operands
, 1, low
, high
);
8792 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
8793 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
8795 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
8797 if (! no_new_pseudos
)
8798 scratch
= force_reg (SImode
, const0_rtx
);
8800 emit_move_insn (scratch
, const0_rtx
);
8802 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
8806 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
8811 ix86_split_ashrdi (operands
, scratch
)
8812 rtx
*operands
, scratch
;
8814 rtx low
[2], high
[2];
8817 if (GET_CODE (operands
[2]) == CONST_INT
)
8819 split_di (operands
, 2, low
, high
);
8820 count
= INTVAL (operands
[2]) & 63;
8824 emit_move_insn (low
[0], high
[1]);
8826 if (! reload_completed
)
8827 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
8830 emit_move_insn (high
[0], low
[0]);
8831 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
8835 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
8839 if (!rtx_equal_p (operands
[0], operands
[1]))
8840 emit_move_insn (operands
[0], operands
[1]);
8841 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
8842 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
8847 if (!rtx_equal_p (operands
[0], operands
[1]))
8848 emit_move_insn (operands
[0], operands
[1]);
8850 split_di (operands
, 1, low
, high
);
8852 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
8853 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
8855 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
8857 if (! no_new_pseudos
)
8858 scratch
= gen_reg_rtx (SImode
);
8859 emit_move_insn (scratch
, high
[0]);
8860 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
8861 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
8865 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
8870 ix86_split_lshrdi (operands
, scratch
)
8871 rtx
*operands
, scratch
;
8873 rtx low
[2], high
[2];
8876 if (GET_CODE (operands
[2]) == CONST_INT
)
8878 split_di (operands
, 2, low
, high
);
8879 count
= INTVAL (operands
[2]) & 63;
8883 emit_move_insn (low
[0], high
[1]);
8884 emit_move_insn (high
[0], const0_rtx
);
8887 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
8891 if (!rtx_equal_p (operands
[0], operands
[1]))
8892 emit_move_insn (operands
[0], operands
[1]);
8893 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
8894 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
8899 if (!rtx_equal_p (operands
[0], operands
[1]))
8900 emit_move_insn (operands
[0], operands
[1]);
8902 split_di (operands
, 1, low
, high
);
8904 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
8905 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
8907 /* Heh. By reversing the arguments, we can reuse this pattern. */
8908 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
8910 if (! no_new_pseudos
)
8911 scratch
= force_reg (SImode
, const0_rtx
);
8913 emit_move_insn (scratch
, const0_rtx
);
8915 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
8919 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
/* Helper function for the string operations below.  Test VARIABLE whether
   it is aligned to VALUE bytes.  If it is, jump to the label.  */
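/* Typical use, as in the expanders below: the caller emits the single
   byte/word/dword move that fixes up the misalignment, adjusts the count,
   and only then emits the returned label, so the fix-up is skipped entirely
   when the address was already aligned.  */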
8926 ix86_expand_aligntest (variable
, value
)
8930 rtx label
= gen_label_rtx ();
8931 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
8932 if (GET_MODE (variable
) == DImode
)
8933 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
8935 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
8936 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
8941 /* Adjust COUNTER by the VALUE. */
8943 ix86_adjust_counter (countreg
, value
)
8945 HOST_WIDE_INT value
;
8947 if (GET_MODE (countreg
) == DImode
)
8948 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
8950 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
8953 /* Zero extend possibly SImode EXP to Pmode register. */
8955 ix86_zero_extend_to_Pmode (exp
)
8959 if (GET_MODE (exp
) == VOIDmode
)
8960 return force_reg (Pmode
, exp
);
8961 if (GET_MODE (exp
) == Pmode
)
8962 return copy_to_mode_reg (Pmode
, exp
);
8963 r
= gen_reg_rtx (Pmode
);
8964 emit_insn (gen_zero_extendsidi2 (r
, exp
));
8968 /* Expand string move (memcpy) operation. Use i386 string operations when
8969 profitable. expand_clrstr contains similar code. */
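/* Rough shape of the expansion below: when optimizing for size (or for odd
   constant counts) a plain "rep movsb" is emitted; for small or suitably
   aligned constant counts a "rep movsl" (or "rep movsq" on 64-bit) copies
   the bulk and a few single moves copy the remainder; otherwise the
   glibc-style generic path aligns the destination at run time, uses rep movs
   for the middle, and finishes the tail with single string moves.  */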
8971 ix86_expand_movstr (dst
, src
, count_exp
, align_exp
)
8972 rtx dst
, src
, count_exp
, align_exp
;
8974 rtx srcreg
, destreg
, countreg
;
8975 enum machine_mode counter_mode
;
8976 HOST_WIDE_INT align
= 0;
8977 unsigned HOST_WIDE_INT count
= 0;
8982 if (GET_CODE (align_exp
) == CONST_INT
)
8983 align
= INTVAL (align_exp
);
8985 /* This simple hack avoids all inlining code and simplifies code below. */
8986 if (!TARGET_ALIGN_STRINGOPS
)
8989 if (GET_CODE (count_exp
) == CONST_INT
)
8990 count
= INTVAL (count_exp
);
8992 /* Figure out proper mode for counter. For 32bits it is always SImode,
8993 for 64bits use SImode when possible, otherwise DImode.
8994 Set count to number of bytes copied when known at compile time. */
8995 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
8996 || x86_64_zero_extended_value (count_exp
))
8997 counter_mode
= SImode
;
8999 counter_mode
= DImode
;
9001 if (counter_mode
!= SImode
&& counter_mode
!= DImode
)
9004 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
9005 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
9007 emit_insn (gen_cld ());
9009 /* When optimizing for size emit simple rep ; movsb instruction for
9010 counts not divisible by 4. */
9012 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
9014 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
9016 emit_insn (gen_rep_movqi_rex64 (destreg
, srcreg
, countreg
,
9017 destreg
, srcreg
, countreg
));
9019 emit_insn (gen_rep_movqi (destreg
, srcreg
, countreg
,
9020 destreg
, srcreg
, countreg
));
9023 /* For constant aligned (or small unaligned) copies use rep movsl
9024 followed by code copying the rest. For PentiumPro ensure 8 byte
9025 alignment to allow rep movsl acceleration. */
9029 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
9030 || optimize_size
|| count
< (unsigned int) 64))
9032 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
9033 if (count
& ~(size
- 1))
9035 countreg
= copy_to_mode_reg (counter_mode
,
9036 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
9037 & (TARGET_64BIT
? -1 : 0x3fffffff)));
9038 countreg
= ix86_zero_extend_to_Pmode (countreg
);
9042 emit_insn (gen_rep_movsi_rex64 (destreg
, srcreg
, countreg
,
9043 destreg
, srcreg
, countreg
));
9045 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg
,
9046 destreg
, srcreg
, countreg
));
9049 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg
,
9050 destreg
, srcreg
, countreg
));
9052 if (size
== 8 && (count
& 0x04))
9053 emit_insn (gen_strmovsi (destreg
, srcreg
));
9055 emit_insn (gen_strmovhi (destreg
, srcreg
));
9057 emit_insn (gen_strmovqi (destreg
, srcreg
));
9059 /* The generic code based on the glibc implementation:
9060 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9061 allowing accelerated copying there)
9062 - copy the data using rep movsl
  /* In case we don't know anything about the alignment, default to
     the library version, since it is usually equally fast and results in
     shorter code.  */
  if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
    return 0;
9078 if (TARGET_SINGLE_STRINGOP
)
9079 emit_insn (gen_cld ());
9081 countreg2
= gen_reg_rtx (Pmode
);
9082 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
      /* We don't use loops to align destination and to copy parts smaller
	 than 4 bytes, because gcc is able to optimize such code better (in
	 the case the destination or the count really is aligned, gcc is often
	 able to predict the branches) and also it is friendlier to the
	 hardware branch prediction.

	 Using loops is beneficial for the generic case, because we can
	 handle small counts using the loops.  Many CPUs (such as Athlon)
	 have large REP prefix setup costs.

	 This is quite costly.  Maybe we can revisit this decision later or
	 add some customizability to this code.  */
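      /* Concretely, the run-time alignment path below tests the low bits of
	 the destination one at a time (byte, then word, then dword on
	 PentiumPro-style targets), emitting a single string move and a
	 counter adjustment for each bit that is set, before the main rep
	 movs; the leftover tail after the rep is copied with the same single
	 string moves guarded by tests on the remaining count bits.  */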
9098 && align
< (TARGET_PENTIUMPRO
&& (count
== 0
9099 || count
>= (unsigned int) 260)
9100 ? 8 : UNITS_PER_WORD
))
9102 label
= gen_label_rtx ();
9103 emit_cmp_and_jump_insns (countreg
, GEN_INT (UNITS_PER_WORD
- 1),
9104 LEU
, 0, counter_mode
, 1, label
);
9108 rtx label
= ix86_expand_aligntest (destreg
, 1);
9109 emit_insn (gen_strmovqi (destreg
, srcreg
));
9110 ix86_adjust_counter (countreg
, 1);
9112 LABEL_NUSES (label
) = 1;
9116 rtx label
= ix86_expand_aligntest (destreg
, 2);
9117 emit_insn (gen_strmovhi (destreg
, srcreg
));
9118 ix86_adjust_counter (countreg
, 2);
9120 LABEL_NUSES (label
) = 1;
9123 && ((TARGET_PENTIUMPRO
&& (count
== 0
9124 || count
>= (unsigned int) 260))
9127 rtx label
= ix86_expand_aligntest (destreg
, 4);
9128 emit_insn (gen_strmovsi (destreg
, srcreg
));
9129 ix86_adjust_counter (countreg
, 4);
9131 LABEL_NUSES (label
) = 1;
9134 if (!TARGET_SINGLE_STRINGOP
)
9135 emit_insn (gen_cld ());
9138 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
9140 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg2
,
9141 destreg
, srcreg
, countreg2
));
9145 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
9146 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg2
,
9147 destreg
, srcreg
, countreg2
));
9153 LABEL_NUSES (label
) = 1;
9155 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
9156 emit_insn (gen_strmovsi (destreg
, srcreg
));
9157 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
9159 rtx label
= ix86_expand_aligntest (countreg
, 4);
9160 emit_insn (gen_strmovsi (destreg
, srcreg
));
9162 LABEL_NUSES (label
) = 1;
9164 if (align
> 2 && count
!= 0 && (count
& 2))
9165 emit_insn (gen_strmovhi (destreg
, srcreg
));
9166 if (align
<= 2 || count
== 0)
9168 rtx label
= ix86_expand_aligntest (countreg
, 2);
9169 emit_insn (gen_strmovhi (destreg
, srcreg
));
9171 LABEL_NUSES (label
) = 1;
9173 if (align
> 1 && count
!= 0 && (count
& 1))
9174 emit_insn (gen_strmovqi (destreg
, srcreg
));
9175 if (align
<= 1 || count
== 0)
9177 rtx label
= ix86_expand_aligntest (countreg
, 1);
9178 emit_insn (gen_strmovqi (destreg
, srcreg
));
9180 LABEL_NUSES (label
) = 1;
9184 insns
= get_insns ();
9187 ix86_set_move_mem_attrs (insns
, dst
, src
, destreg
, srcreg
);
9192 /* Expand string clear operation (bzero). Use i386 string operations when
9193 profitable. expand_movstr contains similar code. */
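/* The structure mirrors ix86_expand_movstr above: a zero register is
   materialized once and then stored with rep stosb / rep stossi-style
   patterns (rep stosdi on 64-bit), with the same alignment prologue and
   small-tail epilogue around the rep instruction.  */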
9195 ix86_expand_clrstr (src
, count_exp
, align_exp
)
9196 rtx src
, count_exp
, align_exp
;
9198 rtx destreg
, zeroreg
, countreg
;
9199 enum machine_mode counter_mode
;
9200 HOST_WIDE_INT align
= 0;
9201 unsigned HOST_WIDE_INT count
= 0;
9203 if (GET_CODE (align_exp
) == CONST_INT
)
9204 align
= INTVAL (align_exp
);
9206 /* This simple hack avoids all inlining code and simplifies code below. */
9207 if (!TARGET_ALIGN_STRINGOPS
)
9210 if (GET_CODE (count_exp
) == CONST_INT
)
9211 count
= INTVAL (count_exp
);
9212 /* Figure out proper mode for counter. For 32bits it is always SImode,
9213 for 64bits use SImode when possible, otherwise DImode.
9214 Set count to number of bytes copied when known at compile time. */
9215 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
9216 || x86_64_zero_extended_value (count_exp
))
9217 counter_mode
= SImode
;
9219 counter_mode
= DImode
;
9221 destreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
9223 emit_insn (gen_cld ());
9225 /* When optimizing for size emit simple rep ; movsb instruction for
9226 counts not divisible by 4. */
9228 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
9230 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
9231 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
9233 emit_insn (gen_rep_stosqi_rex64 (destreg
, countreg
, zeroreg
,
9234 destreg
, countreg
));
9236 emit_insn (gen_rep_stosqi (destreg
, countreg
, zeroreg
,
9237 destreg
, countreg
));
9241 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
9242 || optimize_size
|| count
< (unsigned int) 64))
9244 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
9245 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
9246 if (count
& ~(size
- 1))
9248 countreg
= copy_to_mode_reg (counter_mode
,
9249 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
9250 & (TARGET_64BIT
? -1 : 0x3fffffff)));
9251 countreg
= ix86_zero_extend_to_Pmode (countreg
);
9255 emit_insn (gen_rep_stossi_rex64 (destreg
, countreg
, zeroreg
,
9256 destreg
, countreg
));
9258 emit_insn (gen_rep_stossi (destreg
, countreg
, zeroreg
,
9259 destreg
, countreg
));
9262 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg
, zeroreg
,
9263 destreg
, countreg
));
9265 if (size
== 8 && (count
& 0x04))
9266 emit_insn (gen_strsetsi (destreg
,
9267 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
9269 emit_insn (gen_strsethi (destreg
,
9270 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
9272 emit_insn (gen_strsetqi (destreg
,
9273 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
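
/* Illustrative sketch (not part of the compiler proper): how the chunked
   clear above splits a byte count, assuming SIZE is the 4 or 8 chosen from
   TARGET_64BIT and optimize_size.  All names here are hypothetical.  */
#if 0
static void
example_split_clear (unsigned long count, int size,
		     unsigned long *chunks, unsigned long *tail)
{
  /* Full-width stores done by the rep stos sequence.  */
  *chunks = count >> (size == 4 ? 2 : 3);
  /* Bytes left over, handled by the strsetsi/strsethi/strsetqi tail.  */
  *tail = count & (size - 1);
}
#endif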
  /* In case we don't know anything about the alignment, default to the
     library version, since it is usually equally fast and results in
     smaller code.  */
  if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)

  if (TARGET_SINGLE_STRINGOP)
    emit_insn (gen_cld ());

  countreg2 = gen_reg_rtx (Pmode);
  countreg = copy_to_mode_reg (counter_mode, count_exp);
  zeroreg = copy_to_mode_reg (Pmode, const0_rtx);

      && align < (TARGET_PENTIUMPRO && (count == 0
		  || count >= (unsigned int) 260)
		  ? 8 : UNITS_PER_WORD))

      label = gen_label_rtx ();
      emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
			       LEU, 0, counter_mode, 1, label);

      rtx label = ix86_expand_aligntest (destreg, 1);
      emit_insn (gen_strsetqi (destreg,
			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
      ix86_adjust_counter (countreg, 1);
      LABEL_NUSES (label) = 1;

      rtx label = ix86_expand_aligntest (destreg, 2);
      emit_insn (gen_strsethi (destreg,
			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
      ix86_adjust_counter (countreg, 2);
      LABEL_NUSES (label) = 1;

  if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
      || count >= (unsigned int) 260))

      rtx label = ix86_expand_aligntest (destreg, 4);
      emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
					 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
      ix86_adjust_counter (countreg, 4);
      LABEL_NUSES (label) = 1;

  if (!TARGET_SINGLE_STRINGOP)
    emit_insn (gen_cld ());

      emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
      emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
				       destreg, countreg2));

      emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
      emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
				 destreg, countreg2));

      LABEL_NUSES (label) = 1;

  if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
    emit_insn (gen_strsetsi (destreg,
			     gen_rtx_SUBREG (SImode, zeroreg, 0)));
  if (TARGET_64BIT && (align <= 4 || count == 0))
      rtx label = ix86_expand_aligntest (destreg, 2);
      emit_insn (gen_strsetsi (destreg,
			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
      LABEL_NUSES (label) = 1;
  if (align > 2 && count != 0 && (count & 2))
    emit_insn (gen_strsethi (destreg,
			     gen_rtx_SUBREG (HImode, zeroreg, 0)));
  if (align <= 2 || count == 0)
      rtx label = ix86_expand_aligntest (destreg, 2);
      emit_insn (gen_strsethi (destreg,
			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
      LABEL_NUSES (label) = 1;
  if (align > 1 && count != 0 && (count & 1))
    emit_insn (gen_strsetqi (destreg,
			     gen_rtx_SUBREG (QImode, zeroreg, 0)));
  if (align <= 1 || count == 0)
      rtx label = ix86_expand_aligntest (destreg, 1);
      emit_insn (gen_strsetqi (destreg,
			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
      LABEL_NUSES (label) = 1;
/* Expand strlen.  */

ix86_expand_strlen (out, src, eoschar, align)
     rtx out, src, eoschar, align;

  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of the strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1

      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      emit_insn (gen_subdi3 (out, out, addr));
      emit_insn (gen_subsi3 (out, out, addr));

      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());

      emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
				     align, scratch4, scratch3));
      emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
      emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));

      emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
				 align, scratch4, scratch3));
      emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
      emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
	not aligned, otherwise undefined

   This is just the body.  It needs the initialisations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

ix86_expand_strlensi_unroll_1 (out, align_rtx)

  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);

  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2?  */
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte  */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte  */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
				   Pmode, 1, align_3_label);

	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check whether it is aligned to a 4-byte boundary.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      emit_insn (gen_adddi3 (out, out, const1_rtx));
      emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2.  */
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
	  emit_insn (gen_adddi3 (out, out, const1_rtx));
	  emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
      emit_insn (gen_adddi3 (out, out, const1_rtx));
      emit_insn (gen_addsi3 (out, out, const1_rtx));

  /* Generate the loop to check 4 bytes at a time.  It is not a good idea to
     align this loop; it only makes the program larger and does not help.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 GEN_INT (trunc_int_for_mode
				  (0x80808080, SImode))));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
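
/* Illustrative sketch (not part of the compiler proper): the scalar
   equivalent of the test emitted above, assuming a 32-bit unsigned int.
   (word - 0x01010101) & ~word & 0x80808080 is nonzero iff at least one byte
   of WORD is zero, which is what lets the loop test four bytes per
   iteration with a single compare-and-branch.  The name is hypothetical.  */
#if 0
static int
example_has_zero_byte (unsigned int word)
{
  return ((word - 0x01010101U) & ~word & 0x80808080U) != 0;
}
#endif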
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,

      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
			      gen_rtx_PLUS (Pmode, out, GEN_INT (2))));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,

      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes?  */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
      emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
  emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));

  emit_label (end_0_label);
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

ix86_init_machine_status (p)

  p->machine = (struct machine_function *)
    xcalloc (1, sizeof (struct machine_function));

/* Mark machine specific bits of P for GC.  */

ix86_mark_machine_status (p)

  struct machine_function *machine = p->machine;
  enum machine_mode mode;

  for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
       mode = (enum machine_mode) ((int) mode + 1))
    for (n = 0; n < MAX_386_STACK_LOCALS; n++)
      ggc_mark_rtx (machine->stack_locals[(int) mode][n]);

ix86_free_machine_status (p)

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

assign_386_stack_local (mode, n)
     enum machine_mode mode;

  if (n < 0 || n >= MAX_386_STACK_LOCALS)

  if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
    ix86_stack_locals[(int) mode][n]
      = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  return ix86_stack_locals[(int) mode][n];
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

memory_address_length (addr)

  struct ix86_address parts;
  rtx base, index, disp;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)

  if (! ix86_decompose_address (addr, &parts))

  index = parts.index;

  /* Register Indirect.  */
  if (base && !index && !disp)
      /* Special cases: ebp and esp need the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)

  /* Direct Addressing.  */
  else if (disp && !base && !index)

      /* Find the length of the displacement constant.  */
      if (GET_CODE (disp) == CONST_INT
	  && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))

      /* An index requires the two-byte modrm form.  */

/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate
   alternative.  */

ix86_attr_length_immediate_default (insn, shortform)

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
	&& GET_CODE (recog_data.operand[i]) == CONST_INT
	&& CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
	switch (get_attr_mode (insn))
	  /* Immediates for DImode instructions are encoded as 32-bit sign
	     extended values.  */
	  fatal_insn ("unknown insn mode", insn);

/* Compute the default value for the "length_address" attribute.  */

ix86_attr_length_address_default (insn)

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      return memory_address_length (XEXP (recog_data.operand[i], 0));
/* Return the maximum number of instructions a cpu can issue.  */

    case PROCESSOR_PENTIUM:

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_ATHLON:
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

ix86_flags_dependant (insn, dep_insn, insn_type)
     enum attr_type insn_type;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)

  if ((set = single_set (dep_insn)) != 0)
      set = SET_DEST (set);
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
	   && XVECLEN (PATTERN (dep_insn), 0) == 2
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));

  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))

/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

ix86_agi_dependant (insn, dep_insn, insn_type)
     enum attr_type insn_type;

  if (insn_type == TYPE_LEA
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
      else if (GET_CODE (addr) == PARALLEL
	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      addr = SET_SRC (addr);

      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	    addr = XEXP (recog_data.operand[i], 0);

  return modified_in_p (addr, dep_insn);
ix86_adjust_cost (insn, link, dep_insn, cost)
     rtx insn, link, dep_insn;

  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory, dep_memory;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))

      /* Floating point stores require the value to be ready one cycle
	 earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
	  && dep_insn_type != TYPE_FMOV
	  && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && GET_CODE (SET_DEST (set2)) == MEM)

      /* Show the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction in
	 case the previous instruction is not needed to compute the
	 address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)

      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);
      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))

      /* Show the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction in
	 case the previous instruction is not needed to compute the
	 address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)

    case PROCESSOR_ATHLON:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
	  if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)

      /* Show the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction in
	 case the previous instruction is not needed to compute the
	 address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
      else if (cost >= 3)
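
/* Illustrative example (not part of the compiler proper) of the Pentium
   address-generation interlock that ix86_agi_dependant models above: the
   load's address is produced by the immediately preceding instruction, so
   the pair pays an extra cycle.

	addl	%edx, %eax
	movl	(%eax), %ecx		; AGI stall on %eax
*/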
struct ppro_sched_data

  int issued_this_cycle;

ix86_safe_length (insn)

  if (recog_memoized (insn) >= 0)
    return get_attr_length (insn);

ix86_safe_length_prefix (insn)

  if (recog_memoized (insn) >= 0)
    return get_attr_length (insn);

static enum attr_memory
ix86_safe_memory (insn)

  if (recog_memoized (insn) >= 0)
    return get_attr_memory (insn);
  return MEMORY_UNKNOWN;

static enum attr_pent_pair
ix86_safe_pent_pair (insn)

  if (recog_memoized (insn) >= 0)
    return get_attr_pent_pair (insn);
  return PENT_PAIR_NP;

static enum attr_ppro_uops
ix86_safe_ppro_uops (insn)

  if (recog_memoized (insn) >= 0)
    return get_attr_ppro_uops (insn);
  return PPRO_UOPS_MANY;
ix86_dump_ppro_packet (dump)

  if (ix86_sched_data.ppro.decode[0])
      fprintf (dump, "PPRO packet: %d",
	       INSN_UID (ix86_sched_data.ppro.decode[0]));
      if (ix86_sched_data.ppro.decode[1])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
      if (ix86_sched_data.ppro.decode[2])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
      fputc ('\n', dump);

/* We're beginning a new block.  Initialize data structures as necessary.  */

ix86_sched_init (dump, sched_verbose, veclen)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int veclen ATTRIBUTE_UNUSED;

  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
/* Shift INSN to SLOT, and shift everything else down.  */

ix86_reorder_insn (insnp, slot)

      insnp[0] = insnp[1];
  while (++insnp != slot);

/* Find an instruction with the given pairability and a minimal amount of
   cycles lost by the fact that the CPU waits for both pipelines to finish
   before reading the next instructions.  Also take care that both
   instructions together cannot exceed 7 bytes.  */

ix86_pent_find_pair (e_ready, ready, type, first)
     enum attr_pent_pair type;

  int mincycles, cycles;
  enum attr_pent_pair tmp;
  enum attr_memory memory;
  rtx *insnp, *bestinsnp = NULL;

  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))

  memory = ix86_safe_memory (first);
  cycles = result_ready_cost (first);
  mincycles = INT_MAX;

  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
	enum attr_memory second_memory;
	int secondcycles, currentcycles;

	second_memory = ix86_safe_memory (*insnp);
	secondcycles = result_ready_cost (*insnp);
	currentcycles = abs (cycles - secondcycles);

	if (secondcycles >= 1 && cycles >= 1)
	    /* Two read/modify/write instructions together take two
	       extra cycles.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
	      currentcycles += 2;

	    /* A read/modify/write instruction followed by a read/modify
	       instruction takes one cycle longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
		&& tmp != PENT_PAIR_UV
		&& ix86_safe_pent_pair (first) != PENT_PAIR_UV)
	      currentcycles += 1;

	if (currentcycles < mincycles)
	  bestinsnp = insnp, mincycles = currentcycles;
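
/* Illustrative sketch (not part of the compiler proper): the pairing cost
   computed above.  Since the CPU waits for both pipes to finish, the cycles
   lost are the difference of the two latencies plus the read/modify/write
   penalties; all parameter names here are hypothetical.  */
#if 0
static int
example_pent_pair_cost (int cycles1, int cycles2,
			int both_rmw, int rmw_then_load)
{
  int lost = cycles1 > cycles2 ? cycles1 - cycles2 : cycles2 - cycles1;
  if (both_rmw)
    lost += 2;		/* two read/modify/write insns paired */
  if (rmw_then_load)
    lost += 1;		/* read/modify/write followed by read/modify */
  return lost;
}
#endif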
/* Subroutines of ix86_sched_reorder.  */

ix86_sched_reorder_pentium (ready, e_ready)

  enum attr_pent_pair pair1, pair2;

  /* This wouldn't be necessary if Haifa knew that static insn ordering
     is important to which pipe an insn is issued to.  So we have to make
     some minor rearrangements.  */

  pair1 = ix86_safe_pent_pair (*e_ready);

  /* If the first insn is non-pairable, let it be.  */
  if (pair1 == PENT_PAIR_NP)

  pair2 = PENT_PAIR_NP;

  /* If the first insn is UV or PV pairable, search for a PU
     insn to go with.  */
  if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
      insnp = ix86_pent_find_pair (e_ready - 1, ready,
				   PENT_PAIR_PU, *e_ready);
	pair2 = PENT_PAIR_PU;

  /* If the first insn is PU or UV pairable, search for a PV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP
      && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
      insnp = ix86_pent_find_pair (e_ready - 1, ready,
				   PENT_PAIR_PV, *e_ready);
	pair2 = PENT_PAIR_PV;

  /* If the first insn is pairable, search for a UV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP)
      insnp = ix86_pent_find_pair (e_ready - 1, ready,
				   PENT_PAIR_UV, *e_ready);
	pair2 = PENT_PAIR_UV;

  if (pair2 == PENT_PAIR_NP)

  /* Found something!  Decide if we need to swap the order.  */
  if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
      || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
	  && ix86_safe_memory (*e_ready) == MEMORY_BOTH
	  && ix86_safe_memory (*insnp) == MEMORY_LOAD))
    ix86_reorder_insn (insnp, e_ready);
    ix86_reorder_insn (insnp, e_ready - 1);
ix86_sched_reorder_ppro (ready, e_ready)

  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;

  /* At this point .ppro.decode contains the state of the three
     decoders from the last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
      if (cur_uops == PPRO_UOPS_MANY)
	  decode[0] = *insnp;

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	  if (insnp == ready)
	  cur_uops = ix86_safe_ppro_uops (*--insnp);

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
	if (ready >= e_ready)

	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	    if (insnp == ready)
	    cur_uops = ix86_safe_ppro_uops (*--insnp);

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */

  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int clock_var ATTRIBUTE_UNUSED;

  int n_ready = *n_readyp;
  rtx *e_ready = ready + n_ready - 1;

    case PROCESSOR_PENTIUM:
      ix86_sched_reorder_pentium (ready, e_ready);

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);

  return ix86_issue_rate ();
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     int can_issue_more;

      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	    ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	else if (uops == PPRO_UOPS_FEW)
	    ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;

	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		  ix86_sched_data.ppro.decode[i] = insn;

		ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;

      return --ix86_sched_data.ppro.issued_this_cycle;
/* Walk through INSNS and look for MEM references whose address is DSTREG or
   SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
   appropriate.  */

ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
     rtx dstref, srcref, dstreg, srcreg;

  for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,

/* Subroutine of the above to actually do the updating by recursively
   walking the rtx.  */

ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx dstref, srcref, dstreg, srcreg;

  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);

  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
      if (*format_ptr == 'e')
	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
      else if (*format_ptr == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

ix86_constant_alignment (exp, align)

  if (TREE_CODE (exp) == REAL_CST)
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
  else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

ix86_data_alignment (type, align)

  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)

  if (TREE_CODE (type) == ARRAY_TYPE)
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
  else if (TREE_CODE (type) == COMPLEX_TYPE)
      if (TYPE_MODE (type) == DCmode && align < 64)
      if (TYPE_MODE (type) == XCmode && align < 128)
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
      if (TYPE_MODE (type) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
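
/* Illustrative examples (not part of the compiler proper) of the rules
   above; the raised values follow the "align < N" guards that gate each
   case.  All names here are hypothetical.  */
#if 0
static double example_d;		/* DFmode scalar: at least 64 bits  */
static double example_a[64];		/* large aggregate: at least 256    */
struct example_s { double x; };		/* first field DFmode: at least 64  */
#endif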
/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

ix86_local_alignment (type, align)

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)

  if (TREE_CODE (type) == ARRAY_TYPE)
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
  else if (TREE_CODE (type) == COMPLEX_TYPE)
      if (TYPE_MODE (type) == DCmode && align < 64)
      if (TYPE_MODE (type) == XCmode && align < 128)
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
      if (TYPE_MODE (type) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */

x86_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;

      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      GEN_INT (trunc_int_for_mode (0xb9, QImode)));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      GEN_INT (trunc_int_for_mode (0xe9, QImode)));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
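
/* Illustrative sketch (not part of the compiler proper): the 10-byte ia32
   trampoline emitted above, written out as a plain byte buffer.  0xb9 is
   "movl $imm32, %ecx" and 0xe9 is "jmp rel32"; DISP is relative to the end
   of the jmp, i.e. TRAMP + 10, just as computed with expand_binop above.
   The function name is hypothetical.  */
#if 0
static void
example_ia32_trampoline (unsigned char *tramp,
			 unsigned long cxt, unsigned long fnaddr)
{
  unsigned long disp = fnaddr - ((unsigned long) tramp + 10);
  int i;

  tramp[0] = 0xb9;			/* movl $cxt, %ecx */
  tramp[5] = 0xe9;			/* jmp  fnaddr     */
  for (i = 0; i < 4; i++)
    {
      tramp[1 + i] = (cxt >> (8 * i)) & 0xff;	/* little-endian imm32 */
      tramp[6 + i] = (disp >> (8 * i)) & 0xff;	/* little-endian rel32 */
    }
}
#endif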
      /* Try to load the address using the shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but the kernel does not
	 use trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
	  emit_move_insn (gen_rtx_MEM (SImode,
				       plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));

	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
	  emit_move_insn (gen_rtx_MEM (DImode,
				       plus_constant (tramp, offset + 2)),

      /* Load the static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      GEN_INT (trunc_int_for_mode (0xba49, HImode)));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),

      /* Jump to the r11 register.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      GEN_INT (trunc_int_for_mode (0xff49, HImode)));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
		      GEN_INT (trunc_int_for_mode (0xe3, QImode)));

  if (offset > TRAMPOLINE_SIZE)
#define def_builtin(MASK, NAME, TYPE, CODE)				\
do {									\
  if ((MASK) & target_flags)						\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL);	\
} while (0)

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};
static const struct builtin_description bdesc_comi[] =
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
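
/* Illustrative note (not part of the compiler proper): entries whose last
   field is 1 are expanded with their operands swapped, so for example the
   "greater than" builtin is implemented with the LT comparison applied to
   the reversed operands.  */
#if 0
static int
example_comigt (float a, float b)
{
  return b < a;		/* a > b rewritten as b < a (flag == 1) */
}
#endif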
static const struct builtin_description bdesc_2arg[] =
  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },

  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
static const struct builtin_description bdesc_1arg[] =
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
ix86_init_builtins ()

  ix86_init_mmx_sse_builtins ();

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */

ix86_init_mmx_sse_builtins ()
  const struct builtin_description * d;

  tree endlink = void_list_node;

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  tree int_ftype_v4sf_v4sf
    = build_function_type (integer_type_node,
	tree_cons (NULL_TREE, V4SF_type_node,
	  tree_cons (NULL_TREE, V4SF_type_node, endlink)));
  tree v4si_ftype_v4sf_v4sf
    = build_function_type (V4SI_type_node,
	tree_cons (NULL_TREE, V4SF_type_node,
	  tree_cons (NULL_TREE, V4SF_type_node, endlink)));
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type (integer_type_node,
	tree_cons (NULL_TREE, V4SF_type_node, endlink));
  tree int_ftype_v8qi
    = build_function_type (integer_type_node,
	tree_cons (NULL_TREE, V8QI_type_node, endlink));
  tree v4sf_ftype_v4sf_int
    = build_function_type (V4SF_type_node,
	tree_cons (NULL_TREE, V4SF_type_node,
	  tree_cons (NULL_TREE, integer_type_node, endlink)));
  tree v4sf_ftype_v4sf_v2si
    = build_function_type (V4SF_type_node,
	tree_cons (NULL_TREE, V4SF_type_node,
	  tree_cons (NULL_TREE, V2SI_type_node, endlink)));
  tree int_ftype_v4hi_int
    = build_function_type (integer_type_node,
	tree_cons (NULL_TREE, V4HI_type_node,
	  tree_cons (NULL_TREE, integer_type_node, endlink)));
  tree v4hi_ftype_v4hi_int_int
    = build_function_type (V4HI_type_node,
	tree_cons (NULL_TREE, V4HI_type_node,
	  tree_cons (NULL_TREE, integer_type_node,
	    tree_cons (NULL_TREE, integer_type_node, endlink))));
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type (V8QI_type_node,
	tree_cons (NULL_TREE, V4HI_type_node,
	  tree_cons (NULL_TREE, V4HI_type_node, endlink)));
  tree v4hi_ftype_v2si_v2si
    = build_function_type (V4HI_type_node,
	tree_cons (NULL_TREE, V2SI_type_node,
	  tree_cons (NULL_TREE, V2SI_type_node, endlink)));
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type (V4SF_type_node,
	tree_cons (NULL_TREE, V4SF_type_node,
	  tree_cons (NULL_TREE, V4SF_type_node,
	    tree_cons (NULL_TREE, integer_type_node, endlink))));
  tree v4hi_ftype_v8qi_v8qi
    = build_function_type (V4HI_type_node,
	tree_cons (NULL_TREE, V8QI_type_node,
	  tree_cons (NULL_TREE, V8QI_type_node, endlink)));
  tree v2si_ftype_v4hi_v4hi
    = build_function_type (V2SI_type_node,
	tree_cons (NULL_TREE, V4HI_type_node,
	  tree_cons (NULL_TREE, V4HI_type_node, endlink)));
  tree v4hi_ftype_v4hi_int
    = build_function_type (V4HI_type_node,
	tree_cons (NULL_TREE, V4HI_type_node,
	  tree_cons (NULL_TREE, integer_type_node, endlink)));
  tree v4hi_ftype_v4hi_di
    = build_function_type (V4HI_type_node,
	tree_cons (NULL_TREE, V4HI_type_node,
	  tree_cons (NULL_TREE, long_long_integer_type_node, endlink)));
  tree v2si_ftype_v2si_di
    = build_function_type (V2SI_type_node,
	tree_cons (NULL_TREE, V2SI_type_node,
	  tree_cons (NULL_TREE, long_long_integer_type_node, endlink)));
  tree void_ftype_void
    = build_function_type (void_type_node, endlink);
  tree void_ftype_unsigned
    = build_function_type (void_type_node,
	tree_cons (NULL_TREE, unsigned_type_node, endlink));
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, endlink);
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree v4sf_ftype_void
    = build_function_type (V4SF_type_node, endlink);
  tree v2si_ftype_v4sf
    = build_function_type (V2SI_type_node,
	tree_cons (NULL_TREE, V4SF_type_node, endlink));
  /* Loads/stores.  */
  tree maskmovq_args
    = tree_cons (NULL_TREE, V8QI_type_node,
	tree_cons (NULL_TREE, V8QI_type_node,
	  tree_cons (NULL_TREE, pchar_type_node, endlink)));
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type (void_type_node, maskmovq_args);
  tree v4sf_ftype_pfloat
    = build_function_type (V4SF_type_node,
	tree_cons (NULL_TREE, pfloat_type_node, endlink));
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type (V4SF_type_node,
	tree_cons (NULL_TREE, V4SF_type_node,
	  tree_cons (NULL_TREE, pv2si_type_node, endlink)));
  tree void_ftype_pv2si_v4sf
    = build_function_type (void_type_node,
	tree_cons (NULL_TREE, pv2si_type_node,
	  tree_cons (NULL_TREE, V4SF_type_node, endlink)));
  tree void_ftype_pfloat_v4sf
    = build_function_type (void_type_node,
	tree_cons (NULL_TREE, pfloat_type_node,
	  tree_cons (NULL_TREE, V4SF_type_node, endlink)));
  tree void_ftype_pdi_di
    = build_function_type (void_type_node,
	tree_cons (NULL_TREE, pdi_type_node,
	  tree_cons (NULL_TREE, long_long_unsigned_type_node, endlink)));
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type (V4SF_type_node,
	tree_cons (NULL_TREE, V4SF_type_node, endlink));

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type (V4SF_type_node,
	tree_cons (NULL_TREE, V4SF_type_node,
	  tree_cons (NULL_TREE, V4SF_type_node, endlink)));
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
	tree_cons (NULL_TREE, V8QI_type_node,
	  tree_cons (NULL_TREE, V8QI_type_node, endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
	tree_cons (NULL_TREE, V4HI_type_node,
	  tree_cons (NULL_TREE, V4HI_type_node, endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
	tree_cons (NULL_TREE, V2SI_type_node,
	  tree_cons (NULL_TREE, V2SI_type_node, endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
	tree_cons (NULL_TREE, long_long_unsigned_type_node,
	  tree_cons (NULL_TREE, long_long_unsigned_type_node, endlink)));

  tree v2si_ftype_v2sf
    = build_function_type (V2SI_type_node,
	tree_cons (NULL_TREE, V2SF_type_node, endlink));
  tree v2sf_ftype_v2si
    = build_function_type (V2SF_type_node,
	tree_cons (NULL_TREE, V2SI_type_node, endlink));
  tree v2si_ftype_v2si
    = build_function_type (V2SI_type_node,
	tree_cons (NULL_TREE, V2SI_type_node, endlink));
  tree v2sf_ftype_v2sf
    = build_function_type (V2SF_type_node,
	tree_cons (NULL_TREE, V2SF_type_node, endlink));
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type (V2SF_type_node,
	tree_cons (NULL_TREE, V2SF_type_node,
	  tree_cons (NULL_TREE, V2SF_type_node, endlink)));
  tree v2si_ftype_v2sf_v2sf
    = build_function_type (V2SI_type_node,
	tree_cons (NULL_TREE, V2SF_type_node,
	  tree_cons (NULL_TREE, V2SF_type_node, endlink)));
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;
	default:
	  abort ();
	}

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
	  || d->icode == CODE_FOR_maskncmpv4sf3
	  || d->icode == CODE_FOR_vmmaskcmpv4sf3
	  || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	type = v4si_ftype_v4sf_v4sf;

      def_builtin (d->mask, d->name, type, d->code);
    }
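  /* Illustrative example (not part of the original sources; the exact record
     layout is hypothetical): a bdesc_2arg entry along the lines of

	 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps",
	   IX86_BUILTIN_ADDPS, 0, 0 }

     has V4SFmode as operand 1 of its insn, so the loop above selects
     v4sf_ftype_v4sf_v4sf as its type and registers __builtin_ia32_addps.  */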
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
    def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);

  def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
  def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
  def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
  def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
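/* Usage sketch (illustrative only, not part of this file): once registered,
   the builtins above are directly callable from C when the corresponding
   -msse/-mmmx/-m3dnow flag is given; the <xmmintrin.h> intrinsics are thin
   wrappers around them.  The typedef below is only an assumption made for
   the sake of the example.  */
#if 0
typedef float v4sf __attribute__ ((mode (V4SF)));

static v4sf
example_addps (v4sf a, v4sf b)
{
  /* Matched against bdesc_2arg and expanded by ix86_expand_binop_builtin.  */
  return __builtin_ia32_addps (a, b);
}
#endif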

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (x, mode)
     rtx x;
     enum machine_mode mode;
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
			      : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
				: gen_rtx_SUBREG (V4SFmode, x, 0)));
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
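
/* Flow sketch (illustrative): for __builtin_ia32_addps the dispatch at the
   end of ix86_expand_builtin reaches this function as
       ix86_expand_binop_builtin (CODE_FOR_addv4sf3, arglist, target);
   both arguments are copied into V4SFmode registers whenever the insn
   predicates reject them, and a single addv4sf3 insn is emitted.  */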

/* In type_for_mode we restrict the ability to create TImode types
   to hosts with 64-bit H_W_I.  So we've defined the SSE logicals
   to have a V4SFmode signature.  Convert them in-place to TImode.  */

static rtx
ix86_expand_timode_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);

  op0 = gen_lowpart (TImode, op0);
  op1 = gen_lowpart (TImode, op1);
  target = gen_reg_rtx (TImode);

  if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
    op0 = copy_to_mode_reg (TImode, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
    op1 = copy_to_mode_reg (TImode, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);

  return gen_lowpart (V4SFmode, target);
}
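
/* Illustrative note: for __builtin_ia32_andps, declared above with a
   V4SFmode signature, the wrapper lowers both operands with
   gen_lowpart (TImode, ...), emits sse_andti3 into a fresh TImode register
   and hands the result back as gen_lowpart (V4SFmode, target), so the
   logical operation itself is carried out in TImode.  */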

/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (icode, arglist)
     enum insn_code icode;
     tree arglist;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (icode, arglist, target, do_load)
     enum insn_code icode;
     tree arglist;
     rtx target;
     int do_load;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  gen_rtx_REG (CCmode, FLAGS_REG),
					  const0_rtx)));

  return SUBREG_REG (target);
}
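
/* Usage sketch (illustrative only): a comi builtin expanded by the function
   above yields the flag-derived 0/1 value directly, e.g.
       int eq = __builtin_ia32_comieq (a, b);
   the comparison insn sets the flags and the STRICT_LOW_PART store above
   turns them into the QImode result.  */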

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;
    case IX86_BUILTIN_PEXTRW:
      icode = CODE_FOR_mmx_pextrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_PINSRW:
      icode = CODE_FOR_mmx_pinsrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_MASKMOVQ:
      icode = CODE_FOR_mmx_maskmovq;
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (pat)
	emit_insn (pat);
      return 0;
    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_ANDPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
					       arglist, target);
    case IX86_BUILTIN_ANDNPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
					       arglist, target);
    case IX86_BUILTIN_ORPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
					       arglist, target);
    case IX86_BUILTIN_XORPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
					       arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
      icode = (fcode == IX86_BUILTIN_LOADHPS
	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS
	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;
    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);
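      /* Usage sketch (illustrative only): the two cases above give C-level
	 access to MXCSR and pair up naturally, e.g.
	     unsigned int saved = __builtin_ia32_stmxcsr ();
	     __builtin_ia32_ldmxcsr (saved);
	 in both directions the value makes a round trip through the same
	 386 stack slot.  */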
    case IX86_BUILTIN_SHUFPS:
      icode = CODE_FOR_sse_shufps;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_PSHUFW:
      icode = CODE_FOR_mmx_pshufw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return 0;
    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    default:
      break;
    }
  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}

/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;

  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (gen_rtx_SET (VOIDmode,
				  gen_rtx_MEM (DImode,
					       gen_rtx_PRE_DEC (DImode,
								stack_pointer_rtx)),
				  operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (gen_rtx_SET (VOIDmode,
				    gen_rtx_MEM (SImode,
						 gen_rtx_PRE_DEC (Pmode,
								  stack_pointer_rtx)),
				    operands[1]));
	    emit_insn (gen_rtx_SET (VOIDmode,
				    gen_rtx_MEM (SImode,
						 gen_rtx_PRE_DEC (Pmode,
								  stack_pointer_rtx)),
				    operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (gen_rtx_SET (VOIDmode,
				  gen_rtx_MEM (GET_MODE (operand),
					       gen_rtx_PRE_DEC (SImode,
								stack_pointer_rtx)),
				  operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}

/* Free operand from the memory.  */
void
ix86_free_from_memory (mode)
     enum machine_mode mode;
{
  if (!TARGET_64BIT || !TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
	size = 2;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (x, class)
     rtx x;
     enum reg_class class;
{
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
	return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
	{
	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
	  if (MAYBE_SSE_CLASS_P (class))
	    return (reg_class_subset_p (class, GENERAL_REGS)
		    ? GENERAL_REGS : FLOAT_REGS);
	  else
	    return class;
	}
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
	return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}

/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in a single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */
int
ix86_secondary_memory_needed (class1, class2, mode, strict)
     enum reg_class class1, class2;
     enum machine_mode mode;
     int strict;
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
	abort ();
      else
	return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
	  || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
	      && (mode) != SImode)
	  || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
	      && (mode) != SImode));
}
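
/* Example (illustrative): a DFmode copy between FLOAT_REGS and SSE_REGS
   needs a memory intermediate (the FLOAT test above fires), whereas an
   SImode copy between GENERAL_REGS and MMX_REGS does not, because the
   MMX/SSE tests are suppressed for SImode, which can be moved directly
   with movd.  */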

/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */
int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In case of copying from general_purpose_register we may emit
     multiple stores followed by single load causing memory size mismatch
     stall.  Count this as arbitrarily high cost of 20.  */
  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int add_cost = 0;
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	add_cost = 20;
      return (MEMORY_MOVE_COST (mode, class1, 0)
	      + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
    }
  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
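
/* Worked example (illustrative): an SImode move from GENERAL_REGS to
   SSE_REGS needs no secondary memory, so it falls through to the SSE test
   and costs ix86_cost->mmxsse_to_integer; a DFmode move from FLOAT_REGS to
   SSE_REGS does need memory and costs MEMORY_MOVE_COST for the store plus
   MEMORY_MOVE_COST for the load, plus 20 when CLASS1 spans more hard
   registers than CLASS2.  */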

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return VALID_SSE_REG_MODE (mode);
  if (MMX_REGNO_P (regno))
    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
  /* We handle both integer and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care for QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */
int
ix86_memory_move_cost (mode, class, in)
     enum machine_mode mode;
     enum reg_class class;
     int in;
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	case TFmode:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
	return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
		: ix86_cost->movzbl_load);
      else
	return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
		: ix86_cost->int_store[0] + 4);
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
	      * (int) GET_MODE_SIZE (mode) / 4);
    }
}

#ifdef DO_GLOBAL_CTORS_BODY
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif

/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc ()
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}
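
/* Illustrative note: with the default x87 FP math the resulting order is
   caller-saved integer regs, callee-saved integer regs, x87 stack regs,
   SSE and REX SSE regs, then MMX regs; with SSE math (TARGET_SSE_MATH) the
   x87 regs are instead placed after the SSE regs.  */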