1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
51 /* Processor costs (relative to an add) */
struct processor_costs size_cost = {	/* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of loading integer registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
88 /* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {8, 8, 8}, /* cost of loading integer registers */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
struct processor_costs i486_cost = {	/* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {8, 8, 8}, /* cost of loading integer registers */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
struct processor_costs pentium_cost = {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184 {4, 4, 6}, /* cost of loading integer registers */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
struct processor_costs pentiumpro_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of loading integer registers */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
struct processor_costs k6_cost = {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {4, 4, 4}, /* cost of loading integer registers */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
struct processor_costs athlon_cost = {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {4, 4, 16}, /* cost of loading integer registers */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
struct processor_costs pentium4_cost = {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of loading integer registers */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
const struct processor_costs *ix86_cost = &pentium_cost;
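/* Illustrative sketch (not part of the original file): the tables above are
   reached through the ix86_cost pointer, which override_options points at
   the table for the selected CPU, so a backend cost query looks roughly like

     int add_cost = COSTS_N_INSNS (ix86_cost->add);

   The field name `add' is an assumption here; the authoritative field names
   are in the struct processor_costs declaration in i386.h.  */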
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
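/* Illustrative sketch (assumption, mirroring how this file itself tests the
   masks further down, e.g. x86_arch_always_fancy_math_387 & (1 << ix86_arch)):
   a tuning flag below applies to the selected CPU when its bit is set,
   roughly

     if (x86_use_leave & (1 << ix86_cpu))
       ... emit a "leave" instruction in the epilogue ...

   i386.h wraps these tests in TARGET_* convenience macros.  */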
const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_3dnow_a = m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;
#define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  AREG, DREG, CREG, BREG,
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
};
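/* Illustrative example (not from the original file): REGNO_REG_CLASS in
   i386.h simply indexes this table, so for hard register 1 (dx), which is
   mapped to DREG above,

     REGNO_REG_CLASS (1) == DREG

   gives the smallest register class containing that hard register.  */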
442 /* The "default" register map used in 32bit mode. */
444 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
446 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
447 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
448 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
449 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
450 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
451 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
452 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
466 /* The "default" register map used in 64bit mode. */
467 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
469 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
470 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
471 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
472 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
473 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
474 8,9,10,11,12,13,14,15, /* extended integer registers */
475 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
478 /* Define the register numbers to be used in Dwarf debugging information.
479 The SVR4 reference port C compiler uses the following register numbers
480 in its Dwarf output code:
481 0 for %eax (gcc regno = 0)
482 1 for %ecx (gcc regno = 2)
483 2 for %edx (gcc regno = 1)
484 3 for %ebx (gcc regno = 3)
485 4 for %esp (gcc regno = 7)
486 5 for %ebp (gcc regno = 6)
487 6 for %esi (gcc regno = 4)
488 7 for %edi (gcc regno = 5)
489 The following three DWARF register numbers are never generated by
490 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
491 believes these numbers have these meanings.
492 8 for %eip (no gcc equivalent)
493 9 for %eflags (gcc regno = 17)
494 10 for %trapno (no gcc equivalent)
495 It is not at all clear how we should number the FP stack registers
496 for the x86 architecture. If the version of SDB on x86/svr4 were
497 a bit less brain dead with respect to floating-point then we would
498 have a precedent to follow with respect to DWARF register numbers
499 for x86 FP registers, but the SDB on x86/svr4 is so completely
500 broken with respect to FP registers that it is hardly worth thinking
501 of it as something to strive for compatibility with.
502 The version of x86/svr4 SDB I have at the moment does (partially)
503 seem to believe that DWARF register number 11 is associated with
504 the x86 register %st(0), but that's about all. Higher DWARF
505 register numbers don't seem to be associated with anything in
506 particular, and even for DWARF regno 11, SDB only seems to under-
507 stand that it should say that a variable lives in %st(0) (when
508 asked via an `=' command) if we said it was in DWARF regno 11,
509 but SDB still prints garbage when asked for the value of the
510 variable in question (via a `/' command).
511 (Also note that the labels SDB prints for various FP stack regs
512 when doing an `x' command are all wrong.)
513 Note that these problems generally don't affect the native SVR4
514 C compiler because it doesn't allow the use of -O with -g and
515 because when it is *not* optimizing, it allocates a memory
516 location for each floating-point variable, and the memory
517 location is what gets described in the DWARF AT_location
518 attribute for the variable in question.
519 Regardless of the severe mental illness of the x86/svr4 SDB, we
520 do something sensible here and we use the following DWARF
521 register numbers. Note that these are all stack-top-relative
523 11 for %st(0) (gcc regno = 8)
524 12 for %st(1) (gcc regno = 9)
525 13 for %st(2) (gcc regno = 10)
526 14 for %st(3) (gcc regno = 11)
527 15 for %st(4) (gcc regno = 12)
528 16 for %st(5) (gcc regno = 13)
529 17 for %st(6) (gcc regno = 14)
530 18 for %st(7) (gcc regno = 15)
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

/* The encoding characters for the four TLS models present in ELF.  */
static char const tls_model_chars[] = " GLil";
#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
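/* Worked example (assuming the usual 64-bit values REGPARM_MAX == 6,
   SSE_REGPARM_MAX == 8 and UNITS_PER_WORD == 8):

     X86_64_VARARGS_SIZE == 6 * 8 + 8 * 16 == 176 bytes,

   i.e. room to spill the six integer and eight SSE argument registers in a
   varargs prologue.  */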
/* Define the structure for the machine field in struct function.  */
struct machine_function GTY(())
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  const char *some_ld_name;
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
569 /* Structure describing stack frame layout.
570 Stack grows downward:
576 saved frame pointer if frame_pointer_needed
577 <- HARD_FRAME_POINTER
583 > to_allocate <- FRAME_POINTER
595 int outgoing_arguments_size
;
598 HOST_WIDE_INT to_allocate
;
599 /* The offsets relative to ARG_POINTER. */
600 HOST_WIDE_INT frame_pointer_offset
;
601 HOST_WIDE_INT hard_frame_pointer_offset
;
602 HOST_WIDE_INT stack_pointer_offset
;
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;

/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
enum cmodel ix86_cmodel;

const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;

const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_cpu;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
665 static int local_symbolic_operand
PARAMS ((rtx
, enum machine_mode
));
666 static int tls_symbolic_operand_1
PARAMS ((rtx
, enum tls_model
));
667 static void output_pic_addr_const
PARAMS ((FILE *, rtx
, int));
668 static void put_condition_code
PARAMS ((enum rtx_code
, enum machine_mode
,
670 static const char *get_some_local_dynamic_name
PARAMS ((void));
671 static int get_some_local_dynamic_name_1
PARAMS ((rtx
*, void *));
672 static rtx maybe_get_pool_constant
PARAMS ((rtx
));
673 static rtx ix86_expand_int_compare
PARAMS ((enum rtx_code
, rtx
, rtx
));
674 static enum rtx_code ix86_prepare_fp_compare_args
PARAMS ((enum rtx_code
,
676 static rtx get_thread_pointer
PARAMS ((void));
677 static void get_pc_thunk_name
PARAMS ((char [32], unsigned int));
678 static rtx gen_push
PARAMS ((rtx
));
679 static int memory_address_length
PARAMS ((rtx addr
));
680 static int ix86_flags_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
681 static int ix86_agi_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
682 static enum attr_ppro_uops ix86_safe_ppro_uops
PARAMS ((rtx
));
683 static void ix86_dump_ppro_packet
PARAMS ((FILE *));
684 static void ix86_reorder_insn
PARAMS ((rtx
*, rtx
*));
685 static struct machine_function
* ix86_init_machine_status
PARAMS ((void));
686 static int ix86_split_to_parts
PARAMS ((rtx
, rtx
*, enum machine_mode
));
687 static int ix86_nsaved_regs
PARAMS ((void));
688 static void ix86_emit_save_regs
PARAMS ((void));
689 static void ix86_emit_save_regs_using_mov
PARAMS ((rtx
, HOST_WIDE_INT
));
690 static void ix86_emit_restore_regs_using_mov
PARAMS ((rtx
, int, int));
691 static void ix86_output_function_epilogue
PARAMS ((FILE *, HOST_WIDE_INT
));
692 static void ix86_set_move_mem_attrs_1
PARAMS ((rtx
, rtx
, rtx
, rtx
, rtx
));
693 static void ix86_sched_reorder_ppro
PARAMS ((rtx
*, rtx
*));
694 static HOST_WIDE_INT ix86_GOT_alias_set
PARAMS ((void));
695 static void ix86_adjust_counter
PARAMS ((rtx
, HOST_WIDE_INT
));
696 static rtx ix86_expand_aligntest
PARAMS ((rtx
, int));
697 static void ix86_expand_strlensi_unroll_1
PARAMS ((rtx
, rtx
));
698 static int ix86_issue_rate
PARAMS ((void));
699 static int ix86_adjust_cost
PARAMS ((rtx
, rtx
, rtx
, int));
700 static void ix86_sched_init
PARAMS ((FILE *, int, int));
701 static int ix86_sched_reorder
PARAMS ((FILE *, int, rtx
*, int *, int));
702 static int ix86_variable_issue
PARAMS ((FILE *, int, rtx
, int));
703 static int ia32_use_dfa_pipeline_interface
PARAMS ((void));
704 static int ia32_multipass_dfa_lookahead
PARAMS ((void));
705 static void ix86_init_mmx_sse_builtins
PARAMS ((void));
709 rtx base
, index
, disp
;
713 static int ix86_decompose_address
PARAMS ((rtx
, struct ix86_address
*));
715 static void ix86_encode_section_info
PARAMS ((tree
, int)) ATTRIBUTE_UNUSED
;
716 static const char *ix86_strip_name_encoding
PARAMS ((const char *))
719 struct builtin_description
;
720 static rtx ix86_expand_sse_comi
PARAMS ((const struct builtin_description
*,
722 static rtx ix86_expand_sse_compare
PARAMS ((const struct builtin_description
*,
724 static rtx ix86_expand_unop1_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
725 static rtx ix86_expand_unop_builtin
PARAMS ((enum insn_code
, tree
, rtx
, int));
726 static rtx ix86_expand_binop_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
727 static rtx ix86_expand_timode_binop_builtin
PARAMS ((enum insn_code
,
729 static rtx ix86_expand_store_builtin
PARAMS ((enum insn_code
, tree
));
730 static rtx safe_vector_operand
PARAMS ((rtx
, enum machine_mode
));
731 static enum rtx_code ix86_fp_compare_code_to_integer
PARAMS ((enum rtx_code
));
732 static void ix86_fp_comparison_codes
PARAMS ((enum rtx_code code
,
736 static rtx ix86_expand_fp_compare
PARAMS ((enum rtx_code
, rtx
, rtx
, rtx
,
738 static int ix86_fp_comparison_arithmetics_cost
PARAMS ((enum rtx_code code
));
739 static int ix86_fp_comparison_fcomi_cost
PARAMS ((enum rtx_code code
));
740 static int ix86_fp_comparison_sahf_cost
PARAMS ((enum rtx_code code
));
741 static int ix86_fp_comparison_cost
PARAMS ((enum rtx_code code
));
742 static unsigned int ix86_select_alt_pic_regnum
PARAMS ((void));
743 static int ix86_save_reg
PARAMS ((unsigned int, int));
744 static void ix86_compute_frame_layout
PARAMS ((struct ix86_frame
*));
745 static int ix86_comp_type_attributes
PARAMS ((tree
, tree
));
746 const struct attribute_spec ix86_attribute_table
[];
747 static tree ix86_handle_cdecl_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
748 static tree ix86_handle_regparm_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
750 #ifdef DO_GLOBAL_CTORS_BODY
751 static void ix86_svr3_asm_out_constructor
PARAMS ((rtx
, int));
/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class, except
   that gcc will use SFmode or DFmode moves instead of DImode moves to avoid
   reformatting penalties.

   Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half then contains only padding).  */
762 enum x86_64_reg_class
765 X86_64_INTEGER_CLASS
,
766 X86_64_INTEGERSI_CLASS
,
static const char * const x86_64_reg_class_name[] =
   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
778 #define MAX_CLASSES 4
779 static int classify_argument
PARAMS ((enum machine_mode
, tree
,
780 enum x86_64_reg_class
[MAX_CLASSES
],
782 static int examine_argument
PARAMS ((enum machine_mode
, tree
, int, int *,
784 static rtx construct_container
PARAMS ((enum machine_mode
, tree
, int, int, int,
786 static enum x86_64_reg_class merge_classes
PARAMS ((enum x86_64_reg_class
,
787 enum x86_64_reg_class
));
789 /* Initialize the GCC target structure. */
790 #undef TARGET_ATTRIBUTE_TABLE
791 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
792 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
793 # undef TARGET_MERGE_DECL_ATTRIBUTES
794 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
797 #undef TARGET_COMP_TYPE_ATTRIBUTES
798 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
800 #undef TARGET_INIT_BUILTINS
801 #define TARGET_INIT_BUILTINS ix86_init_builtins
803 #undef TARGET_EXPAND_BUILTIN
804 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
806 #undef TARGET_ASM_FUNCTION_EPILOGUE
807 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
809 #undef TARGET_ASM_OPEN_PAREN
810 #define TARGET_ASM_OPEN_PAREN ""
811 #undef TARGET_ASM_CLOSE_PAREN
812 #define TARGET_ASM_CLOSE_PAREN ""
814 #undef TARGET_ASM_ALIGNED_HI_OP
815 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
816 #undef TARGET_ASM_ALIGNED_SI_OP
817 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
819 #undef TARGET_ASM_ALIGNED_DI_OP
820 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
823 #undef TARGET_ASM_UNALIGNED_HI_OP
824 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
825 #undef TARGET_ASM_UNALIGNED_SI_OP
826 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
827 #undef TARGET_ASM_UNALIGNED_DI_OP
828 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
830 #undef TARGET_SCHED_ADJUST_COST
831 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
832 #undef TARGET_SCHED_ISSUE_RATE
833 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
834 #undef TARGET_SCHED_VARIABLE_ISSUE
835 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
836 #undef TARGET_SCHED_INIT
837 #define TARGET_SCHED_INIT ix86_sched_init
838 #undef TARGET_SCHED_REORDER
839 #define TARGET_SCHED_REORDER ix86_sched_reorder
840 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
841 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
842 ia32_use_dfa_pipeline_interface
843 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
844 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
845 ia32_multipass_dfa_lookahead
848 #undef TARGET_HAVE_TLS
849 #define TARGET_HAVE_TLS true
852 struct gcc_target targetm
= TARGET_INITIALIZER
;
854 /* Sometimes certain combinations of command options do not make
855 sense on a particular target machine. You can define a macro
856 `OVERRIDE_OPTIONS' to take account of this. This macro, if
857 defined, is executed once just after all the command options have
860 Don't use this macro to turn on various extra optimizations for
861 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
867 /* Comes from final.c -- no real reason to change it. */
868 #define MAX_CODE_ALIGN 16
872 const struct processor_costs
*cost
; /* Processor costs */
873 const int target_enable
; /* Target flags to enable. */
874 const int target_disable
; /* Target flags to disable. */
875 const int align_loop
; /* Default alignments. */
876 const int align_loop_max_skip
;
877 const int align_jump
;
878 const int align_jump_max_skip
;
879 const int align_func
;
880 const int branch_cost
;
const processor_target_table[PROCESSOR_max] =
{
  {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
  {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
  {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
  {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
  {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
  {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
  {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
};

static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
896 const char *const name
; /* processor name or nickname. */
897 const enum processor_type processor
;
903 PTA_PREFETCH_SSE
= 8,
908 const processor_alias_table
[] =
910 {"i386", PROCESSOR_I386
, 0},
911 {"i486", PROCESSOR_I486
, 0},
912 {"i586", PROCESSOR_PENTIUM
, 0},
913 {"pentium", PROCESSOR_PENTIUM
, 0},
914 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
915 {"i686", PROCESSOR_PENTIUMPRO
, 0},
916 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
917 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
918 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
919 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
|
920 PTA_MMX
| PTA_PREFETCH_SSE
},
921 {"k6", PROCESSOR_K6
, PTA_MMX
},
922 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
923 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
924 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
926 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
927 | PTA_3DNOW
| PTA_3DNOW_A
},
928 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
929 | PTA_3DNOW_A
| PTA_SSE
},
930 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
931 | PTA_3DNOW_A
| PTA_SSE
},
932 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
933 | PTA_3DNOW_A
| PTA_SSE
},
936 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
938 #ifdef SUBTARGET_OVERRIDE_OPTIONS
939 SUBTARGET_OVERRIDE_OPTIONS
;
942 if (!ix86_cpu_string
&& ix86_arch_string
)
943 ix86_cpu_string
= ix86_arch_string
;
944 if (!ix86_cpu_string
)
945 ix86_cpu_string
= cpu_names
[TARGET_CPU_DEFAULT
];
946 if (!ix86_arch_string
)
947 ix86_arch_string
= TARGET_64BIT
? "athlon-4" : "i386";
949 if (ix86_cmodel_string
!= 0)
951 if (!strcmp (ix86_cmodel_string
, "small"))
952 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
954 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
955 else if (!strcmp (ix86_cmodel_string
, "32"))
957 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
958 ix86_cmodel
= CM_KERNEL
;
959 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
960 ix86_cmodel
= CM_MEDIUM
;
961 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
962 ix86_cmodel
= CM_LARGE
;
964 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
970 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
972 if (ix86_asm_string
!= 0)
974 if (!strcmp (ix86_asm_string
, "intel"))
975 ix86_asm_dialect
= ASM_INTEL
;
976 else if (!strcmp (ix86_asm_string
, "att"))
977 ix86_asm_dialect
= ASM_ATT
;
979 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
981 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
982 error ("code model `%s' not supported in the %s bit mode",
983 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
984 if (ix86_cmodel
== CM_LARGE
)
985 sorry ("code model `large' not supported yet");
986 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
987 sorry ("%i-bit mode not compiled in",
988 (target_flags
& MASK_64BIT
) ? 64 : 32);
990 for (i
= 0; i
< pta_size
; i
++)
991 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
993 ix86_arch
= processor_alias_table
[i
].processor
;
994 /* Default cpu tuning to the architecture. */
995 ix86_cpu
= ix86_arch
;
996 if (processor_alias_table
[i
].flags
& PTA_MMX
997 && !(target_flags
& MASK_MMX_SET
))
998 target_flags
|= MASK_MMX
;
999 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1000 && !(target_flags
& MASK_3DNOW_SET
))
1001 target_flags
|= MASK_3DNOW
;
1002 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1003 && !(target_flags
& MASK_3DNOW_A_SET
))
1004 target_flags
|= MASK_3DNOW_A
;
1005 if (processor_alias_table
[i
].flags
& PTA_SSE
1006 && !(target_flags
& MASK_SSE_SET
))
1007 target_flags
|= MASK_SSE
;
1008 if (processor_alias_table
[i
].flags
& PTA_SSE2
1009 && !(target_flags
& MASK_SSE2_SET
))
1010 target_flags
|= MASK_SSE2
;
1011 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1012 x86_prefetch_sse
= true;
1017 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1019 for (i
= 0; i
< pta_size
; i
++)
1020 if (! strcmp (ix86_cpu_string
, processor_alias_table
[i
].name
))
1022 ix86_cpu
= processor_alias_table
[i
].processor
;
1025 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1026 x86_prefetch_sse
= true;
1028 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string
);
1031 ix86_cost
= &size_cost
;
1033 ix86_cost
= processor_target_table
[ix86_cpu
].cost
;
1034 target_flags
|= processor_target_table
[ix86_cpu
].target_enable
;
1035 target_flags
&= ~processor_target_table
[ix86_cpu
].target_disable
;
1037 /* Arrange to set up i386_stack_locals for all functions. */
1038 init_machine_status
= ix86_init_machine_status
;
1040 /* Validate -mregparm= value. */
1041 if (ix86_regparm_string
)
1043 i
= atoi (ix86_regparm_string
);
1044 if (i
< 0 || i
> REGPARM_MAX
)
1045 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1051 ix86_regparm
= REGPARM_MAX
;
1053 /* If the user has provided any of the -malign-* options,
1054 warn and use that value only if -falign-* is not set.
1055 Remove this code in GCC 3.2 or later. */
1056 if (ix86_align_loops_string
)
1058 warning ("-malign-loops is obsolete, use -falign-loops");
1059 if (align_loops
== 0)
1061 i
= atoi (ix86_align_loops_string
);
1062 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1063 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1065 align_loops
= 1 << i
;
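	  /* Worked example (illustration only): -malign-loops=4 reaches this
	     point with i == 4, so align_loops becomes 1 << 4 == 16, i.e. a
	     16-byte loop alignment, matching the "power of two" convention
	     of the -malign-* options.  */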
1069 if (ix86_align_jumps_string
)
1071 warning ("-malign-jumps is obsolete, use -falign-jumps");
1072 if (align_jumps
== 0)
1074 i
= atoi (ix86_align_jumps_string
);
1075 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1076 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1078 align_jumps
= 1 << i
;
1082 if (ix86_align_funcs_string
)
1084 warning ("-malign-functions is obsolete, use -falign-functions");
1085 if (align_functions
== 0)
1087 i
= atoi (ix86_align_funcs_string
);
1088 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1089 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1091 align_functions
= 1 << i
;
1095 /* Default align_* from the processor table. */
1096 if (align_loops
== 0)
1098 align_loops
= processor_target_table
[ix86_cpu
].align_loop
;
1099 align_loops_max_skip
= processor_target_table
[ix86_cpu
].align_loop_max_skip
;
1101 if (align_jumps
== 0)
1103 align_jumps
= processor_target_table
[ix86_cpu
].align_jump
;
1104 align_jumps_max_skip
= processor_target_table
[ix86_cpu
].align_jump_max_skip
;
1106 if (align_functions
== 0)
1108 align_functions
= processor_target_table
[ix86_cpu
].align_func
;
1111 /* Validate -mpreferred-stack-boundary= value, or provide default.
1112 The default of 128 bits is for Pentium III's SSE __m128, but we
1113 don't want additional code to keep the stack aligned when
1114 optimizing for code size. */
1115 ix86_preferred_stack_boundary
= (optimize_size
1116 ? TARGET_64BIT
? 64 : 32
1118 if (ix86_preferred_stack_boundary_string
)
1120 i
= atoi (ix86_preferred_stack_boundary_string
);
1121 if (i
< (TARGET_64BIT
? 3 : 2) || i
> 12)
1122 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1123 TARGET_64BIT
? 3 : 2);
1125 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
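  /* Worked example (illustration only): -mpreferred-stack-boundary=4 gives
     i == 4, so ix86_preferred_stack_boundary becomes (1 << 4) * 8 == 128
     bits, i.e. the 16-byte alignment that SSE __m128 spills want.  */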
1128 /* Validate -mbranch-cost= value, or provide default. */
1129 ix86_branch_cost
= processor_target_table
[ix86_cpu
].branch_cost
;
1130 if (ix86_branch_cost_string
)
1132 i
= atoi (ix86_branch_cost_string
);
1134 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1136 ix86_branch_cost
= i
;
1139 if (ix86_tls_dialect_string
)
1141 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1142 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1143 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1144 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1146 error ("bad value (%s) for -mtls-dialect= switch",
1147 ix86_tls_dialect_string
);
1150 /* Keep nonleaf frame pointers. */
1151 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1152 flag_omit_frame_pointer
= 1;
1154 /* If we're doing fast math, we don't care about comparison order
1155 wrt NaNs. This lets us use a shorter comparison sequence. */
1156 if (flag_unsafe_math_optimizations
)
1157 target_flags
&= ~MASK_IEEE_FP
;
1159 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1160 since the insns won't need emulation. */
1161 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1162 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1166 if (TARGET_ALIGN_DOUBLE
)
1167 error ("-malign-double makes no sense in the 64bit mode");
1169 error ("-mrtd calling convention not supported in the 64bit mode");
1170 /* Enable by default the SSE and MMX builtins. */
1171 target_flags
|= (MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
);
1172 ix86_fpmath
= FPMATH_SSE
;
1175 ix86_fpmath
= FPMATH_387
;
1177 if (ix86_fpmath_string
!= 0)
1179 if (! strcmp (ix86_fpmath_string
, "387"))
1180 ix86_fpmath
= FPMATH_387
;
1181 else if (! strcmp (ix86_fpmath_string
, "sse"))
1185 warning ("SSE instruction set disabled, using 387 arithmetics");
1186 ix86_fpmath
= FPMATH_387
;
1189 ix86_fpmath
= FPMATH_SSE
;
1191 else if (! strcmp (ix86_fpmath_string
, "387,sse")
1192 || ! strcmp (ix86_fpmath_string
, "sse,387"))
1196 warning ("SSE instruction set disabled, using 387 arithmetics");
1197 ix86_fpmath
= FPMATH_387
;
1199 else if (!TARGET_80387
)
1201 warning ("387 instruction set disabled, using SSE arithmetics");
1202 ix86_fpmath
= FPMATH_SSE
;
1205 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
1208 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
1211 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1215 target_flags
|= MASK_MMX
;
1216 x86_prefetch_sse
= true;
1219 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1222 target_flags
|= MASK_MMX
;
/* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
   extensions it adds.  */
1225 if (x86_3dnow_a
& (1 << ix86_arch
))
1226 target_flags
|= MASK_3DNOW_A
;
1228 if ((x86_accumulate_outgoing_args
& CPUMASK
)
1229 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS_SET
)
1231 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1233 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1236 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1237 p
= strchr (internal_label_prefix
, 'X');
1238 internal_label_prefix_len
= p
- internal_label_prefix
;
1244 optimization_options (level
, size
)
1246 int size ATTRIBUTE_UNUSED
;
1248 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1249 make the problem with not enough registers even worse. */
1250 #ifdef INSN_SCHEDULING
1252 flag_schedule_insns
= 0;
1254 if (TARGET_64BIT
&& optimize
>= 1)
1255 flag_omit_frame_pointer
= 1;
1258 flag_pcc_struct_return
= 0;
1259 flag_asynchronous_unwind_tables
= 1;
1263 /* Table of valid machine attributes. */
1264 const struct attribute_spec ix86_attribute_table
[] =
1266 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1267 /* Stdcall attribute says callee is responsible for popping arguments
1268 if they are not variable. */
1269 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1270 /* Cdecl attribute says the callee is a normal C declaration */
1271 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1272 /* Regparm attribute specifies how many integer arguments are to be
1273 passed in registers. */
1274 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute
},
1275 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1276 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1277 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1278 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
1280 { NULL
, 0, 0, false, false, false, NULL
}
1283 /* Handle a "cdecl" or "stdcall" attribute;
1284 arguments as in struct attribute_spec.handler. */
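/* For illustration (not from the original file), these attributes are
   written on function declarations in user code, e.g.

     int __attribute__ ((stdcall)) callback (int msg);
     int __attribute__ ((cdecl)) plain_callback (int msg);

   Anything that is not a function type falls into the warning paths in the
   handler below.  */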
1286 ix86_handle_cdecl_attribute (node
, name
, args
, flags
, no_add_attrs
)
1289 tree args ATTRIBUTE_UNUSED
;
1290 int flags ATTRIBUTE_UNUSED
;
1293 if (TREE_CODE (*node
) != FUNCTION_TYPE
1294 && TREE_CODE (*node
) != METHOD_TYPE
1295 && TREE_CODE (*node
) != FIELD_DECL
1296 && TREE_CODE (*node
) != TYPE_DECL
)
1298 warning ("`%s' attribute only applies to functions",
1299 IDENTIFIER_POINTER (name
));
1300 *no_add_attrs
= true;
1305 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
1306 *no_add_attrs
= true;
1312 /* Handle a "regparm" attribute;
1313 arguments as in struct attribute_spec.handler. */
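/* For illustration (not from the original file): regparm takes the number
   of integer arguments to pass in registers, e.g.

     int __attribute__ ((regparm (3))) fast_add (int a, int b, int c);

   asks for up to three integer arguments in registers (conventionally
   %eax, %edx and %ecx on IA-32); values above REGPARM_MAX are rejected by
   the handler below.  */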
1315 ix86_handle_regparm_attribute (node
, name
, args
, flags
, no_add_attrs
)
1319 int flags ATTRIBUTE_UNUSED
;
1322 if (TREE_CODE (*node
) != FUNCTION_TYPE
1323 && TREE_CODE (*node
) != METHOD_TYPE
1324 && TREE_CODE (*node
) != FIELD_DECL
1325 && TREE_CODE (*node
) != TYPE_DECL
)
1327 warning ("`%s' attribute only applies to functions",
1328 IDENTIFIER_POINTER (name
));
1329 *no_add_attrs
= true;
1335 cst
= TREE_VALUE (args
);
1336 if (TREE_CODE (cst
) != INTEGER_CST
)
1338 warning ("`%s' attribute requires an integer constant argument",
1339 IDENTIFIER_POINTER (name
));
1340 *no_add_attrs
= true;
1342 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
1344 warning ("argument to `%s' attribute larger than %d",
1345 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
1346 *no_add_attrs
= true;
1353 /* Return 0 if the attributes for two types are incompatible, 1 if they
1354 are compatible, and 2 if they are nearly compatible (which causes a
1355 warning to be generated). */
1358 ix86_comp_type_attributes (type1
, type2
)
1362 /* Check for mismatch of non-default calling convention. */
1363 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
1365 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
1368 /* Check for mismatched return types (cdecl vs stdcall). */
1369 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
1370 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
1375 /* Value is the number of bytes of arguments automatically
1376 popped when returning from a subroutine call.
1377 FUNDECL is the declaration node of the function (as a tree),
1378 FUNTYPE is the data type of the function (as a tree),
1379 or for a library call it is an identifier node for the subroutine name.
1380 SIZE is the number of bytes of arguments passed on the stack.
1382 On the 80386, the RTD insn may be used to pop them if the number
1383 of args is fixed, but if the number is variable then the caller
1384 must pop them all. RTD can't be used for library calls now
1385 because the library is compiled with the Unix compiler.
1386 Use of RTD is a selectable option, since it is incompatible with
1387 standard Unix calling sequences. If the option is not selected,
1388 the caller must always pop the args.
1390 The attribute stdcall is equivalent to RTD on a per module basis. */
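/* Worked example (illustration only): for a fixed-argument stdcall function
   such as

     int __attribute__ ((stdcall)) f (int a, int b);

   this hook returns 8, so the callee pops its two 4-byte arguments with
   "ret $8"; a cdecl or varargs function returns 0 and the caller pops.  */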
1393 ix86_return_pops_args (fundecl
, funtype
, size
)
1398 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
1400 /* Cdecl functions override -mrtd, and never pop the stack. */
1401 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
1403 /* Stdcall functions will pop the stack if not variable args. */
1404 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
)))
1408 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1409 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
1410 == void_type_node
)))
1414 /* Lose any fake structure return argument if it is passed on the stack. */
1415 if (aggregate_value_p (TREE_TYPE (funtype
))
1418 int nregs
= ix86_regparm
;
1422 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype
));
1425 nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1429 return GET_MODE_SIZE (Pmode
);
1435 /* Argument support functions. */
1437 /* Return true when register may be used to pass function parameters. */
1439 ix86_function_arg_regno_p (regno
)
1444 return (regno
< REGPARM_MAX
1445 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
1446 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
1448 /* RAX is used as hidden argument to va_arg functions. */
1451 for (i
= 0; i
< REGPARM_MAX
; i
++)
1452 if (regno
== x86_64_int_parameter_registers
[i
])
1457 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1458 for a call to a function whose data type is FNTYPE.
1459 For a library call, FNTYPE is 0. */
1462 init_cumulative_args (cum
, fntype
, libname
)
1463 CUMULATIVE_ARGS
*cum
; /* Argument info to initialize */
1464 tree fntype
; /* tree ptr for function decl */
1465 rtx libname
; /* SYMBOL_REF of library name or 0 */
1467 static CUMULATIVE_ARGS zero_cum
;
1468 tree param
, next_param
;
1470 if (TARGET_DEBUG_ARG
)
1472 fprintf (stderr
, "\ninit_cumulative_args (");
1474 fprintf (stderr
, "fntype code = %s, ret code = %s",
1475 tree_code_name
[(int) TREE_CODE (fntype
)],
1476 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
1478 fprintf (stderr
, "no fntype");
1481 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
1486 /* Set up the number of registers to use for passing arguments. */
1487 cum
->nregs
= ix86_regparm
;
1488 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1489 if (fntype
&& !TARGET_64BIT
)
1491 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype
));
1494 cum
->nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1496 cum
->maybe_vaarg
= false;
  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers.  */
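  /* For illustration (not from the original file): a prototype such as
     "int f (int a, int b);" ends its TYPE_ARG_TYPES chain with
     void_type_node, so the loop below leaves maybe_vaarg false; for
     "int f (int a, ...)" the chain ends without void_type_node, the
     function is treated as possibly-varargs and, per the comment above,
     arguments are not passed in registers.  */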
1505 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
1506 param
!= 0; param
= next_param
)
1508 next_param
= TREE_CHAIN (param
);
1509 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
1513 cum
->maybe_vaarg
= true;
1517 if ((!fntype
&& !libname
)
1518 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
1519 cum
->maybe_vaarg
= 1;
1521 if (TARGET_DEBUG_ARG
)
1522 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
/* x86-64 register passing implementation.  See the x86-64 ABI for details.
   The goal of this code is to classify each eightbyte of an incoming
   argument by register class and assign registers accordingly.  */
1531 /* Return the union class of CLASS1 and CLASS2.
1532 See the x86-64 PS ABI for details. */
1534 static enum x86_64_reg_class
1535 merge_classes (class1
, class2
)
1536 enum x86_64_reg_class class1
, class2
;
1538 /* Rule #1: If both classes are equal, this is the resulting class. */
1539 if (class1
== class2
)
1542 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1544 if (class1
== X86_64_NO_CLASS
)
1546 if (class2
== X86_64_NO_CLASS
)
1549 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1550 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
1551 return X86_64_MEMORY_CLASS
;
1553 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1554 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
1555 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
1556 return X86_64_INTEGERSI_CLASS
;
1557 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
1558 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
1559 return X86_64_INTEGER_CLASS
;
1561 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1562 if (class1
== X86_64_X87_CLASS
|| class1
== X86_64_X87UP_CLASS
1563 || class2
== X86_64_X87_CLASS
|| class2
== X86_64_X87UP_CLASS
)
1564 return X86_64_MEMORY_CLASS
;
1566 /* Rule #6: Otherwise class SSE is used. */
1567 return X86_64_SSE_CLASS
;
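/* Worked example (illustration only, following the merge rules above and
   the x86-64 psABI): a structure such as

     struct s { int a; int b; double d; };

   spans two eightbytes; the first (the two ints) classifies as INTEGER and
   the second (the double) as SSEDF, so the aggregate is passed in one
   general-purpose register and one SSE register rather than in memory.  */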
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records; it gives the offset
   in bits, taken modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */
1583 classify_argument (mode
, type
, classes
, bit_offset
)
1584 enum machine_mode mode
;
1586 enum x86_64_reg_class classes
[MAX_CLASSES
];
1590 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1591 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1593 if (type
&& AGGREGATE_TYPE_P (type
))
1597 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
1599 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1603 for (i
= 0; i
< words
; i
++)
1604 classes
[i
] = X86_64_NO_CLASS
;
1606 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1607 signalize memory class, so handle it as special case. */
1610 classes
[0] = X86_64_NO_CLASS
;
1614 /* Classify each field of record and merge classes. */
1615 if (TREE_CODE (type
) == RECORD_TYPE
)
1617 /* For classes first merge in the field of the subclasses. */
1618 if (TYPE_BINFO (type
) != NULL
&& TYPE_BINFO_BASETYPES (type
) != NULL
)
1620 tree bases
= TYPE_BINFO_BASETYPES (type
);
1621 int n_bases
= TREE_VEC_LENGTH (bases
);
1624 for (i
= 0; i
< n_bases
; ++i
)
1626 tree binfo
= TREE_VEC_ELT (bases
, i
);
1628 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
1629 tree type
= BINFO_TYPE (binfo
);
1631 num
= classify_argument (TYPE_MODE (type
),
1633 (offset
+ bit_offset
) % 256);
1636 for (i
= 0; i
< num
; i
++)
1638 int pos
= (offset
+ bit_offset
) / 8 / 8;
1640 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1644 /* And now merge the fields of structure. */
1645 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1647 if (TREE_CODE (field
) == FIELD_DECL
)
1651 /* Bitfields are always classified as integer. Handle them
1652 early, since later code would consider them to be
1653 misaligned integers. */
1654 if (DECL_BIT_FIELD (field
))
1656 for (i
= int_bit_position (field
) / 8 / 8;
1657 i
< (int_bit_position (field
)
1658 + tree_low_cst (DECL_SIZE (field
), 0)
1661 merge_classes (X86_64_INTEGER_CLASS
,
1666 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1667 TREE_TYPE (field
), subclasses
,
1668 (int_bit_position (field
)
1669 + bit_offset
) % 256);
1672 for (i
= 0; i
< num
; i
++)
1675 (int_bit_position (field
) + bit_offset
) / 8 / 8;
1677 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1683 /* Arrays are handled as small records. */
1684 else if (TREE_CODE (type
) == ARRAY_TYPE
)
1687 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
1688 TREE_TYPE (type
), subclasses
, bit_offset
);
1692 /* The partial classes are now full classes. */
1693 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
1694 subclasses
[0] = X86_64_SSE_CLASS
;
1695 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
1696 subclasses
[0] = X86_64_INTEGER_CLASS
;
1698 for (i
= 0; i
< words
; i
++)
1699 classes
[i
] = subclasses
[i
% num
];
1701 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1702 else if (TREE_CODE (type
) == UNION_TYPE
1703 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
1705 /* For classes first merge in the field of the subclasses. */
1706 if (TYPE_BINFO (type
) != NULL
&& TYPE_BINFO_BASETYPES (type
) != NULL
)
1708 tree bases
= TYPE_BINFO_BASETYPES (type
);
1709 int n_bases
= TREE_VEC_LENGTH (bases
);
1712 for (i
= 0; i
< n_bases
; ++i
)
1714 tree binfo
= TREE_VEC_ELT (bases
, i
);
1716 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
1717 tree type
= BINFO_TYPE (binfo
);
1719 num
= classify_argument (TYPE_MODE (type
),
1721 (offset
+ bit_offset
) % 256);
1724 for (i
= 0; i
< num
; i
++)
1726 int pos
= (offset
+ bit_offset
) / 8 / 8;
1728 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1732 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1734 if (TREE_CODE (field
) == FIELD_DECL
)
1737 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1738 TREE_TYPE (field
), subclasses
,
1742 for (i
= 0; i
< num
; i
++)
1743 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
1750 /* Final merger cleanup. */
1751 for (i
= 0; i
< words
; i
++)
1753 /* If one class is MEMORY, everything should be passed in
1755 if (classes
[i
] == X86_64_MEMORY_CLASS
)
      /* The X86_64_SSEUP_CLASS should always be preceded by
	 X86_64_SSE_CLASS.  */
1760 if (classes
[i
] == X86_64_SSEUP_CLASS
1761 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
1762 classes
[i
] = X86_64_SSE_CLASS
;
1764 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1765 if (classes
[i
] == X86_64_X87UP_CLASS
1766 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
1767 classes
[i
] = X86_64_SSE_CLASS
;
  /* Compute the alignment needed.  We align all types to their natural
     boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
1774 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
1776 int mode_alignment
= GET_MODE_BITSIZE (mode
);
1779 mode_alignment
= 128;
1780 else if (mode
== XCmode
)
1781 mode_alignment
= 256;
1782 /* Misaligned fields are always returned in memory. */
1783 if (bit_offset
% mode_alignment
)
1787 /* Classification of atomic types. */
1797 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
1798 classes
[0] = X86_64_INTEGERSI_CLASS
;
1800 classes
[0] = X86_64_INTEGER_CLASS
;
1804 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1807 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1808 classes
[2] = classes
[3] = X86_64_INTEGER_CLASS
;
1811 if (!(bit_offset
% 64))
1812 classes
[0] = X86_64_SSESF_CLASS
;
1814 classes
[0] = X86_64_SSE_CLASS
;
1817 classes
[0] = X86_64_SSEDF_CLASS
;
1820 classes
[0] = X86_64_X87_CLASS
;
1821 classes
[1] = X86_64_X87UP_CLASS
;
1824 classes
[0] = X86_64_X87_CLASS
;
1825 classes
[1] = X86_64_X87UP_CLASS
;
1826 classes
[2] = X86_64_X87_CLASS
;
1827 classes
[3] = X86_64_X87UP_CLASS
;
1830 classes
[0] = X86_64_SSEDF_CLASS
;
1831 classes
[1] = X86_64_SSEDF_CLASS
;
1834 classes
[0] = X86_64_SSE_CLASS
;
1842 classes
[0] = X86_64_SSE_CLASS
;
1843 classes
[1] = X86_64_SSEUP_CLASS
;
1849 classes
[0] = X86_64_SSE_CLASS
;
/* Examine the argument and return the number of registers required in each
   class.  Return 0 iff the parameter should be passed in memory.  */
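/* Illustrative note (not from the original source): for the 16-byte struct
   sketched in classify_argument above, this would report one integer and one
   SSE register; for an aggregate larger than 16 bytes, classify_argument
   returns 0 (the memory class), so this returns 0 and the value is passed on
   the stack.  */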
1862 examine_argument (mode
, type
, in_return
, int_nregs
, sse_nregs
)
1863 enum machine_mode mode
;
1865 int *int_nregs
, *sse_nregs
;
1868 enum x86_64_reg_class
class[MAX_CLASSES
];
1869 int n
= classify_argument (mode
, type
, class, 0);
1875 for (n
--; n
>= 0; n
--)
1878 case X86_64_INTEGER_CLASS
:
1879 case X86_64_INTEGERSI_CLASS
:
1882 case X86_64_SSE_CLASS
:
1883 case X86_64_SSESF_CLASS
:
1884 case X86_64_SSEDF_CLASS
:
1887 case X86_64_NO_CLASS
:
1888 case X86_64_SSEUP_CLASS
:
1890 case X86_64_X87_CLASS
:
1891 case X86_64_X87UP_CLASS
:
1895 case X86_64_MEMORY_CLASS
:
1900 /* Construct container for the argument used by GCC interface. See
1901 FUNCTION_ARG for the detailed description. */
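/* Illustrative note (not from the original source): when the whole value fits
   in a single register this returns a bare REG; otherwise it returns a
   PARALLEL whose EXPR_LIST entries pair each hard register with the byte
   offset it covers, e.g. roughly

	(parallel [(expr_list (reg:DF xmm0) (const_int 0))
		   (expr_list (reg:DI di)   (const_int 8))])

   for the struct { double; int; int; } example used earlier.  */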
1903 construct_container (mode
, type
, in_return
, nintregs
, nsseregs
, intreg
, sse_regno
)
1904 enum machine_mode mode
;
1907 int nintregs
, nsseregs
;
1911 enum machine_mode tmpmode
;
1913 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1914 enum x86_64_reg_class
class[MAX_CLASSES
];
1918 int needed_sseregs
, needed_intregs
;
1919 rtx exp
[MAX_CLASSES
];
1922 n
= classify_argument (mode
, type
, class, 0);
1923 if (TARGET_DEBUG_ARG
)
1926 fprintf (stderr
, "Memory class\n");
1929 fprintf (stderr
, "Classes:");
1930 for (i
= 0; i
< n
; i
++)
1932 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
1934 fprintf (stderr
, "\n");
1939 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
, &needed_sseregs
))
1941 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
  /* First construct simple cases.  Avoid SCmode, since we want to use a
     single register to pass this type.  */
1946 if (n
== 1 && mode
!= SCmode
)
1949 case X86_64_INTEGER_CLASS
:
1950 case X86_64_INTEGERSI_CLASS
:
1951 return gen_rtx_REG (mode
, intreg
[0]);
1952 case X86_64_SSE_CLASS
:
1953 case X86_64_SSESF_CLASS
:
1954 case X86_64_SSEDF_CLASS
:
1955 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
1956 case X86_64_X87_CLASS
:
1957 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
1958 case X86_64_NO_CLASS
:
1959 /* Zero sized array, struct or class. */
1964 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
)
1965 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
1967 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
1968 return gen_rtx_REG (TFmode
, FIRST_STACK_REG
);
1969 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
1970 && class[1] == X86_64_INTEGER_CLASS
1971 && (mode
== CDImode
|| mode
== TImode
)
1972 && intreg
[0] + 1 == intreg
[1])
1973 return gen_rtx_REG (mode
, intreg
[0]);
1975 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
1976 && class[2] == X86_64_X87_CLASS
&& class[3] == X86_64_X87UP_CLASS
)
1977 return gen_rtx_REG (TCmode
, FIRST_STACK_REG
);
1979 /* Otherwise figure out the entries of the PARALLEL. */
1980 for (i
= 0; i
< n
; i
++)
1984 case X86_64_NO_CLASS
:
1986 case X86_64_INTEGER_CLASS
:
1987 case X86_64_INTEGERSI_CLASS
:
	/* Merge TImodes on aligned occasions here too.  */
1989 if (i
* 8 + 8 > bytes
)
1990 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
1991 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
	/* We've requested 24 bytes we don't have a mode for.  Use DImode.  */
1996 if (tmpmode
== BLKmode
)
1998 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
1999 gen_rtx_REG (tmpmode
, *intreg
),
2003 case X86_64_SSESF_CLASS
:
2004 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2005 gen_rtx_REG (SFmode
,
2006 SSE_REGNO (sse_regno
)),
2010 case X86_64_SSEDF_CLASS
:
2011 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2012 gen_rtx_REG (DFmode
,
2013 SSE_REGNO (sse_regno
)),
2017 case X86_64_SSE_CLASS
:
2018 if (i
< n
&& class[i
+ 1] == X86_64_SSEUP_CLASS
)
2019 tmpmode
= TImode
, i
++;
2022 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2023 gen_rtx_REG (tmpmode
,
2024 SSE_REGNO (sse_regno
)),
2032 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2033 for (i
= 0; i
< nexps
; i
++)
2034 XVECEXP (ret
, 0, i
) = exp
[i
];
2038 /* Update the data in CUM to advance over an argument
2039 of mode MODE and data type TYPE.
2040 (TYPE is null for libcalls where that information may not be available.) */
2043 function_arg_advance (cum
, mode
, type
, named
)
2044 CUMULATIVE_ARGS
*cum
; /* current arg information */
2045 enum machine_mode mode
; /* current arg mode */
2046 tree type
; /* type of the argument or 0 if lib support */
2047 int named
; /* whether or not the argument was named */
2050 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2051 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2053 if (TARGET_DEBUG_ARG
)
2055 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2056 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2059 int int_nregs
, sse_nregs
;
2060 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
2061 cum
->words
+= words
;
2062 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2064 cum
->nregs
-= int_nregs
;
2065 cum
->sse_nregs
-= sse_nregs
;
2066 cum
->regno
+= int_nregs
;
2067 cum
->sse_regno
+= sse_nregs
;
2070 cum
->words
+= words
;
2074 if (TARGET_SSE
&& mode
== TImode
)
2076 cum
->sse_words
+= words
;
2077 cum
->sse_nregs
-= 1;
2078 cum
->sse_regno
+= 1;
2079 if (cum
->sse_nregs
<= 0)
2087 cum
->words
+= words
;
2088 cum
->nregs
-= words
;
2089 cum
->regno
+= words
;
2091 if (cum
->nregs
<= 0)
2101 /* Define where to put the arguments to a function.
2102 Value is zero to push the argument on the stack,
2103 or a hard register in which to store the argument.
2105 MODE is the argument's machine mode.
2106 TYPE is the data type of the argument (as a tree).
2107 This is null for libcalls where that information may
2109 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2110 the preceding args and about the function being called.
2111 NAMED is nonzero if this argument is a named parameter
2112 (otherwise it is an extra parameter matching an ellipsis). */
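/* Background note (x86-64 psABI convention, restated here for orientation):
   named integer arguments are drawn from x86_64_int_parameter_registers in
   order (rdi, rsi, rdx, rcx, r8, r9) and floating/SSE arguments from
   xmm0-xmm7; whatever does not fit in registers goes on the stack.  */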
2115 function_arg (cum
, mode
, type
, named
)
2116 CUMULATIVE_ARGS
*cum
; /* current arg information */
2117 enum machine_mode mode
; /* current arg mode */
2118 tree type
; /* type of the argument or 0 if lib support */
2119 int named
; /* != 0 for normal args, == 0 for ... args */
2123 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2124 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
  /* Handle a hidden AL argument containing the number of registers for
     varargs x86-64 functions.  For the i386 ABI just return constm1_rtx to
     avoid any AL settings.  */
2129 if (mode
== VOIDmode
)
2132 return GEN_INT (cum
->maybe_vaarg
2133 ? (cum
->sse_nregs
< 0
2141 ret
= construct_container (mode
, type
, 0, cum
->nregs
, cum
->sse_nregs
,
2142 &x86_64_int_parameter_registers
[cum
->regno
],
2147 /* For now, pass fp/complex values on the stack. */
2156 if (words
<= cum
->nregs
)
2157 ret
= gen_rtx_REG (mode
, cum
->regno
);
2161 ret
= gen_rtx_REG (mode
, cum
->sse_regno
);
2165 if (TARGET_DEBUG_ARG
)
2168 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2169 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2172 print_simple_rtl (stderr
, ret
);
2174 fprintf (stderr
, ", stack");
2176 fprintf (stderr
, " )\n");
2182 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2186 ix86_function_arg_boundary (mode
, type
)
2187 enum machine_mode mode
;
2192 return PARM_BOUNDARY
;
2194 align
= TYPE_ALIGN (type
);
2196 align
= GET_MODE_ALIGNMENT (mode
);
2197 if (align
< PARM_BOUNDARY
)
2198 align
= PARM_BOUNDARY
;
2204 /* Return true if N is a possible register number of function value. */
2206 ix86_function_value_regno_p (regno
)
2211 return ((regno
) == 0
2212 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
2213 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
2215 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
2216 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
2217 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
2220 /* Define how to find the value returned by a function.
2221 VALTYPE is the data type of the value (as a tree).
2222 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2223 otherwise, FUNC is 0. */
2225 ix86_function_value (valtype
)
2230 rtx ret
= construct_container (TYPE_MODE (valtype
), valtype
, 1,
2231 REGPARM_MAX
, SSE_REGPARM_MAX
,
2232 x86_64_int_return_registers
, 0);
      /* For zero sized structures, construct_container returns NULL, but we
	 need to keep the rest of the compiler happy by returning a
	 meaningful value.  */
2236 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
2240 return gen_rtx_REG (TYPE_MODE (valtype
), VALUE_REGNO (TYPE_MODE (valtype
)));
2243 /* Return false iff type is returned in memory. */
2245 ix86_return_in_memory (type
)
2248 int needed_intregs
, needed_sseregs
;
2251 return !examine_argument (TYPE_MODE (type
), type
, 1,
2252 &needed_intregs
, &needed_sseregs
);
2256 if (TYPE_MODE (type
) == BLKmode
2257 || (VECTOR_MODE_P (TYPE_MODE (type
))
2258 && int_size_in_bytes (type
) == 8)
2259 || (int_size_in_bytes (type
) > 12 && TYPE_MODE (type
) != TImode
2260 && TYPE_MODE (type
) != TFmode
2261 && !VECTOR_MODE_P (TYPE_MODE (type
))))
2267 /* Define how to find the value returned by a library function
2268 assuming the value has mode MODE. */
2270 ix86_libcall_value (mode
)
2271 enum machine_mode mode
;
2281 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
2284 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
2286 return gen_rtx_REG (mode
, 0);
2290 return gen_rtx_REG (mode
, VALUE_REGNO (mode
));
2293 /* Create the va_list data type. */
2296 ix86_build_va_list ()
2298 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
2300 /* For i386 we use plain pointer to argument area. */
2302 return build_pointer_type (char_type_node
);
2304 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
2305 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
2307 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
2308 unsigned_type_node
);
2309 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
2310 unsigned_type_node
);
2311 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
2313 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
2316 DECL_FIELD_CONTEXT (f_gpr
) = record
;
2317 DECL_FIELD_CONTEXT (f_fpr
) = record
;
2318 DECL_FIELD_CONTEXT (f_ovf
) = record
;
2319 DECL_FIELD_CONTEXT (f_sav
) = record
;
2321 TREE_CHAIN (record
) = type_decl
;
2322 TYPE_NAME (record
) = type_decl
;
2323 TYPE_FIELDS (record
) = f_gpr
;
2324 TREE_CHAIN (f_gpr
) = f_fpr
;
2325 TREE_CHAIN (f_fpr
) = f_ovf
;
2326 TREE_CHAIN (f_ovf
) = f_sav
;
2328 layout_type (record
);
2330 /* The correct type is an array type of one element. */
2331 return build_array_type (record
, build_index_type (size_zero_node
));
/* Perform any actions needed for a function that is receiving a
   variable number of arguments.
2339 MODE and TYPE are the mode and type of the current parameter.
2341 PRETEND_SIZE is a variable that should be set to the amount of stack
2342 that must be pushed by the prolog to pretend that our caller pushed
2345 Normally, this macro will push all remaining incoming registers on the
2346 stack and set PRETEND_SIZE to the length of the registers pushed. */
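/* Layout assumed by the code below (a sketch derived from the offsets used
   here and in ix86_va_start): the register save area holds REGPARM_MAX
   8-byte slots for the integer argument registers followed by
   SSE_REGPARM_MAX 16-byte slots for the SSE argument registers, so the
   SSE portion starts at offset 8 * REGPARM_MAX.  */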
2349 ix86_setup_incoming_varargs (cum
, mode
, type
, pretend_size
, no_rtl
)
2350 CUMULATIVE_ARGS
*cum
;
2351 enum machine_mode mode
;
2353 int *pretend_size ATTRIBUTE_UNUSED
;
2357 CUMULATIVE_ARGS next_cum
;
2358 rtx save_area
= NULL_RTX
, mem
;
  /* Indicate that we need to allocate space on the stack for the varargs
     save area.  */
2372 ix86_save_varrargs_registers
= 1;
2374 fntype
= TREE_TYPE (current_function_decl
);
2375 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
2376 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
2377 != void_type_node
));
2379 /* For varargs, we do not want to skip the dummy va_dcl argument.
2380 For stdargs, we do want to skip the last named argument. */
2383 function_arg_advance (&next_cum
, mode
, type
, 1);
2386 save_area
= frame_pointer_rtx
;
2388 set
= get_varargs_alias_set ();
2390 for (i
= next_cum
.regno
; i
< ix86_regparm
; i
++)
2392 mem
= gen_rtx_MEM (Pmode
,
2393 plus_constant (save_area
, i
* UNITS_PER_WORD
));
2394 set_mem_alias_set (mem
, set
);
2395 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
2396 x86_64_int_parameter_registers
[i
]));
2399 if (next_cum
.sse_nregs
)
      /* Now emit code to save SSE registers.  The AX parameter contains the
	 number of SSE parameter registers used to call this function.  We use
	 the sse_prologue_save insn template that produces a computed jump
	 across the SSE saves.  We need some preparation work to get this
	 working.  */
2406 label
= gen_label_rtx ();
2407 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
2409 /* Compute address to jump to :
2410 label - 5*eax + nnamed_sse_arguments*5 */
2411 tmp_reg
= gen_reg_rtx (Pmode
);
2412 nsse_reg
= gen_reg_rtx (Pmode
);
2413 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
2414 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2415 gen_rtx_MULT (Pmode
, nsse_reg
,
2417 if (next_cum
.sse_regno
)
2420 gen_rtx_CONST (DImode
,
2421 gen_rtx_PLUS (DImode
,
2423 GEN_INT (next_cum
.sse_regno
* 4))));
2425 emit_move_insn (nsse_reg
, label_ref
);
2426 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
      /* Compute the address of the memory block we save into.  We always use
	 a pointer pointing 127 bytes after the first byte to store -- this is
	 needed to keep the instruction size limited to 4 bytes.  */
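      /* (An explanatory note, not in the original comment: the 127-byte bias
	 keeps every displacement used by the save sequence within the signed
	 8-bit range -128..127, so each store can be encoded with a one-byte
	 displacement and the per-register save instructions keep a fixed,
	 small size.)  */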
2431 tmp_reg
= gen_reg_rtx (Pmode
);
2432 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2433 plus_constant (save_area
,
2434 8 * REGPARM_MAX
+ 127)));
2435 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
2436 set_mem_alias_set (mem
, set
);
2437 set_mem_align (mem
, BITS_PER_WORD
);
2439 /* And finally do the dirty job! */
2440 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
2441 GEN_INT (next_cum
.sse_regno
), label
));
2446 /* Implement va_start. */
2449 ix86_va_start (stdarg_p
, valist
, nextarg
)
2454 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
2455 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2456 tree gpr
, fpr
, ovf
, sav
, t
;
  /* Only the 64-bit target needs something special.  */
2461 std_expand_builtin_va_start (stdarg_p
, valist
, nextarg
);
2465 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2466 f_fpr
= TREE_CHAIN (f_gpr
);
2467 f_ovf
= TREE_CHAIN (f_fpr
);
2468 f_sav
= TREE_CHAIN (f_ovf
);
2470 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2471 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2472 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2473 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2474 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2476 /* Count number of gp and fp argument registers used. */
2477 words
= current_function_args_info
.words
;
2478 n_gpr
= current_function_args_info
.regno
;
2479 n_fpr
= current_function_args_info
.sse_regno
;
2481 if (TARGET_DEBUG_ARG
)
2482 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2483 (int) words
, (int) n_gpr
, (int) n_fpr
);
2485 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
2486 build_int_2 (n_gpr
* 8, 0));
2487 TREE_SIDE_EFFECTS (t
) = 1;
2488 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2490 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
2491 build_int_2 (n_fpr
* 16 + 8*REGPARM_MAX
, 0));
2492 TREE_SIDE_EFFECTS (t
) = 1;
2493 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2495 /* Find the overflow area. */
2496 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
2498 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
2499 build_int_2 (words
* UNITS_PER_WORD
, 0));
2500 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2501 TREE_SIDE_EFFECTS (t
) = 1;
2502 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
  /* Find the register save area.
     The prologue of the function saves it right above the stack frame.  */
2506 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
2507 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
2508 TREE_SIDE_EFFECTS (t
) = 1;
2509 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2512 /* Implement va_arg. */
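/* Shape of the code generated below, as a hedged C-like sketch for an
   integer-class argument (8 * REGPARM_MAX is 48 on x86-64):

	if (gp_offset > 48 - needed_intregs * 8)
	  goto mem;			(fall back to the stack)
	addr = reg_save_area + gp_offset;
	gp_offset += needed_intregs * 8;
	goto done;
     mem:
	addr = align (overflow_arg_area);
	overflow_arg_area = addr + size;
     done:
	the value is read from *addr;

   SSE-class arguments use fp_offset and 16-byte slots instead.  */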
2514 ix86_va_arg (valist
, type
)
2517 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
2518 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2519 tree gpr
, fpr
, ovf
, sav
, t
;
2521 rtx lab_false
, lab_over
= NULL_RTX
;
  /* Only the 64-bit target needs something special.  */
2528 return std_expand_builtin_va_arg (valist
, type
);
2531 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2532 f_fpr
= TREE_CHAIN (f_gpr
);
2533 f_ovf
= TREE_CHAIN (f_fpr
);
2534 f_sav
= TREE_CHAIN (f_ovf
);
2536 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2537 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2538 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2539 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2540 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2542 size
= int_size_in_bytes (type
);
2543 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2545 container
= construct_container (TYPE_MODE (type
), type
, 0,
2546 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
2548 * Pull the value out of the saved registers ...
2551 addr_rtx
= gen_reg_rtx (Pmode
);
2555 rtx int_addr_rtx
, sse_addr_rtx
;
2556 int needed_intregs
, needed_sseregs
;
2559 lab_over
= gen_label_rtx ();
2560 lab_false
= gen_label_rtx ();
2562 examine_argument (TYPE_MODE (type
), type
, 0,
2563 &needed_intregs
, &needed_sseregs
);
2566 need_temp
= ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
2567 || TYPE_ALIGN (type
) > 128);
      /* In case we are passing a structure, verify that it is a consecutive
	 block on the register save area.  If not, we need to do moves.  */
2571 if (!need_temp
&& !REG_P (container
))
	  /* Verify that all registers are strictly consecutive.  */
2574 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
2578 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2580 rtx slot
= XVECEXP (container
, 0, i
);
2581 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
2582 || INTVAL (XEXP (slot
, 1)) != i
* 16)
2590 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2592 rtx slot
= XVECEXP (container
, 0, i
);
2593 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
2594 || INTVAL (XEXP (slot
, 1)) != i
* 8)
2601 int_addr_rtx
= addr_rtx
;
2602 sse_addr_rtx
= addr_rtx
;
2606 int_addr_rtx
= gen_reg_rtx (Pmode
);
2607 sse_addr_rtx
= gen_reg_rtx (Pmode
);
2609 /* First ensure that we fit completely in registers. */
2612 emit_cmp_and_jump_insns (expand_expr
2613 (gpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2614 GEN_INT ((REGPARM_MAX
- needed_intregs
+
2615 1) * 8), GE
, const1_rtx
, SImode
,
2620 emit_cmp_and_jump_insns (expand_expr
2621 (fpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2622 GEN_INT ((SSE_REGPARM_MAX
-
2623 needed_sseregs
+ 1) * 16 +
2624 REGPARM_MAX
* 8), GE
, const1_rtx
,
2625 SImode
, 1, lab_false
);
2628 /* Compute index to start of area used for integer regs. */
2631 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, gpr
);
2632 r
= expand_expr (t
, int_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2633 if (r
!= int_addr_rtx
)
2634 emit_move_insn (int_addr_rtx
, r
);
2638 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, fpr
);
2639 r
= expand_expr (t
, sse_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2640 if (r
!= sse_addr_rtx
)
2641 emit_move_insn (sse_addr_rtx
, r
);
2648 /* Never use the memory itself, as it has the alias set. */
2649 addr_rtx
= XEXP (assign_temp (type
, 0, 1, 0), 0);
2650 mem
= gen_rtx_MEM (BLKmode
, addr_rtx
);
2651 set_mem_alias_set (mem
, get_varargs_alias_set ());
2652 set_mem_align (mem
, BITS_PER_UNIT
);
2654 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
2656 rtx slot
= XVECEXP (container
, 0, i
);
2657 rtx reg
= XEXP (slot
, 0);
2658 enum machine_mode mode
= GET_MODE (reg
);
2664 if (SSE_REGNO_P (REGNO (reg
)))
2666 src_addr
= sse_addr_rtx
;
2667 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
2671 src_addr
= int_addr_rtx
;
2672 src_offset
= REGNO (reg
) * 8;
2674 src_mem
= gen_rtx_MEM (mode
, src_addr
);
2675 set_mem_alias_set (src_mem
, get_varargs_alias_set ());
2676 src_mem
= adjust_address (src_mem
, mode
, src_offset
);
2677 dest_mem
= adjust_address (mem
, mode
, INTVAL (XEXP (slot
, 1)));
2678 emit_move_insn (dest_mem
, src_mem
);
2685 build (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
2686 build_int_2 (needed_intregs
* 8, 0));
2687 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
2688 TREE_SIDE_EFFECTS (t
) = 1;
2689 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2694 build (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
2695 build_int_2 (needed_sseregs
* 16, 0));
2696 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
2697 TREE_SIDE_EFFECTS (t
) = 1;
2698 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2701 emit_jump_insn (gen_jump (lab_over
));
2703 emit_label (lab_false
);
2706 /* ... otherwise out of the overflow area. */
2708 /* Care for on-stack alignment if needed. */
2709 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
2713 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
2714 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
, build_int_2 (align
- 1, 0));
2715 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
, build_int_2 (-align
, -1));
2719 r
= expand_expr (t
, addr_rtx
, Pmode
, EXPAND_NORMAL
);
2721 emit_move_insn (addr_rtx
, r
);
2724 build (PLUS_EXPR
, TREE_TYPE (t
), t
,
2725 build_int_2 (rsize
* UNITS_PER_WORD
, 0));
2726 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2727 TREE_SIDE_EFFECTS (t
) = 1;
2728 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2731 emit_label (lab_over
);
2736 /* Return nonzero if OP is general operand representable on x86_64. */
2739 x86_64_general_operand (op
, mode
)
2741 enum machine_mode mode
;
2744 return general_operand (op
, mode
);
2745 if (nonimmediate_operand (op
, mode
))
2747 return x86_64_sign_extended_value (op
);
2750 /* Return nonzero if OP is general operand representable on x86_64
2751 as either sign extended or zero extended constant. */
2754 x86_64_szext_general_operand (op
, mode
)
2756 enum machine_mode mode
;
2759 return general_operand (op
, mode
);
2760 if (nonimmediate_operand (op
, mode
))
2762 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
2765 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2768 x86_64_nonmemory_operand (op
, mode
)
2770 enum machine_mode mode
;
2773 return nonmemory_operand (op
, mode
);
2774 if (register_operand (op
, mode
))
2776 return x86_64_sign_extended_value (op
);
2779 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2782 x86_64_movabs_operand (op
, mode
)
2784 enum machine_mode mode
;
2786 if (!TARGET_64BIT
|| !flag_pic
)
2787 return nonmemory_operand (op
, mode
);
2788 if (register_operand (op
, mode
) || x86_64_sign_extended_value (op
))
2790 if (CONSTANT_P (op
) && !symbolic_reference_mentioned_p (op
))
2795 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2798 x86_64_szext_nonmemory_operand (op
, mode
)
2800 enum machine_mode mode
;
2803 return nonmemory_operand (op
, mode
);
2804 if (register_operand (op
, mode
))
2806 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
2809 /* Return nonzero if OP is immediate operand representable on x86_64. */
2812 x86_64_immediate_operand (op
, mode
)
2814 enum machine_mode mode
;
2817 return immediate_operand (op
, mode
);
2818 return x86_64_sign_extended_value (op
);
2821 /* Return nonzero if OP is immediate operand representable on x86_64. */
2824 x86_64_zext_immediate_operand (op
, mode
)
2826 enum machine_mode mode ATTRIBUTE_UNUSED
;
2828 return x86_64_zero_extended_value (op
);
2831 /* Return nonzero if OP is (const_int 1), else return zero. */
int
const_int_1_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
}
2841 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2842 reference and a constant. */
2845 symbolic_operand (op
, mode
)
2847 enum machine_mode mode ATTRIBUTE_UNUSED
;
2849 switch (GET_CODE (op
))
2857 if (GET_CODE (op
) == SYMBOL_REF
2858 || GET_CODE (op
) == LABEL_REF
2859 || (GET_CODE (op
) == UNSPEC
2860 && (XINT (op
, 1) == UNSPEC_GOT
2861 || XINT (op
, 1) == UNSPEC_GOTOFF
2862 || XINT (op
, 1) == UNSPEC_GOTPCREL
)))
2864 if (GET_CODE (op
) != PLUS
2865 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
2869 if (GET_CODE (op
) == SYMBOL_REF
2870 || GET_CODE (op
) == LABEL_REF
)
2872 /* Only @GOTOFF gets offsets. */
2873 if (GET_CODE (op
) != UNSPEC
2874 || XINT (op
, 1) != UNSPEC_GOTOFF
)
2877 op
= XVECEXP (op
, 0, 0);
2878 if (GET_CODE (op
) == SYMBOL_REF
2879 || GET_CODE (op
) == LABEL_REF
)
2888 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2891 pic_symbolic_operand (op
, mode
)
2893 enum machine_mode mode ATTRIBUTE_UNUSED
;
2895 if (GET_CODE (op
) != CONST
)
2900 if (GET_CODE (XEXP (op
, 0)) == UNSPEC
)
2905 if (GET_CODE (op
) == UNSPEC
)
2907 if (GET_CODE (op
) != PLUS
2908 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
2911 if (GET_CODE (op
) == UNSPEC
)
2917 /* Return true if OP is a symbolic operand that resolves locally. */
2920 local_symbolic_operand (op
, mode
)
2922 enum machine_mode mode ATTRIBUTE_UNUSED
;
2924 if (GET_CODE (op
) == LABEL_REF
)
2927 if (GET_CODE (op
) == CONST
2928 && GET_CODE (XEXP (op
, 0)) == PLUS
2929 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
2930 op
= XEXP (XEXP (op
, 0), 0);
2932 if (GET_CODE (op
) != SYMBOL_REF
)
2935 /* These we've been told are local by varasm and encode_section_info
2937 if (CONSTANT_POOL_ADDRESS_P (op
) || SYMBOL_REF_FLAG (op
))
2940 /* There is, however, a not insubstantial body of code in the rest of
2941 the compiler that assumes it can just stick the results of
2942 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
  /* ??? This is a hack.  Should update the body of the compiler to
     always create a DECL and invoke targetm.encode_section_info.  */
2945 if (strncmp (XSTR (op
, 0), internal_label_prefix
,
2946 internal_label_prefix_len
) == 0)
2952 /* Test for various thread-local symbols. See ix86_encode_section_info. */
2955 tls_symbolic_operand (op
, mode
)
2957 enum machine_mode mode ATTRIBUTE_UNUSED
;
2959 const char *symbol_str
;
2961 if (GET_CODE (op
) != SYMBOL_REF
)
2963 symbol_str
= XSTR (op
, 0);
2965 if (symbol_str
[0] != '%')
2967 return strchr (tls_model_chars
, symbol_str
[1]) - tls_model_chars
;
2971 tls_symbolic_operand_1 (op
, kind
)
2973 enum tls_model kind
;
2975 const char *symbol_str
;
2977 if (GET_CODE (op
) != SYMBOL_REF
)
2979 symbol_str
= XSTR (op
, 0);
2981 return symbol_str
[0] == '%' && symbol_str
[1] == tls_model_chars
[kind
];
int
global_dynamic_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
}

int
local_dynamic_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
}

int
initial_exec_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
}

int
local_exec_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
}
3016 /* Test for a valid operand for a call instruction. Don't allow the
3017 arg pointer register or virtual regs since they may decay into
3018 reg + const, which the patterns can't handle. */
3021 call_insn_operand (op
, mode
)
3023 enum machine_mode mode ATTRIBUTE_UNUSED
;
3025 /* Disallow indirect through a virtual register. This leads to
3026 compiler aborts when trying to eliminate them. */
3027 if (GET_CODE (op
) == REG
3028 && (op
== arg_pointer_rtx
3029 || op
== frame_pointer_rtx
3030 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
3031 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
3034 /* Disallow `call 1234'. Due to varying assembler lameness this
3035 gets either rejected or translated to `call .+1234'. */
3036 if (GET_CODE (op
) == CONST_INT
)
3039 /* Explicitly allow SYMBOL_REF even if pic. */
3040 if (GET_CODE (op
) == SYMBOL_REF
)
3043 /* Otherwise we can allow any general_operand in the address. */
3044 return general_operand (op
, Pmode
);
3048 constant_call_address_operand (op
, mode
)
3050 enum machine_mode mode ATTRIBUTE_UNUSED
;
3052 if (GET_CODE (op
) == CONST
3053 && GET_CODE (XEXP (op
, 0)) == PLUS
3054 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3055 op
= XEXP (XEXP (op
, 0), 0);
3056 return GET_CODE (op
) == SYMBOL_REF
;
3059 /* Match exactly zero and one. */
int
const0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return op == CONST0_RTX (mode);
}

int
const1_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return op == const1_rtx;
}
3077 /* Match 2, 4, or 8. Used for leal multiplicands. */
int
const248_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
}
/* True if this is a constant appropriate for an increment or decrement.  */
3091 incdec_operand (op
, mode
)
3093 enum machine_mode mode ATTRIBUTE_UNUSED
;
  /* On Pentium 4, the inc and dec operations cause an extra dependency on
     the flags register, since the carry flag is not set.  */
3097 if (TARGET_PENTIUM4
&& !optimize_size
)
3099 return op
== const1_rtx
|| op
== constm1_rtx
;
3102 /* Return nonzero if OP is acceptable as operand of DImode shift
3106 shiftdi_operand (op
, mode
)
3108 enum machine_mode mode ATTRIBUTE_UNUSED
;
3111 return nonimmediate_operand (op
, mode
);
3113 return register_operand (op
, mode
);
/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is a
   register operand.

   This is used to prevent esp from being used as an index reg,
   which would only happen in pathological cases.  */
3124 reg_no_sp_operand (op
, mode
)
3126 enum machine_mode mode
;
3129 if (GET_CODE (t
) == SUBREG
)
3131 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
)
3134 return register_operand (op
, mode
);
3138 mmx_reg_operand (op
, mode
)
3140 enum machine_mode mode ATTRIBUTE_UNUSED
;
3142 return MMX_REG_P (op
);
3145 /* Return false if this is any eliminable register. Otherwise
3149 general_no_elim_operand (op
, mode
)
3151 enum machine_mode mode
;
3154 if (GET_CODE (t
) == SUBREG
)
3156 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3157 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3158 || t
== virtual_stack_dynamic_rtx
)
3161 && REGNO (t
) >= FIRST_VIRTUAL_REGISTER
3162 && REGNO (t
) <= LAST_VIRTUAL_REGISTER
)
3165 return general_operand (op
, mode
);
3168 /* Return false if this is any eliminable register. Otherwise
3169 register_operand or const_int. */
3172 nonmemory_no_elim_operand (op
, mode
)
3174 enum machine_mode mode
;
3177 if (GET_CODE (t
) == SUBREG
)
3179 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3180 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3181 || t
== virtual_stack_dynamic_rtx
)
3184 return GET_CODE (op
) == CONST_INT
|| register_operand (op
, mode
);
3187 /* Return true if op is a Q_REGS class register. */
3190 q_regs_operand (op
, mode
)
3192 enum machine_mode mode
;
3194 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3196 if (GET_CODE (op
) == SUBREG
)
3197 op
= SUBREG_REG (op
);
3198 return ANY_QI_REG_P (op
);
3201 /* Return true if op is a NON_Q_REGS class register. */
3204 non_q_regs_operand (op
, mode
)
3206 enum machine_mode mode
;
3208 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3210 if (GET_CODE (op
) == SUBREG
)
3211 op
= SUBREG_REG (op
);
3212 return NON_QI_REG_P (op
);
3215 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3218 sse_comparison_operator (op
, mode
)
3220 enum machine_mode mode ATTRIBUTE_UNUSED
;
3222 enum rtx_code code
= GET_CODE (op
);
3225 /* Operations supported directly. */
      /* These are equivalent to the ones above in non-IEEE comparisons.  */
3242 return !TARGET_IEEE_FP
;
3247 /* Return 1 if OP is a valid comparison operator in valid mode. */
3249 ix86_comparison_operator (op
, mode
)
3251 enum machine_mode mode
;
3253 enum machine_mode inmode
;
3254 enum rtx_code code
= GET_CODE (op
);
3255 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3257 if (GET_RTX_CLASS (code
) != '<')
3259 inmode
= GET_MODE (XEXP (op
, 0));
3261 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3263 enum rtx_code second_code
, bypass_code
;
3264 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3265 return (bypass_code
== NIL
&& second_code
== NIL
);
3272 if (inmode
== CCmode
|| inmode
== CCGCmode
3273 || inmode
== CCGOCmode
|| inmode
== CCNOmode
)
3276 case LTU
: case GTU
: case LEU
: case ORDERED
: case UNORDERED
: case GEU
:
3277 if (inmode
== CCmode
)
3281 if (inmode
== CCmode
|| inmode
== CCGCmode
|| inmode
== CCNOmode
)
3289 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3292 fcmov_comparison_operator (op
, mode
)
3294 enum machine_mode mode
;
3296 enum machine_mode inmode
;
3297 enum rtx_code code
= GET_CODE (op
);
3298 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3300 if (GET_RTX_CLASS (code
) != '<')
3302 inmode
= GET_MODE (XEXP (op
, 0));
3303 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3305 enum rtx_code second_code
, bypass_code
;
3306 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3307 if (bypass_code
!= NIL
|| second_code
!= NIL
)
3309 code
= ix86_fp_compare_code_to_integer (code
);
  /* The i387 supports just a limited set of condition codes.  */
3314 case LTU
: case GTU
: case LEU
: case GEU
:
3315 if (inmode
== CCmode
|| inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3318 case ORDERED
: case UNORDERED
:
3326 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3329 promotable_binary_operator (op
, mode
)
3331 enum machine_mode mode ATTRIBUTE_UNUSED
;
3333 switch (GET_CODE (op
))
      /* Modern CPUs have the same latency for HImode and SImode multiply,
	 but the 386 and 486 do HImode multiply faster.  */
3338 return ix86_cpu
> PROCESSOR_I486
;
/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */
3355 cmp_fp_expander_operand (op
, mode
)
3357 enum machine_mode mode
;
3359 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3361 if (GET_CODE (op
) == CONST_DOUBLE
)
3363 return general_operand (op
, mode
);
3366 /* Match an SI or HImode register for a zero_extract. */
3369 ext_register_operand (op
, mode
)
3371 enum machine_mode mode ATTRIBUTE_UNUSED
;
3374 if ((!TARGET_64BIT
|| GET_MODE (op
) != DImode
)
3375 && GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
3378 if (!register_operand (op
, VOIDmode
))
  /* Be careful to accept only registers having upper parts.  */
3382 regno
= REG_P (op
) ? REGNO (op
) : REGNO (SUBREG_REG (op
));
3383 return (regno
> LAST_VIRTUAL_REGISTER
|| regno
< 4);
3386 /* Return 1 if this is a valid binary floating-point operation.
3387 OP is the expression matched, and MODE is its mode. */
3390 binary_fp_operator (op
, mode
)
3392 enum machine_mode mode
;
3394 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3397 switch (GET_CODE (op
))
3403 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
int
mult_operator (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == MULT;
}

int
div_operator (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == DIV;
}
3427 arith_or_logical_operator (op
, mode
)
3429 enum machine_mode mode
;
3431 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
3432 && (GET_RTX_CLASS (GET_CODE (op
)) == 'c'
3433 || GET_RTX_CLASS (GET_CODE (op
)) == '2'));
3436 /* Returns 1 if OP is memory operand with a displacement. */
3439 memory_displacement_operand (op
, mode
)
3441 enum machine_mode mode
;
3443 struct ix86_address parts
;
3445 if (! memory_operand (op
, mode
))
3448 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
3451 return parts
.disp
!= NULL_RTX
;
3454 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3455 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3457 ??? It seems likely that this will only work because cmpsi is an
3458 expander, and no actual insns use this. */
3461 cmpsi_operand (op
, mode
)
3463 enum machine_mode mode
;
3465 if (nonimmediate_operand (op
, mode
))
3468 if (GET_CODE (op
) == AND
3469 && GET_MODE (op
) == SImode
3470 && GET_CODE (XEXP (op
, 0)) == ZERO_EXTRACT
3471 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
3472 && GET_CODE (XEXP (XEXP (op
, 0), 2)) == CONST_INT
3473 && INTVAL (XEXP (XEXP (op
, 0), 1)) == 8
3474 && INTVAL (XEXP (XEXP (op
, 0), 2)) == 8
3475 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
3481 /* Returns 1 if OP is memory operand that can not be represented by the
3485 long_memory_operand (op
, mode
)
3487 enum machine_mode mode
;
3489 if (! memory_operand (op
, mode
))
3492 return memory_address_length (op
) != 0;
3495 /* Return nonzero if the rtx is known aligned. */
3498 aligned_operand (op
, mode
)
3500 enum machine_mode mode
;
3502 struct ix86_address parts
;
3504 if (!general_operand (op
, mode
))
3507 /* Registers and immediate operands are always "aligned". */
3508 if (GET_CODE (op
) != MEM
)
3511 /* Don't even try to do any aligned optimizations with volatiles. */
3512 if (MEM_VOLATILE_P (op
))
3517 /* Pushes and pops are only valid on the stack pointer. */
3518 if (GET_CODE (op
) == PRE_DEC
3519 || GET_CODE (op
) == POST_INC
)
3522 /* Decode the address. */
3523 if (! ix86_decompose_address (op
, &parts
))
3526 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
3527 parts
.base
= SUBREG_REG (parts
.base
);
3528 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
3529 parts
.index
= SUBREG_REG (parts
.index
);
3531 /* Look for some component that isn't known to be aligned. */
3535 && REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 32)
3540 if (REGNO_POINTER_ALIGN (REGNO (parts
.base
)) < 32)
3545 if (GET_CODE (parts
.disp
) != CONST_INT
3546 || (INTVAL (parts
.disp
) & 3) != 0)
3550 /* Didn't find one -- this must be an aligned address. */
3554 /* Return true if the constant is something that can be loaded with
3555 a special instruction. Only handle 0.0 and 1.0; others are less
3559 standard_80387_constant_p (x
)
3562 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
  /* Note that the 80387 has other constants, such as pi, that we should
     support too.  On some machines, these may be slower to load as a
     standard constant than to load from doubles in memory.  */
3567 if (x
== CONST0_RTX (GET_MODE (x
)))
3569 if (x
== CONST1_RTX (GET_MODE (x
)))
/* Return 1 if X is an FP constant we can load into an SSE register without
   using memory.  */
3577 standard_sse_constant_p (x
)
3580 if (GET_CODE (x
) != CONST_DOUBLE
)
3582 return (x
== CONST0_RTX (GET_MODE (x
)));
3585 /* Returns 1 if OP contains a symbol reference */
3588 symbolic_reference_mentioned_p (op
)
3591 register const char *fmt
;
3594 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
3597 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
3598 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
3604 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
3605 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
3609 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
3616 /* Return 1 if it is appropriate to emit `ret' instructions in the
3617 body of a function. Do this only if the epilogue is simple, needing a
3618 couple of insns. Prior to reloading, we can't tell how many registers
3619 must be saved, so return 0 then. Return 0 if there is no frame
3620 marker to de-allocate.
3622 If NON_SAVING_SETJMP is defined and true, then it is not possible
3623 for the epilogue to be simple, so return 0. This is a special case
3624 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3625 until final, but jump_optimize may need to know sooner if a
3629 ix86_can_use_return_insn_p ()
3631 struct ix86_frame frame
;
3633 #ifdef NON_SAVING_SETJMP
3634 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
3638 if (! reload_completed
|| frame_pointer_needed
)
3641 /* Don't allow more than 32 pop, since that's all we can do
3642 with one instruction. */
3643 if (current_function_pops_args
3644 && current_function_args_size
>= 32768)
3647 ix86_compute_frame_layout (&frame
);
3648 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
3651 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
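/* For concreteness (a standard x86-64 encoding fact, not stated in the
   original comment): a sign-extended 32-bit immediate covers the range
   -0x80000000 .. 0x7fffffff when viewed as a 64-bit value.  */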
3653 x86_64_sign_extended_value (value
)
3656 switch (GET_CODE (value
))
      /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
	 to be at least 32 and thus all acceptable constants are
	 represented as CONST_INT.  */
3662 if (HOST_BITS_PER_WIDE_INT
== 32)
3666 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (value
), DImode
);
3667 return trunc_int_for_mode (val
, SImode
) == val
;
3671 /* For certain code models, the symbolic references are known to fit. */
3673 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_KERNEL
;
3675 /* For certain code models, the code is near as well. */
3677 return ix86_cmodel
!= CM_LARGE
&& ix86_cmodel
!= CM_SMALL_PIC
;
      /* We may also accept the offsetted memory references in certain
	 special cases.  */
3682 if (GET_CODE (XEXP (value
, 0)) == UNSPEC
3683 && XINT (XEXP (value
, 0), 1) == UNSPEC_GOTPCREL
)
3685 else if (GET_CODE (XEXP (value
, 0)) == PLUS
)
3687 rtx op1
= XEXP (XEXP (value
, 0), 0);
3688 rtx op2
= XEXP (XEXP (value
, 0), 1);
3689 HOST_WIDE_INT offset
;
3691 if (ix86_cmodel
== CM_LARGE
)
3693 if (GET_CODE (op2
) != CONST_INT
)
3695 offset
= trunc_int_for_mode (INTVAL (op2
), DImode
);
3696 switch (GET_CODE (op1
))
	      /* For CM_SMALL assume that the latest object is 1MB before the
		 end of the 31-bit boundary.  We may also accept pretty large
		 negative constants, knowing that all objects are in the
		 positive half of the address space.  */
3703 if (ix86_cmodel
== CM_SMALL
3704 && offset
< 1024*1024*1024
3705 && trunc_int_for_mode (offset
, SImode
) == offset
)
	      /* For CM_KERNEL we know that all objects reside in the
		 negative half of the 32-bit address space.  We may not
		 accept negative offsets, since they may be just off,
		 and we may accept pretty large positive ones.  */
3711 if (ix86_cmodel
== CM_KERNEL
3713 && trunc_int_for_mode (offset
, SImode
) == offset
)
3717 /* These conditions are similar to SYMBOL_REF ones, just the
3718 constraints for code models differ. */
3719 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
3720 && offset
< 1024*1024*1024
3721 && trunc_int_for_mode (offset
, SImode
) == offset
)
3723 if (ix86_cmodel
== CM_KERNEL
3725 && trunc_int_for_mode (offset
, SImode
) == offset
)
3738 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
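/* Correspondingly (again a general x86-64 encoding fact): a zero-extended
   32-bit immediate covers 0 .. 0xffffffff, and is usable where a 32-bit
   operation implicitly clears the upper half of the destination.  */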
3740 x86_64_zero_extended_value (value
)
3743 switch (GET_CODE (value
))
3746 if (HOST_BITS_PER_WIDE_INT
== 32)
3747 return (GET_MODE (value
) == VOIDmode
3748 && !CONST_DOUBLE_HIGH (value
));
3752 if (HOST_BITS_PER_WIDE_INT
== 32)
3753 return INTVAL (value
) >= 0;
3755 return !(INTVAL (value
) & ~(HOST_WIDE_INT
) 0xffffffff);
3758 /* For certain code models, the symbolic references are known to fit. */
3760 return ix86_cmodel
== CM_SMALL
;
3762 /* For certain code models, the code is near as well. */
3764 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
;
      /* We may also accept the offsetted memory references in certain
	 special cases.  */
3769 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
3771 rtx op1
= XEXP (XEXP (value
, 0), 0);
3772 rtx op2
= XEXP (XEXP (value
, 0), 1);
3774 if (ix86_cmodel
== CM_LARGE
)
3776 switch (GET_CODE (op1
))
	      /* For the small code model we may accept pretty large positive
		 offsets, since one bit is available for free.  Negative
		 offsets are limited by the size of the NULL pointer area
		 specified by the ABI.  */
3784 if (ix86_cmodel
== CM_SMALL
3785 && GET_CODE (op2
) == CONST_INT
3786 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
3787 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
3790 /* ??? For the kernel, we may accept adjustment of
3791 -0x10000000, since we know that it will just convert
3792 negative address space to positive, but perhaps this
3793 is not worthwhile. */
3796 /* These conditions are similar to SYMBOL_REF ones, just the
3797 constraints for code models differ. */
3798 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
3799 && GET_CODE (op2
) == CONST_INT
3800 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
3801 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
3815 /* Value should be nonzero if functions must have frame pointers.
3816 Zero means the frame pointer need not be set up (and parms may
3817 be accessed via the stack pointer) in functions that seem suitable. */
3820 ix86_frame_pointer_required ()
3822 /* If we accessed previous frames, then the generated code expects
3823 to be able to access the saved ebp value in our frame. */
3824 if (cfun
->machine
->accesses_prev_frame
)
3827 /* Several x86 os'es need a frame pointer for other reasons,
3828 usually pertaining to setjmp. */
3829 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
3832 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3833 the frame pointer by default. Turn it back on now if we've not
3834 got a leaf function. */
3835 if (TARGET_OMIT_LEAF_FRAME_POINTER
3836 && (!current_function_is_leaf
|| current_function_profile
))
3842 /* Record that the current function accesses previous call frames. */
3845 ix86_setup_frame_addresses ()
3847 cfun
->machine
->accesses_prev_frame
= 1;
3850 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3851 # define USE_HIDDEN_LINKONCE 1
3853 # define USE_HIDDEN_LINKONCE 0
3856 static int pic_labels_used
;
3858 /* Fills in the label name that should be used for a pc thunk for
3859 the given register. */
3862 get_pc_thunk_name (name
, regno
)
3866 if (USE_HIDDEN_LINKONCE
)
3867 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
3869 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
3873 /* This function generates code for -fpic that loads %ebx with
3874 the return address of the caller and then returns. */
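/* The emitted thunk is tiny; for %ebx it looks roughly like

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   (a sketch assembled from the output_asm_insn calls below).  */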
3877 ix86_asm_file_end (file
)
3883 for (regno
= 0; regno
< 8; ++regno
)
3887 if (! ((pic_labels_used
>> regno
) & 1))
3890 get_pc_thunk_name (name
, regno
);
3892 if (USE_HIDDEN_LINKONCE
)
3896 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
3898 TREE_PUBLIC (decl
) = 1;
3899 TREE_STATIC (decl
) = 1;
3900 DECL_ONE_ONLY (decl
) = 1;
3902 (*targetm
.asm_out
.unique_section
) (decl
, 0);
3903 named_section (decl
, NULL
, 0);
3905 ASM_GLOBALIZE_LABEL (file
, name
);
3906 fputs ("\t.hidden\t", file
);
3907 assemble_name (file
, name
);
3909 ASM_DECLARE_FUNCTION_NAME (file
, name
, decl
);
3914 ASM_OUTPUT_LABEL (file
, name
);
3917 xops
[0] = gen_rtx_REG (SImode
, regno
);
3918 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
3919 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
3920 output_asm_insn ("ret", xops
);
3924 /* Emit code for the SET_GOT patterns. */
3927 output_set_got (dest
)
3933 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
3935 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
3937 xops
[2] = gen_rtx_LABEL_REF (Pmode
, gen_label_rtx ());
3940 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
3942 output_asm_insn ("call\t%a2", xops
);
3944 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, "L",
3945 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
3948 output_asm_insn ("pop{l}\t%0", xops
);
3953 get_pc_thunk_name (name
, REGNO (dest
));
3954 pic_labels_used
|= 1 << REGNO (dest
);
3956 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
3957 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
3958 output_asm_insn ("call\t%X2", xops
);
3961 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
3962 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
3964 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops
);
/* Generate a "push" pattern for input ARG.  */
3975 return gen_rtx_SET (VOIDmode
,
3977 gen_rtx_PRE_DEC (Pmode
,
3978 stack_pointer_rtx
)),
3982 /* Return >= 0 if there is an unused call-clobbered register available
3983 for the entire function. */
3986 ix86_select_alt_pic_regnum ()
3988 if (current_function_is_leaf
&& !current_function_profile
)
3991 for (i
= 2; i
>= 0; --i
)
3992 if (!regs_ever_live
[i
])
3996 return INVALID_REGNUM
;
3999 /* Return 1 if we need to save REGNO. */
4001 ix86_save_reg (regno
, maybe_eh_return
)
4003 int maybe_eh_return
;
4005 if (pic_offset_table_rtx
4006 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
4007 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4008 || current_function_profile
4009 || current_function_calls_eh_return
))
4011 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
4016 if (current_function_calls_eh_return
&& maybe_eh_return
)
4021 unsigned test
= EH_RETURN_DATA_REGNO (i
);
4022 if (test
== INVALID_REGNUM
)
4029 return (regs_ever_live
[regno
]
4030 && !call_used_regs
[regno
]
4031 && !fixed_regs
[regno
]
4032 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
4035 /* Return number of registers to be saved on the stack. */
4043 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4044 if (ix86_save_reg (regno
, true))
4049 /* Return the offset between two registers, one to be eliminated, and the other
4050 its replacement, at the start of a routine. */
4053 ix86_initial_elimination_offset (from
, to
)
4057 struct ix86_frame frame
;
4058 ix86_compute_frame_layout (&frame
);
4060 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4061 return frame
.hard_frame_pointer_offset
;
4062 else if (from
== FRAME_POINTER_REGNUM
4063 && to
== HARD_FRAME_POINTER_REGNUM
)
4064 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
4067 if (to
!= STACK_POINTER_REGNUM
)
4069 else if (from
== ARG_POINTER_REGNUM
)
4070 return frame
.stack_pointer_offset
;
4071 else if (from
!= FRAME_POINTER_REGNUM
)
4074 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
4078 /* Fill structure ix86_frame about frame of currently computed function. */
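/* Rough picture of the layout computed below (a sketch; offsets grow
   downward from the incoming arguments):

	return address
	saved frame pointer (if frame_pointer_needed)
		<- hard_frame_pointer_offset
	saved registers (nregs words)
	va_arg register save area (if used)
	padding1
		<- frame_pointer_offset
	local variables
	outgoing argument area
	padding2
		<- stack_pointer_offset
*/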
4081 ix86_compute_frame_layout (frame
)
4082 struct ix86_frame
*frame
;
4084 HOST_WIDE_INT total_size
;
4085 int stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
4087 int preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
4088 HOST_WIDE_INT size
= get_frame_size ();
4090 frame
->nregs
= ix86_nsaved_regs ();
4093 /* Skip return address and saved base pointer. */
4094 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
4096 frame
->hard_frame_pointer_offset
= offset
;
  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using those
     features, and they may break easily.  */
4102 if (size
&& !stack_alignment_needed
)
4104 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4106 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4108 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4111 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4112 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
4114 /* Register save area */
4115 offset
+= frame
->nregs
* UNITS_PER_WORD
;
4118 if (ix86_save_varrargs_registers
)
4120 offset
+= X86_64_VARARGS_SIZE
;
4121 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
4124 frame
->va_arg_size
= 0;
4126 /* Align start of frame for local function. */
4127 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
4128 & -stack_alignment_needed
) - offset
;
4130 offset
+= frame
->padding1
;
4132 /* Frame pointer points here. */
4133 frame
->frame_pointer_offset
= offset
;
4137 /* Add outgoing arguments area. Can be skipped if we eliminated
4138 all the function calls as dead code. */
4139 if (ACCUMULATE_OUTGOING_ARGS
&& !current_function_is_leaf
)
4141 offset
+= current_function_outgoing_args_size
;
4142 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
4145 frame
->outgoing_arguments_size
= 0;
  /* Align the stack boundary.  Only needed if we're calling another function
     or using alloca.  */
4149 if (!current_function_is_leaf
|| current_function_calls_alloca
)
4150 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
4151 & -preferred_alignment
) - offset
;
4153 frame
->padding2
= 0;
4155 offset
+= frame
->padding2
;
4157 /* We've reached end of stack frame. */
4158 frame
->stack_pointer_offset
= offset
;
4160 /* Size prologue needs to allocate. */
4161 frame
->to_allocate
=
4162 (size
+ frame
->padding1
+ frame
->padding2
4163 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
4165 if (TARGET_64BIT
&& TARGET_RED_ZONE
&& current_function_sp_is_unchanging
4166 && current_function_is_leaf
)
4168 frame
->red_zone_size
= frame
->to_allocate
;
4169 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
4170 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
4173 frame
->red_zone_size
= 0;
4174 frame
->to_allocate
-= frame
->red_zone_size
;
4175 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
	   frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
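
/* The padding1/padding2 computations above use the standard
   round-up-to-alignment idiom (X + ALIGN - 1) & -ALIGN.  A minimal
   standalone sketch of that idiom follows; it is illustrative only
   (kept out of the build) and the numbers in the comment are
   hypothetical.  */
#if 0
/* With offset = 20 and align = 16: (20 + 15) & -16 = 32, so 12 bytes
   of padding are inserted.  ALIGN is assumed to be a power of two.  */
static int
round_up_padding_example (int offset, int align)
{
  int rounded = (offset + align - 1) & -align;
  return rounded - offset;
}
#endif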
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs ()
{
  int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Emit code to save registers using MOV insns.  First register
   is stored at POINTER + OFFSET.  */
static void
ix86_emit_save_regs_using_mov (pointer, offset)
     rtx pointer;
     HOST_WIDE_INT offset;
{
  int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
					       Pmode, offset),
			       gen_rtx_REG (Pmode, regno));
	RTX_FRAME_RELATED_P (insn) = 1;
	offset += UNITS_PER_WORD;
      }
}
4229 /* Expand the prologue into a bunch of separate insns. */
4232 ix86_expand_prologue ()
4236 struct ix86_frame frame
;
4238 HOST_WIDE_INT allocate
;
4242 use_fast_prologue_epilogue
4243 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT
);
4244 if (TARGET_PROLOGUE_USING_MOVE
)
4245 use_mov
= use_fast_prologue_epilogue
;
4247 ix86_compute_frame_layout (&frame
);
4249 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4250 slower on all targets. Also sdb doesn't like it. */
4252 if (frame_pointer_needed
)
4254 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
4255 RTX_FRAME_RELATED_P (insn
) = 1;
4257 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
4258 RTX_FRAME_RELATED_P (insn
) = 1;
4261 allocate
= frame
.to_allocate
;
  /* In the case where we are dealing with only a single register and an
     empty frame, a push is equivalent to the mov+add sequence.  */
4264 if (allocate
== 0 && frame
.nregs
<= 1)
4268 ix86_emit_save_regs ();
4270 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
4274 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
4276 insn
= emit_insn (gen_pro_epilogue_adjust_stack
4277 (stack_pointer_rtx
, stack_pointer_rtx
,
4278 GEN_INT (-allocate
)));
4279 RTX_FRAME_RELATED_P (insn
) = 1;
4283 /* ??? Is this only valid for Win32? */
4290 arg0
= gen_rtx_REG (SImode
, 0);
4291 emit_move_insn (arg0
, GEN_INT (allocate
));
4293 sym
= gen_rtx_MEM (FUNCTION_MODE
,
4294 gen_rtx_SYMBOL_REF (Pmode
, "_alloca"));
4295 insn
= emit_call_insn (gen_call (sym
, const0_rtx
, constm1_rtx
));
4297 CALL_INSN_FUNCTION_USAGE (insn
)
4298 = gen_rtx_EXPR_LIST (VOIDmode
, gen_rtx_USE (VOIDmode
, arg0
),
4299 CALL_INSN_FUNCTION_USAGE (insn
));
4303 if (!frame_pointer_needed
|| !frame
.to_allocate
)
4304 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
4306 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
4307 -frame
.nregs
* UNITS_PER_WORD
);
4310 #ifdef SUBTARGET_PROLOGUE
4314 pic_reg_used
= false;
4315 if (pic_offset_table_rtx
4316 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4317 || current_function_profile
))
4319 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
4321 if (alt_pic_reg_used
!= INVALID_REGNUM
)
4322 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
4324 pic_reg_used
= true;
4329 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
      /* Even with accurate pre-reload life analysis, we can wind up
	 deleting all references to the pic register after reload.
	 Consider if cross-jumping unifies two sides of a branch
	 controlled by a comparison vs the only read from a global.
	 In which case, allow the set_got to be deleted, though we're
	 too late to do anything about the ebx save in the prologue.  */
4337 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
4342 if (current_function_profile
)
4343 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
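
/* For reference, a hedged sketch of the assembly the frame-pointer path
   of the prologue above is normally expected to produce on ia32; the
   24-byte frame size is hypothetical and illustrative only.  */
#if 0
asm ("pushl %ebp");		/* save caller's frame pointer        */
asm ("movl %esp, %ebp");	/* establish the new frame pointer    */
asm ("subl $24, %esp");		/* allocate frame.to_allocate bytes   */
#endif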
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
     rtx pointer;
     int offset;
     int maybe_eh_return;
{
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
	emit_move_insn (gen_rtx_REG (Pmode, regno),
			adjust_address (gen_rtx_MEM (Pmode, pointer),
					Pmode, offset));
	offset += UNITS_PER_WORD;
      }
}
4366 /* Restore function stack, frame, and registers. */
4369 ix86_expand_epilogue (style
)
4373 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
4374 struct ix86_frame frame
;
4375 HOST_WIDE_INT offset
;
4377 ix86_compute_frame_layout (&frame
);
4379 /* Calculate start of saved registers relative to ebp. Special care
4380 must be taken for the normal return case of a function using
4381 eh_return: the eax and edx registers are marked as saved, but not
4382 restored along this path. */
4383 offset
= frame
.nregs
;
4384 if (current_function_calls_eh_return
&& style
!= 2)
4386 offset
*= -UNITS_PER_WORD
;
  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in a stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or discrete
     equivalent), so it is profitable in some other cases as well.  Especially
     when there are no registers to restore.  We also use this code when
     TARGET_USE_LEAVE and there is exactly one register to pop.  This
     heuristic may need some tuning in the future.  */
4398 if ((!sp_valid
&& frame
.nregs
<= 1)
4399 || (TARGET_EPILOGUE_USING_MOVE
4400 && use_fast_prologue_epilogue
4401 && (frame
.nregs
> 1 || frame
.to_allocate
))
4402 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
4403 || (frame_pointer_needed
&& TARGET_USE_LEAVE
4404 && use_fast_prologue_epilogue
&& frame
.nregs
== 1)
4405 || current_function_calls_eh_return
)
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  The only exception is esp pointing directly
	 to the end of the block of saved registers, where we may simplify
	 addressing mode.  */
4413 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
4414 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
4415 frame
.to_allocate
, style
== 2);
4417 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
4418 offset
, style
== 2);
4420 /* eh_return epilogues need %ecx added to the stack pointer. */
4423 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
4425 if (frame_pointer_needed
)
4427 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
4428 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
4429 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
4431 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
4432 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
4434 emit_insn (gen_pro_epilogue_adjust_stack
4435 (stack_pointer_rtx
, sa
, const0_rtx
));
4439 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
4440 tmp
= plus_constant (tmp
, (frame
.to_allocate
4441 + frame
.nregs
* UNITS_PER_WORD
));
4442 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
4445 else if (!frame_pointer_needed
)
4446 emit_insn (gen_pro_epilogue_adjust_stack
4447 (stack_pointer_rtx
, stack_pointer_rtx
,
4448 GEN_INT (frame
.to_allocate
4449 + frame
.nregs
* UNITS_PER_WORD
)));
4450 /* If not an i386, mov & pop is faster than "leave". */
4451 else if (TARGET_USE_LEAVE
|| optimize_size
|| !use_fast_prologue_epilogue
)
4452 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4455 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
4456 hard_frame_pointer_rtx
,
4459 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4461 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4466 /* First step is to deallocate the stack frame so that we can
4467 pop the registers. */
4470 if (!frame_pointer_needed
)
4472 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
4473 hard_frame_pointer_rtx
,
4476 else if (frame
.to_allocate
)
4477 emit_insn (gen_pro_epilogue_adjust_stack
4478 (stack_pointer_rtx
, stack_pointer_rtx
,
4479 GEN_INT (frame
.to_allocate
)));
4481 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4482 if (ix86_save_reg (regno
, false))
4485 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
4487 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
4489 if (frame_pointer_needed
)
4491 /* Leave results in shorter dependency chains on CPUs that are
4492 able to grok it fast. */
4493 if (TARGET_USE_LEAVE
)
4494 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4495 else if (TARGET_64BIT
)
4496 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4498 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4502 /* Sibcall epilogues don't want a return instruction. */
4506 if (current_function_pops_args
&& current_function_args_size
)
4508 rtx popc
= GEN_INT (current_function_pops_args
);
      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */
4514 if (current_function_pops_args
>= 65536)
4516 rtx ecx
= gen_rtx_REG (SImode
, 2);
	  /* There is no "pascal" calling convention in the 64bit ABI.  */
4522 emit_insn (gen_popsi1 (ecx
));
4523 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
4524 emit_jump_insn (gen_return_indirect_internal (ecx
));
4527 emit_jump_insn (gen_return_pop_internal (popc
));
4530 emit_jump_insn (gen_return_internal ());
4533 /* Reset from the function's potential modifications. */
4536 ix86_output_function_epilogue (file
, size
)
4537 FILE *file ATTRIBUTE_UNUSED
;
4538 HOST_WIDE_INT size ATTRIBUTE_UNUSED
;
4540 if (pic_offset_table_rtx
)
4541 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
4544 /* Extract the parts of an RTL expression that is a valid memory address
4545 for an instruction. Return 0 if the structure of the address is
4546 grossly off. Return -1 if the address contains ASHIFT, so it is not
4547 strictly valid, but still used for computing length of lea instruction.
4551 ix86_decompose_address (addr
, out
)
4553 struct ix86_address
*out
;
4555 rtx base
= NULL_RTX
;
4556 rtx index
= NULL_RTX
;
4557 rtx disp
= NULL_RTX
;
4558 HOST_WIDE_INT scale
= 1;
4559 rtx scale_rtx
= NULL_RTX
;
4562 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
4564 else if (GET_CODE (addr
) == PLUS
)
4566 rtx op0
= XEXP (addr
, 0);
4567 rtx op1
= XEXP (addr
, 1);
4568 enum rtx_code code0
= GET_CODE (op0
);
4569 enum rtx_code code1
= GET_CODE (op1
);
4571 if (code0
== REG
|| code0
== SUBREG
)
4573 if (code1
== REG
|| code1
== SUBREG
)
4574 index
= op0
, base
= op1
; /* index + base */
4576 base
= op0
, disp
= op1
; /* base + displacement */
4578 else if (code0
== MULT
)
4580 index
= XEXP (op0
, 0);
4581 scale_rtx
= XEXP (op0
, 1);
4582 if (code1
== REG
|| code1
== SUBREG
)
4583 base
= op1
; /* index*scale + base */
4585 disp
= op1
; /* index*scale + disp */
4587 else if (code0
== PLUS
&& GET_CODE (XEXP (op0
, 0)) == MULT
)
4589 index
= XEXP (XEXP (op0
, 0), 0); /* index*scale + base + disp */
4590 scale_rtx
= XEXP (XEXP (op0
, 0), 1);
4591 base
= XEXP (op0
, 1);
4594 else if (code0
== PLUS
)
4596 index
= XEXP (op0
, 0); /* index + base + disp */
4597 base
= XEXP (op0
, 1);
4603 else if (GET_CODE (addr
) == MULT
)
4605 index
= XEXP (addr
, 0); /* index*scale */
4606 scale_rtx
= XEXP (addr
, 1);
4608 else if (GET_CODE (addr
) == ASHIFT
)
4612 /* We're called for lea too, which implements ashift on occasion. */
4613 index
= XEXP (addr
, 0);
4614 tmp
= XEXP (addr
, 1);
4615 if (GET_CODE (tmp
) != CONST_INT
)
4617 scale
= INTVAL (tmp
);
4618 if ((unsigned HOST_WIDE_INT
) scale
> 3)
4624 disp
= addr
; /* displacement */
4626 /* Extract the integral value of scale. */
4629 if (GET_CODE (scale_rtx
) != CONST_INT
)
4631 scale
= INTVAL (scale_rtx
);
  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
4635 if (base
&& index
&& scale
== 1
4636 && (index
== arg_pointer_rtx
|| index
== frame_pointer_rtx
4637 || index
== stack_pointer_rtx
))
4644 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4645 if ((base
== hard_frame_pointer_rtx
4646 || base
== frame_pointer_rtx
4647 || base
== arg_pointer_rtx
) && !disp
)
4650 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4651 Avoid this by transforming to [%esi+0]. */
4652 if (ix86_cpu
== PROCESSOR_K6
&& !optimize_size
4653 && base
&& !index
&& !disp
4655 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
4658 /* Special case: encode reg+reg instead of reg*2. */
4659 if (!base
&& index
&& scale
&& scale
== 2)
4660 base
= index
, scale
= 1;
4662 /* Special case: scaling cannot be encoded without base or displacement. */
4663 if (!base
&& !disp
&& index
&& scale
!= 1)
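
/* A hedged usage sketch for ix86_decompose_address: for an address such
   as the AT&T form 8(%eax,%ebx,4) one would expect base = %eax,
   index = %ebx, scale = 4 and disp = 8.  Illustrative only; the register
   choice is hypothetical and the block is kept out of the build.  */
#if 0
static void
decompose_address_example (rtx addr)
{
  struct ix86_address parts;

  if (ix86_decompose_address (addr, &parts))
    {
      /* parts.base, parts.index, parts.scale and parts.disp now hold the
	 four components; any of base, index and disp may be absent.  */
    }
}
#endif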
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
4680 ix86_address_cost (x
)
4683 struct ix86_address parts
;
4686 if (!ix86_decompose_address (x
, &parts
))
4689 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
4690 parts
.base
= SUBREG_REG (parts
.base
);
4691 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
4692 parts
.index
= SUBREG_REG (parts
.index
);
4694 /* More complex memory references are better. */
4695 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
4698 /* Attempt to minimize number of registers in the address. */
4700 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
4702 && (!REG_P (parts
.index
)
4703 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
4707 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
4709 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
4710 && parts
.base
!= parts
.index
)
  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     The following addressing modes are affected:
	[base+scale*index]
	[scale*index+disp]
	[base+index]

     The first and last case may be avoidable by explicitly coding the zero
     into the memory address, but I don't have an AMD-K6 machine handy to
     check this theory.  */
4729 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4730 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4731 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
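
/* A standalone restatement of the K6 penalty test above, using plain
   flags in place of the rtx fields.  Illustrative only (kept out of the
   build); for example, k6_slow_address_p (0, 1, 1, 4) models an address
   of the form (%eax,%ebx,4).  */
#if 0
static int
k6_slow_address_p (int has_disp, int has_base, int has_index, int scale)
{
  return (!has_disp && has_base && has_index && scale != 1)
	 || (has_disp && !has_base && has_index && scale != 1)
	 || (!has_disp && has_base && has_index && scale == 1);
}
#endif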
4737 /* If X is a machine specific address (i.e. a symbol or label being
4738 referenced as a displacement from the GOT implemented using an
4739 UNSPEC), then return the base term. Otherwise return X. */
4742 ix86_find_base_term (x
)
4749 if (GET_CODE (x
) != CONST
)
4752 if (GET_CODE (term
) == PLUS
4753 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
4754 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
4755 term
= XEXP (term
, 0);
4756 if (GET_CODE (term
) != UNSPEC
4757 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
4760 term
= XVECEXP (term
, 0, 0);
4762 if (GET_CODE (term
) != SYMBOL_REF
4763 && GET_CODE (term
) != LABEL_REF
)
4769 if (GET_CODE (x
) != PLUS
4770 || XEXP (x
, 0) != pic_offset_table_rtx
4771 || GET_CODE (XEXP (x
, 1)) != CONST
)
4774 term
= XEXP (XEXP (x
, 1), 0);
4776 if (GET_CODE (term
) == PLUS
&& GET_CODE (XEXP (term
, 1)) == CONST_INT
)
4777 term
= XEXP (term
, 0);
4779 if (GET_CODE (term
) != UNSPEC
4780 || XINT (term
, 1) != UNSPEC_GOTOFF
)
4783 term
= XVECEXP (term
, 0, 0);
4785 if (GET_CODE (term
) != SYMBOL_REF
4786 && GET_CODE (term
) != LABEL_REF
)
4792 /* Determine if a given RTX is a valid constant. We already know this
4793 satisfies CONSTANT_P. */
4796 legitimate_constant_p (x
)
4801 switch (GET_CODE (x
))
4804 /* TLS symbols are not constant. */
4805 if (tls_symbolic_operand (x
, Pmode
))
4810 inner
= XEXP (x
, 0);
4812 /* Offsets of TLS symbols are never valid.
4813 Discourage CSE from creating them. */
4814 if (GET_CODE (inner
) == PLUS
4815 && tls_symbolic_operand (XEXP (inner
, 0), Pmode
))
4818 /* Only some unspecs are valid as "constants". */
4819 if (GET_CODE (inner
) == UNSPEC
)
4820 switch (XINT (inner
, 1))
4823 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
4835 /* Otherwise we handle everything else in the move patterns. */
4839 /* Determine if a given RTX is a valid constant address. */
4842 constant_address_p (x
)
4845 switch (GET_CODE (x
))
4852 return TARGET_64BIT
;
4856 return !flag_pic
&& legitimate_constant_p (x
);
4863 /* Nonzero if the constant value X is a legitimate general operand
4864 when generating PIC code. It is given that flag_pic is on and
4865 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4868 legitimate_pic_operand_p (x
)
4873 switch (GET_CODE (x
))
4876 inner
= XEXP (x
, 0);
4878 /* Only some unspecs are valid as "constants". */
4879 if (GET_CODE (inner
) == UNSPEC
)
4880 switch (XINT (inner
, 1))
4883 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
4893 return legitimate_pic_address_disp_p (x
);
4900 /* Determine if a given CONST RTX is a valid memory displacement
4904 legitimate_pic_address_disp_p (disp
)
4909 /* In 64bit mode we can allow direct addresses of symbols and labels
4910 when they are not dynamic symbols. */
4914 if (GET_CODE (disp
) == CONST
)
4916 /* ??? Handle PIC code models */
4917 if (GET_CODE (x
) == PLUS
4918 && (GET_CODE (XEXP (x
, 1)) == CONST_INT
4919 && ix86_cmodel
== CM_SMALL_PIC
4920 && INTVAL (XEXP (x
, 1)) < 1024*1024*1024
4921 && INTVAL (XEXP (x
, 1)) > -1024*1024*1024))
4923 if (local_symbolic_operand (x
, Pmode
))
4926 if (GET_CODE (disp
) != CONST
)
4928 disp
= XEXP (disp
, 0);
      /* It is unsafe to allow PLUS expressions here; this limits the allowed
	 distance of GOT table references.  We should not need these anyway.  */
4934 if (GET_CODE (disp
) != UNSPEC
4935 || XINT (disp
, 1) != UNSPEC_GOTPCREL
)
4938 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
4939 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
4945 if (GET_CODE (disp
) == PLUS
)
4947 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
4949 disp
= XEXP (disp
, 0);
4953 if (GET_CODE (disp
) != UNSPEC
)
4956 switch (XINT (disp
, 1))
4961 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
4963 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
4964 case UNSPEC_GOTTPOFF
:
4967 return initial_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
4969 /* ??? Could support offset here. */
4972 return local_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
4974 /* ??? Could support offset here. */
4977 return local_dynamic_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
4983 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4984 memory address for an instruction. The MODE argument is the machine mode
4985 for the MEM expression that wants to use this address.
4987 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
4988 convert common non-canonical forms to canonical form so that they will
4992 legitimate_address_p (mode
, addr
, strict
)
4993 enum machine_mode mode
;
4997 struct ix86_address parts
;
4998 rtx base
, index
, disp
;
4999 HOST_WIDE_INT scale
;
5000 const char *reason
= NULL
;
5001 rtx reason_rtx
= NULL_RTX
;
5003 if (TARGET_DEBUG_ADDR
)
5006 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5007 GET_MODE_NAME (mode
), strict
);
5011 if (ix86_decompose_address (addr
, &parts
) <= 0)
5013 reason
= "decomposition failed";
5018 index
= parts
.index
;
5020 scale
= parts
.scale
;
  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */
5033 if (GET_CODE (base
) == SUBREG
)
5034 reg
= SUBREG_REG (base
);
5038 if (GET_CODE (reg
) != REG
)
5040 reason
= "base is not a register";
5044 if (GET_MODE (base
) != Pmode
)
5046 reason
= "base is not in Pmode";
5050 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
5051 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
5053 reason
= "base is not valid";
  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */
5069 if (GET_CODE (index
) == SUBREG
)
5070 reg
= SUBREG_REG (index
);
5074 if (GET_CODE (reg
) != REG
)
5076 reason
= "index is not a register";
5080 if (GET_MODE (index
) != Pmode
)
5082 reason
= "index is not in Pmode";
5086 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
5087 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
5089 reason
= "index is not valid";
5094 /* Validate scale factor. */
5097 reason_rtx
= GEN_INT (scale
);
5100 reason
= "scale without index";
5104 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
5106 reason
= "scale is not a valid multiplier";
5111 /* Validate displacement. */
5118 if (!x86_64_sign_extended_value (disp
))
5120 reason
= "displacement is out of range";
5126 if (GET_CODE (disp
) == CONST_DOUBLE
)
5128 reason
= "displacement is a const_double";
5133 if (GET_CODE (disp
) == CONST
5134 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
5135 switch (XINT (XEXP (disp
, 0), 1))
5139 case UNSPEC_GOTPCREL
:
5142 goto is_legitimate_pic
;
5144 case UNSPEC_GOTTPOFF
:
5150 reason
= "invalid address unspec";
5154 else if (flag_pic
&& SYMBOLIC_CONST (disp
))
5157 if (TARGET_64BIT
&& (index
|| base
))
5159 reason
= "non-constant pic memory reference";
5162 if (! legitimate_pic_address_disp_p (disp
))
5164 reason
= "displacement is an invalid pic construct";
	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is a good idea, unfortunately these constructs may
	     be created by the "adds using lea" optimization for incorrect
	     code.  Such code is nonsensical, but results in addressing the
	     GOT table with a pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by the
	     "addsi3" pattern, that later gets split to lea in the
	     case the output register differs from the input.  While this
	     can be handled by a separate addsi pattern for this case
	     that never results in lea, it seems to be an easier and
	     correct fix for the crash to disable this test.  */
5190 else if (!CONSTANT_ADDRESS_P (disp
))
5192 reason
= "displacement is not constant";
5197 /* Everything looks valid. */
5198 if (TARGET_DEBUG_ADDR
)
5199 fprintf (stderr
, "Success.\n");
5203 if (TARGET_DEBUG_ADDR
)
5205 fprintf (stderr
, "Error: %s\n", reason
);
5206 debug_rtx (reason_rtx
);
/* Return a unique alias set for the GOT.  */

static HOST_WIDE_INT
ix86_GOT_alias_set ()
{
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
5222 /* Return a legitimate reference for ORIG (an address) using the
5223 register REG. If REG is 0, a new pseudo is generated.
5225 There are two types of references that must be handled:
5227 1. Global data references must load the address from the GOT, via
5228 the PIC reg. An insn is emitted to do this load, and the reg is
5231 2. Static data references, constant pool addresses, and code labels
5232 compute the address as an offset from the GOT, whose base is in
5233 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5234 differentiate them from global data objects. The returned
5235 address is the PIC reg + an unspec constant.
5237 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5238 reg also appears in the address. */
5241 legitimize_pic_address (orig
, reg
)
5249 if (local_symbolic_operand (addr
, Pmode
))
5251 /* In 64bit mode we can address such objects directly. */
5256 /* This symbol may be referenced via a displacement from the PIC
5257 base address (@GOTOFF). */
5259 if (reload_in_progress
)
5260 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5261 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
5262 new = gen_rtx_CONST (Pmode
, new);
5263 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5267 emit_move_insn (reg
, new);
5272 else if (GET_CODE (addr
) == SYMBOL_REF
)
5276 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
5277 new = gen_rtx_CONST (Pmode
, new);
5278 new = gen_rtx_MEM (Pmode
, new);
5279 RTX_UNCHANGING_P (new) = 1;
5280 set_mem_alias_set (new, ix86_GOT_alias_set ());
5283 reg
= gen_reg_rtx (Pmode
);
	  /* Use gen_movsi directly, otherwise the address is loaded
	     into a register for CSE.  We don't want to CSE these addresses;
	     instead we CSE addresses from the GOT table, so skip this.  */
5287 emit_insn (gen_movsi (reg
, new));
5292 /* This symbol must be referenced via a load from the
5293 Global Offset Table (@GOT). */
5295 if (reload_in_progress
)
5296 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5297 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
5298 new = gen_rtx_CONST (Pmode
, new);
5299 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5300 new = gen_rtx_MEM (Pmode
, new);
5301 RTX_UNCHANGING_P (new) = 1;
5302 set_mem_alias_set (new, ix86_GOT_alias_set ());
5305 reg
= gen_reg_rtx (Pmode
);
5306 emit_move_insn (reg
, new);
5312 if (GET_CODE (addr
) == CONST
)
5314 addr
= XEXP (addr
, 0);
5316 /* We must match stuff we generate before. Assume the only
5317 unspecs that can get here are ours. Not that we could do
5318 anything with them anyway... */
5319 if (GET_CODE (addr
) == UNSPEC
5320 || (GET_CODE (addr
) == PLUS
5321 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
5323 if (GET_CODE (addr
) != PLUS
)
5326 if (GET_CODE (addr
) == PLUS
)
5328 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
5330 /* Check first to see if this is a constant offset from a @GOTOFF
5331 symbol reference. */
5332 if (local_symbolic_operand (op0
, Pmode
)
5333 && GET_CODE (op1
) == CONST_INT
)
5337 if (reload_in_progress
)
5338 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5339 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
5341 new = gen_rtx_PLUS (Pmode
, new, op1
);
5342 new = gen_rtx_CONST (Pmode
, new);
5343 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5347 emit_move_insn (reg
, new);
5353 /* ??? We need to limit offsets here. */
5358 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
5359 new = legitimize_pic_address (XEXP (addr
, 1),
5360 base
== reg
? NULL_RTX
: reg
);
5362 if (GET_CODE (new) == CONST_INT
)
5363 new = plus_constant (base
, INTVAL (new));
5366 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
5368 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
5369 new = XEXP (new, 1);
5371 new = gen_rtx_PLUS (Pmode
, base
, new);
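
/* A hedged sketch of the two reference styles legitimize_pic_address
   handles above, written as the ia32 assembly one would typically expect
   at -fpic; the symbol names and registers are hypothetical and the block
   is kept out of the build.  */
#if 0
/* Local data: address is the PIC register plus a link-time offset.  */
asm ("leal local_sym@GOTOFF(%ebx), %eax");
/* Global data: address is loaded from the symbol's GOT slot.  */
asm ("movl global_sym@GOT(%ebx), %eax");
#endif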
5380 ix86_encode_section_info (decl
, first
)
5382 int first ATTRIBUTE_UNUSED
;
5384 bool local_p
= (*targetm
.binds_local_p
) (decl
);
5387 rtl
= DECL_P (decl
) ? DECL_RTL (decl
) : TREE_CST_RTL (decl
);
5388 if (GET_CODE (rtl
) != MEM
)
5390 symbol
= XEXP (rtl
, 0);
5391 if (GET_CODE (symbol
) != SYMBOL_REF
)
5394 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5395 symbol so that we may access it directly in the GOT. */
5398 SYMBOL_REF_FLAG (symbol
) = local_p
;
5400 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5401 "local dynamic", "initial exec" or "local exec" TLS models
5404 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL (decl
))
5406 const char *symbol_str
;
5409 enum tls_model kind
;
5414 kind
= TLS_MODEL_LOCAL_EXEC
;
5416 kind
= TLS_MODEL_INITIAL_EXEC
;
5418 /* Local dynamic is inefficient when we're not combining the
5419 parts of the address. */
5420 else if (optimize
&& local_p
)
5421 kind
= TLS_MODEL_LOCAL_DYNAMIC
;
5423 kind
= TLS_MODEL_GLOBAL_DYNAMIC
;
5424 if (kind
< flag_tls_default
)
5425 kind
= flag_tls_default
;
5427 symbol_str
= XSTR (symbol
, 0);
5429 if (symbol_str
[0] == '%')
5431 if (symbol_str
[1] == tls_model_chars
[kind
])
5435 len
= strlen (symbol_str
) + 1;
5436 newstr
= alloca (len
+ 2);
5439 newstr
[1] = tls_model_chars
[kind
];
5440 memcpy (newstr
+ 2, symbol_str
, len
);
5442 XSTR (symbol
, 0) = ggc_alloc_string (newstr
, len
+ 2 - 1);
5446 /* Undo the above when printing symbol names. */
5449 ix86_strip_name_encoding (str
)
/* Load the thread pointer into a register.  */
static rtx
get_thread_pointer ()
{
  rtx tp;

  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  tp = gen_rtx_CONST (Pmode, tp);
  tp = force_reg (Pmode, tp);

  return tp;
}
5473 /* Try machine-dependent ways of modifying an illegitimate address
5474 to be legitimate. If we find one, return the new, valid address.
5475 This macro is used in only one place: `memory_address' in explow.c.
5477 OLDX is the address as it was before break_out_memory_refs was called.
5478 In some cases it is useful to look at this to decide what needs to be done.
5480 MODE and WIN are passed so that this macro can use
5481 GO_IF_LEGITIMATE_ADDRESS.
5483 It is always safe for this macro to do nothing. It exists to recognize
5484 opportunities to optimize the output.
5486 For the 80386, we handle X+REG by loading X into a register R and
5487 using R+REG. R will go in a general reg and indexing will be used.
5488 However, if REG is a broken-out memory address or multiplication,
5489 nothing needs to be done because REG can certainly go in a general reg.
5491 When -fpic is used, special handling is needed for symbolic references.
5492 See comments by legitimize_pic_address in i386.c for details. */
5495 legitimize_address (x
, oldx
, mode
)
5497 register rtx oldx ATTRIBUTE_UNUSED
;
5498 enum machine_mode mode
;
5503 if (TARGET_DEBUG_ADDR
)
5505 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5506 GET_MODE_NAME (mode
));
5510 log
= tls_symbolic_operand (x
, mode
);
5513 rtx dest
, base
, off
, pic
;
5517 case TLS_MODEL_GLOBAL_DYNAMIC
:
5518 dest
= gen_reg_rtx (Pmode
);
5519 emit_insn (gen_tls_global_dynamic (dest
, x
));
5522 case TLS_MODEL_LOCAL_DYNAMIC
:
5523 base
= gen_reg_rtx (Pmode
);
5524 emit_insn (gen_tls_local_dynamic_base (base
));
5526 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
5527 off
= gen_rtx_CONST (Pmode
, off
);
5529 return gen_rtx_PLUS (Pmode
, base
, off
);
5531 case TLS_MODEL_INITIAL_EXEC
:
5534 if (reload_in_progress
)
5535 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5536 pic
= pic_offset_table_rtx
;
5540 pic
= gen_reg_rtx (Pmode
);
5541 emit_insn (gen_set_got (pic
));
5544 base
= get_thread_pointer ();
5546 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_GOTTPOFF
);
5547 off
= gen_rtx_CONST (Pmode
, off
);
5548 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
5549 off
= gen_rtx_MEM (Pmode
, off
);
5550 RTX_UNCHANGING_P (off
) = 1;
5551 set_mem_alias_set (off
, ix86_GOT_alias_set ());
	  /* Damn Sun for specifying a set of dynamic relocations without
	     considering the two-operand nature of the architecture!
	     We'd be much better off with a "GOTNTPOFF" relocation that
	     already contained the negated constant.  */
5557 /* ??? Using negl and reg+reg addressing appears to be a lose
5558 size-wise. The negl is two bytes, just like the extra movl
5559 incurred by the two-operand subl, but reg+reg addressing
5560 uses the two-byte modrm form, unlike plain reg. */
5562 dest
= gen_reg_rtx (Pmode
);
5563 emit_insn (gen_subsi3 (dest
, base
, off
));
5566 case TLS_MODEL_LOCAL_EXEC
:
5567 base
= get_thread_pointer ();
5569 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
5570 TARGET_GNU_TLS
? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
5571 off
= gen_rtx_CONST (Pmode
, off
);
5574 return gen_rtx_PLUS (Pmode
, base
, off
);
5577 dest
= gen_reg_rtx (Pmode
);
5578 emit_insn (gen_subsi3 (dest
, base
, off
));
5589 if (flag_pic
&& SYMBOLIC_CONST (x
))
5590 return legitimize_pic_address (x
, 0);
5592 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5593 if (GET_CODE (x
) == ASHIFT
5594 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5595 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
5598 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
5599 GEN_INT (1 << log
));
5602 if (GET_CODE (x
) == PLUS
)
5604 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5606 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
5607 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
5608 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
5611 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
5612 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
5613 GEN_INT (1 << log
));
5616 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
5617 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
5618 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
5621 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
5622 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
5623 GEN_INT (1 << log
));
5626 /* Put multiply first if it isn't already. */
5627 if (GET_CODE (XEXP (x
, 1)) == MULT
)
5629 rtx tmp
= XEXP (x
, 0);
5630 XEXP (x
, 0) = XEXP (x
, 1);
5635 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5636 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5637 created by virtual register instantiation, register elimination, and
5638 similar optimizations. */
5639 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
5642 x
= gen_rtx_PLUS (Pmode
,
5643 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
5644 XEXP (XEXP (x
, 1), 0)),
5645 XEXP (XEXP (x
, 1), 1));
5649 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5650 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5651 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
5652 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5653 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
5654 && CONSTANT_P (XEXP (x
, 1)))
5657 rtx other
= NULL_RTX
;
5659 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5661 constant
= XEXP (x
, 1);
5662 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5664 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
5666 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5667 other
= XEXP (x
, 1);
5675 x
= gen_rtx_PLUS (Pmode
,
5676 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
5677 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
5678 plus_constant (other
, INTVAL (constant
)));
5682 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
5685 if (GET_CODE (XEXP (x
, 0)) == MULT
)
5688 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
5691 if (GET_CODE (XEXP (x
, 1)) == MULT
)
5694 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
5698 && GET_CODE (XEXP (x
, 1)) == REG
5699 && GET_CODE (XEXP (x
, 0)) == REG
)
5702 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
5705 x
= legitimize_pic_address (x
, 0);
5708 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
5711 if (GET_CODE (XEXP (x
, 0)) == REG
)
5713 register rtx temp
= gen_reg_rtx (Pmode
);
5714 register rtx val
= force_operand (XEXP (x
, 1), temp
);
5716 emit_move_insn (temp
, val
);
5722 else if (GET_CODE (XEXP (x
, 1)) == REG
)
5724 register rtx temp
= gen_reg_rtx (Pmode
);
5725 register rtx val
= force_operand (XEXP (x
, 0), temp
);
5727 emit_move_insn (temp
, val
);
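
/* legitimize_address above canonicalizes shifts by 0, 1, 2 or 3 into
   multiplies so that the result can match the scaled-index part of an
   x86 address.  A standalone sketch of the count-to-scale mapping used
   there (illustrative only, kept out of the build).  */
#if 0
static int
shift_count_to_scale (int count)
{
  /* exact_log2 above runs in the other direction; counts 0..3 map to
     the legal address scales 1, 2, 4 and 8.  */
  return 1 << count;
}
#endif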
5737 /* Print an integer constant expression in assembler syntax. Addition
5738 and subtraction are the only arithmetic that may appear in these
5739 expressions. FILE is the stdio stream to write to, X is the rtx, and
5740 CODE is the operand print code from the output string. */
5743 output_pic_addr_const (file
, x
, code
)
5750 switch (GET_CODE (x
))
5760 assemble_name (file
, XSTR (x
, 0));
5761 if (code
== 'P' && ! SYMBOL_REF_FLAG (x
))
5762 fputs ("@PLT", file
);
5769 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
5770 assemble_name (asm_out_file
, buf
);
5774 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
5778 /* This used to output parentheses around the expression,
5779 but that does not work on the 386 (either ATT or BSD assembler). */
5780 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5784 if (GET_MODE (x
) == VOIDmode
)
5786 /* We can use %d if the number is <32 bits and positive. */
5787 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
5788 fprintf (file
, "0x%lx%08lx",
5789 (unsigned long) CONST_DOUBLE_HIGH (x
),
5790 (unsigned long) CONST_DOUBLE_LOW (x
));
5792 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
5795 /* We can't handle floating point constants;
5796 PRINT_OPERAND must handle them. */
5797 output_operand_lossage ("floating constant misused");
5801 /* Some assemblers need integer constants to appear first. */
5802 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
5804 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5806 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5808 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5810 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5812 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5819 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
5820 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5822 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5823 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
5827 if (XVECLEN (x
, 0) != 1)
5829 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
5830 switch (XINT (x
, 1))
5833 fputs ("@GOT", file
);
5836 fputs ("@GOTOFF", file
);
5838 case UNSPEC_GOTPCREL
:
5839 fputs ("@GOTPCREL(%rip)", file
);
5841 case UNSPEC_GOTTPOFF
:
5842 fputs ("@GOTTPOFF", file
);
5845 fputs ("@TPOFF", file
);
5848 fputs ("@NTPOFF", file
);
5851 fputs ("@DTPOFF", file
);
5854 output_operand_lossage ("invalid UNSPEC as operand");
5860 output_operand_lossage ("invalid expression as operand");
5864 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5865 We need to handle our special PIC relocations. */
5868 i386_dwarf_output_addr_const (file
, x
)
5873 fprintf (file
, "%s", TARGET_64BIT
? ASM_QUAD
: ASM_LONG
);
5877 fprintf (file
, "%s", ASM_LONG
);
5880 output_pic_addr_const (file
, x
, '\0');
5882 output_addr_const (file
, x
);
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.  */
5891 i386_simplify_dwarf_addr (orig_x
)
5896 if (GET_CODE (x
) == MEM
)
5901 if (GET_CODE (x
) != CONST
5902 || GET_CODE (XEXP (x
, 0)) != UNSPEC
5903 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
5904 || GET_CODE (orig_x
) != MEM
)
5906 return XVECEXP (XEXP (x
, 0), 0, 0);
5909 if (GET_CODE (x
) != PLUS
5910 || GET_CODE (XEXP (x
, 1)) != CONST
)
5913 if (GET_CODE (XEXP (x
, 0)) == REG
5914 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
5915 /* %ebx + GOT/GOTOFF */
5917 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
5919 /* %ebx + %reg * scale + GOT/GOTOFF */
5921 if (GET_CODE (XEXP (y
, 0)) == REG
5922 && REGNO (XEXP (y
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
5924 else if (GET_CODE (XEXP (y
, 1)) == REG
5925 && REGNO (XEXP (y
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
5929 if (GET_CODE (y
) != REG
5930 && GET_CODE (y
) != MULT
5931 && GET_CODE (y
) != ASHIFT
)
5937 x
= XEXP (XEXP (x
, 1), 0);
5938 if (GET_CODE (x
) == UNSPEC
5939 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
5940 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
5943 return gen_rtx_PLUS (Pmode
, y
, XVECEXP (x
, 0, 0));
5944 return XVECEXP (x
, 0, 0);
5947 if (GET_CODE (x
) == PLUS
5948 && GET_CODE (XEXP (x
, 0)) == UNSPEC
5949 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5950 && ((XINT (XEXP (x
, 0), 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
5951 || (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTOFF
5952 && GET_CODE (orig_x
) != MEM
)))
5954 x
= gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
5956 return gen_rtx_PLUS (Pmode
, y
, x
);
5964 put_condition_code (code
, mode
, reverse
, fp
, file
)
5966 enum machine_mode mode
;
5972 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
5974 enum rtx_code second_code
, bypass_code
;
5975 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
5976 if (bypass_code
!= NIL
|| second_code
!= NIL
)
5978 code
= ix86_fp_compare_code_to_integer (code
);
5982 code
= reverse_condition (code
);
5993 if (mode
!= CCmode
&& mode
!= CCNOmode
&& mode
!= CCGCmode
)
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
6002 suffix
= fp
? "nbe" : "a";
6005 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
6007 else if (mode
== CCmode
|| mode
== CCGCmode
)
6018 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
6020 else if (mode
== CCmode
|| mode
== CCGCmode
)
6029 suffix
= fp
? "nb" : "ae";
6032 if (mode
!= CCmode
&& mode
!= CCGCmode
&& mode
!= CCNOmode
)
6042 suffix
= fp
? "u" : "p";
6045 suffix
= fp
? "nu" : "np";
6050 fputs (suffix
, file
);
6054 print_reg (x
, code
, file
)
6059 if (REGNO (x
) == ARG_POINTER_REGNUM
6060 || REGNO (x
) == FRAME_POINTER_REGNUM
6061 || REGNO (x
) == FLAGS_REG
6062 || REGNO (x
) == FPSR_REG
)
6065 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
6068 if (code
== 'w' || MMX_REG_P (x
))
6070 else if (code
== 'b')
6072 else if (code
== 'k')
6074 else if (code
== 'q')
6076 else if (code
== 'y')
6078 else if (code
== 'h')
6081 code
= GET_MODE_SIZE (GET_MODE (x
));
  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers.  */
6085 if (REX_INT_REG_P (x
))
6092 error ("extended registers have no high halves");
6095 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6098 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6101 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6104 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6107 error ("unsupported operand size for extended register");
6115 if (STACK_TOP_P (x
))
6117 fputs ("st(0)", file
);
6124 if (! ANY_FP_REG_P (x
))
6125 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
6129 fputs (hi_reg_name
[REGNO (x
)], file
);
6132 fputs (qi_reg_name
[REGNO (x
)], file
);
6135 fputs (qi_high_reg_name
[REGNO (x
)], file
);
/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */
6147 get_some_local_dynamic_name ()
6151 if (cfun
->machine
->some_ld_name
)
6152 return cfun
->machine
->some_ld_name
;
6154 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
6156 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
6157 return cfun
->machine
->some_ld_name
;
6163 get_some_local_dynamic_name_1 (px
, data
)
6165 void *data ATTRIBUTE_UNUSED
;
6169 if (GET_CODE (x
) == SYMBOL_REF
6170 && local_dynamic_symbolic_operand (x
, Pmode
))
6172 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
6180 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6181 C -- print opcode suffix for set/cmov insn.
6182 c -- like C, but print reversed condition
6183 F,f -- likewise, but for floating-point.
6184 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6186 R -- print the prefix for register names.
6187 z -- print the opcode suffix for the size of the current operand.
6188 * -- print a star (in certain assembler syntax)
6189 A -- print an absolute memory reference.
6190 w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
6193 b -- print the QImode name of the register for the indicated operand.
6194 %b0 would print %al if operands[0] is reg 0.
6195 w -- likewise, print the HImode name of the register.
6196 k -- likewise, print the SImode name of the register.
6197 q -- likewise, print the DImode name of the register.
6198 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6199 y -- print "st(0)" instead of "st" as a register.
6200 D -- print condition for SSE cmp instruction.
6201 P -- if PIC, print an @PLT suffix.
6202 X -- don't print any sort of PIC '@' suffix for a symbol.
6203 & -- print some in-use local-dynamic symbol name.
6207 print_operand (file
, x
, code
)
6217 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6222 assemble_name (file
, get_some_local_dynamic_name ());
6226 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6228 else if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6230 /* Intel syntax. For absolute addresses, registers should not
6231 be surrounded by braces. */
6232 if (GET_CODE (x
) != REG
)
6235 PRINT_OPERAND (file
, x
, 0);
6243 PRINT_OPERAND (file
, x
, 0);
6248 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6253 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6258 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6263 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6268 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6273 if (ASSEMBLER_DIALECT
== ASM_ATT
)
      /* 387 opcodes don't get size suffixes if the operands are
	 registers.  */
6280 if (STACK_REG_P (x
))
6283 /* Likewise if using Intel opcodes. */
6284 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6287 /* This is the size of op from size of operand. */
6288 switch (GET_MODE_SIZE (GET_MODE (x
)))
6291 #ifdef HAVE_GAS_FILDS_FISTS
6297 if (GET_MODE (x
) == SFmode
)
6312 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
6314 #ifdef GAS_MNEMONICS
6340 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
6342 PRINT_OPERAND (file
, x
, 0);
      /* Little bit of braindamage here.  The SSE compare instructions
	 use completely different names for the comparisons than the
	 fp conditional moves do.  */
6351 switch (GET_CODE (x
))
6366 fputs ("unord", file
);
6370 fputs ("neq", file
);
6374 fputs ("nlt", file
);
6378 fputs ("nle", file
);
6381 fputs ("ord", file
);
6389 #ifdef CMOV_SUN_AS_SYNTAX
6390 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6392 switch (GET_MODE (x
))
6394 case HImode
: putc ('w', file
); break;
6396 case SFmode
: putc ('l', file
); break;
6398 case DFmode
: putc ('q', file
); break;
6406 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
6409 #ifdef CMOV_SUN_AS_SYNTAX
6410 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6413 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
6416 /* Like above, but reverse condition */
6418 /* Check to see if argument to %c is really a constant
6419 and not a condition code which needs to be reversed. */
6420 if (GET_RTX_CLASS (GET_CODE (x
)) != '<')
6422 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6425 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
6428 #ifdef CMOV_SUN_AS_SYNTAX
6429 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6432 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
6438 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
6441 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
6444 int pred_val
= INTVAL (XEXP (x
, 0));
6446 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
6447 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
6449 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
6450 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
	      /* Emit hints only in the case the default branch prediction
		 heuristics would fail.  */
6454 if (taken
!= cputaken
)
6456 /* We use 3e (DS) prefix for taken branches and
6457 2e (CS) prefix for not taken branches. */
6459 fputs ("ds ; ", file
);
6461 fputs ("cs ; ", file
);
6468 output_operand_lossage ("invalid operand code `%c'", code
);
6472 if (GET_CODE (x
) == REG
)
6474 PRINT_REG (x
, code
, file
);
6477 else if (GET_CODE (x
) == MEM
)
6479 /* No `byte ptr' prefix for call instructions. */
6480 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
6483 switch (GET_MODE_SIZE (GET_MODE (x
)))
6485 case 1: size
= "BYTE"; break;
6486 case 2: size
= "WORD"; break;
6487 case 4: size
= "DWORD"; break;
6488 case 8: size
= "QWORD"; break;
6489 case 12: size
= "XWORD"; break;
6490 case 16: size
= "XMMWORD"; break;
6495 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6498 else if (code
== 'w')
6500 else if (code
== 'k')
6504 fputs (" PTR ", file
);
6508 if (flag_pic
&& CONSTANT_ADDRESS_P (x
))
6509 output_pic_addr_const (file
, x
, code
);
6510 /* Avoid (%rip) for call operands. */
6511 else if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
6512 && GET_CODE (x
) != CONST_INT
)
6513 output_addr_const (file
, x
);
6514 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
6515 output_operand_lossage ("invalid constraints for operand");
6520 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
6525 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
6526 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
6528 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6530 fprintf (file
, "0x%lx", l
);
6533 /* These float cases don't actually occur as immediate operands. */
6534 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
6539 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
6540 REAL_VALUE_TO_DECIMAL (r
, "%.22e", dstr
);
6541 fprintf (file
, "%s", dstr
);
6544 else if (GET_CODE (x
) == CONST_DOUBLE
6545 && (GET_MODE (x
) == XFmode
|| GET_MODE (x
) == TFmode
))
6550 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
6551 REAL_VALUE_TO_DECIMAL (r
, "%.22e", dstr
);
6552 fprintf (file
, "%s", dstr
);
6555 else if (GET_CODE (x
) == CONST
6556 && GET_CODE (XEXP (x
, 0)) == UNSPEC
6557 && XINT (XEXP (x
, 0), 1) == UNSPEC_TP
)
6559 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6560 fputs ("DWORD PTR ", file
);
6561 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
6563 fputs ("gs:0", file
);
6570 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
6572 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6575 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
6576 || GET_CODE (x
) == LABEL_REF
)
6578 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6581 fputs ("OFFSET FLAT:", file
);
6584 if (GET_CODE (x
) == CONST_INT
)
6585 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6587 output_pic_addr_const (file
, x
, code
);
6589 output_addr_const (file
, x
);
6593 /* Print a memory operand whose address is ADDR. */
6596 print_operand_address (file
, addr
)
6600 struct ix86_address parts
;
6601 rtx base
, index
, disp
;
6604 if (! ix86_decompose_address (addr
, &parts
))
6608 index
= parts
.index
;
6610 scale
= parts
.scale
;
6612 if (!base
&& !index
)
6614 /* Displacement only requires special attention. */
6616 if (GET_CODE (disp
) == CONST_INT
)
6618 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6620 if (USER_LABEL_PREFIX
[0] == 0)
6622 fputs ("ds:", file
);
6624 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (addr
));
6627 output_pic_addr_const (file
, addr
, 0);
6629 output_addr_const (file
, addr
);
6631 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6633 && (GET_CODE (addr
) == SYMBOL_REF
6634 || GET_CODE (addr
) == LABEL_REF
6635 || (GET_CODE (addr
) == CONST
6636 && GET_CODE (XEXP (addr
, 0)) == PLUS
6637 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
6638 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)))
6639 fputs ("(%rip)", file
);
6643 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6648 output_pic_addr_const (file
, disp
, 0);
6649 else if (GET_CODE (disp
) == LABEL_REF
)
6650 output_asm_label (disp
);
6652 output_addr_const (file
, disp
);
6657 PRINT_REG (base
, 0, file
);
6661 PRINT_REG (index
, 0, file
);
6663 fprintf (file
, ",%d", scale
);
6669 rtx offset
= NULL_RTX
;
6673 /* Pull out the offset of a symbol; print any symbol itself. */
6674 if (GET_CODE (disp
) == CONST
6675 && GET_CODE (XEXP (disp
, 0)) == PLUS
6676 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
6678 offset
= XEXP (XEXP (disp
, 0), 1);
6679 disp
= gen_rtx_CONST (VOIDmode
,
6680 XEXP (XEXP (disp
, 0), 0));
6684 output_pic_addr_const (file
, disp
, 0);
6685 else if (GET_CODE (disp
) == LABEL_REF
)
6686 output_asm_label (disp
);
6687 else if (GET_CODE (disp
) == CONST_INT
)
6690 output_addr_const (file
, disp
);
6696 PRINT_REG (base
, 0, file
);
6699 if (INTVAL (offset
) >= 0)
6701 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
6705 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
6712 PRINT_REG (index
, 0, file
);
6714 fprintf (file
, "*%d", scale
);
6722 output_addr_const_extra (file
, x
)
6728 if (GET_CODE (x
) != UNSPEC
)
6731 op
= XVECEXP (x
, 0, 0);
6732 switch (XINT (x
, 1))
6734 case UNSPEC_GOTTPOFF
:
6735 output_addr_const (file
, op
);
6736 fputs ("@GOTTPOFF", file
);
6739 output_addr_const (file
, op
);
6740 fputs ("@TPOFF", file
);
6743 output_addr_const (file
, op
);
6744 fputs ("@NTPOFF", file
);
6747 output_addr_const (file
, op
);
6748 fputs ("@DTPOFF", file
);
/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_di (operands, num, lo_half, hi_half)
     rtx operands[];
     int num;
     rtx lo_half[], hi_half[];
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  lo_half[num] = adjust_address (op, SImode, 0);
	  hi_half[num] = adjust_address (op, SImode, 4);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 4);
	}
    }
}
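
/* A hedged usage sketch for split_di: splitting one DImode operand into
   its SImode halves, e.g. when expanding a 64-bit move as two 32-bit
   moves.  Illustrative only (kept out of the build); the wrapper and its
   name are hypothetical.  */
#if 0
static void
split_di_example (rtx op)
{
  rtx operands[1], lo[1], hi[1];

  operands[0] = op;
  split_di (operands, 1, lo, hi);
  /* lo[0] now refers to bytes 0..3 of OP and hi[0] to bytes 4..7.  */
}
#endif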
/* Split one or more TImode RTL references into pairs of DImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_ti (operands, num, lo_half, hi_half)
     rtx operands[];
     int num;
     rtx lo_half[], hi_half[];
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses, but we
	 still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  lo_half[num] = adjust_address (op, DImode, 0);
	  hi_half[num] = adjust_address (op, DImode, 8);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
	}
    }
}
6823 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6824 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6825 is the expression of the binary operation. The output may either be
6826 emitted here, or returned to the caller, like all output_* functions.
6828 There is no guarantee that the operands are the same mode, as they
6829 might be within FLOAT or FLOAT_EXTEND expressions. */
6831 #ifndef SYSV386_COMPAT
6832 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6833 wants to fix the assemblers because that causes incompatibility
6834 with gcc. No-one wants to fix gcc because that causes
6835 incompatibility with assemblers... You can use the option of
6836 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6837 #define SYSV386_COMPAT 1
const char *
output_387_binary_op (insn, operands)
     rtx insn;
     rtx *operands;
{
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
           && REGNO (operands[0]) == REGNO (operands[1])
           && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
          || (REG_P (operands[2])
              && REGNO (operands[0]) == REGNO (operands[2])
              && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else
    abort ();
#endif

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fiadd";
      else
        p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fisub";
      else
        p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fimul";
      else
        p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fidiv";
      else
        p = "fdiv";
      ssep = "div";
      break;

    default:
      abort ();
    }

  if (is_sse)
    {
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
        strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
        strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
        {
          rtx temp = operands[2];
          operands[2] = operands[1];
          operands[1] = temp;
        }

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
        {
          p = "%z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
          if (STACK_TOP_P (operands[0]))
            /* How is it that we are storing to a dead operand[2]?
               Well, presumably operands[1] is dead too.  We can't
               store the result to st(0) as st(0) gets popped on this
               instruction.  Instead store to operands[2] (which I
               think has to be st(1)).  st(1) will be popped later.
               gcc <= 2.8.1 didn't have this check and generated
               assembly code that the Unixware assembler rejected.  */
            p = "p\t{%0, %2|%2, %0}";   /* st(1) = st(0) op st(1); pop */
          else
            p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
          break;
        }

      if (STACK_TOP_P (operands[0]))
        p = "\t{%y2, %0|%0, %y2}";      /* st(0) = st(0) op st(r2) */
      else
        p = "\t{%2, %0|%0, %2}";        /* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (GET_CODE (operands[1]) == MEM)
        {
          p = "r%z1\t%1";
          break;
        }

      if (GET_CODE (operands[2]) == MEM)
        {
          p = "%z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
#if SYSV386_COMPAT
          /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
             derived assemblers, confusingly reverse the direction of
             the operation for fsub{r} and fdiv{r} when the
             destination register is not st(0).  The Intel assembler
             doesn't have this brain damage.  Read !SYSV386_COMPAT to
             figure out what the hardware really does.  */
          if (STACK_TOP_P (operands[0]))
            p = "{p\t%0, %2|rp\t%2, %0}";
          else
            p = "{rp\t%2, %0|p\t%0, %2}";
#else
          if (STACK_TOP_P (operands[0]))
            /* As above for fmul/fadd, we can't store to st(0).  */
            p = "rp\t{%0, %2|%2, %0}";  /* st(1) = st(0) op st(1); pop */
          else
            p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
#endif
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        {
#if SYSV386_COMPAT
          if (STACK_TOP_P (operands[0]))
            p = "{rp\t%0, %1|p\t%1, %0}";
          else
            p = "{p\t%1, %0|rp\t%0, %1}";
#else
          if (STACK_TOP_P (operands[0]))
            p = "p\t{%0, %1|%1, %0}";   /* st(1) = st(1) op st(0); pop */
          else
            p = "rp\t{%1, %0|%0, %1}";  /* st(r2) = st(0) op st(r2); pop */
#endif
          break;
        }

      if (STACK_TOP_P (operands[0]))
        {
          if (STACK_TOP_P (operands[1]))
            p = "\t{%y2, %0|%0, %y2}";  /* st(0) = st(0) op st(r2) */
          else
            p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
          break;
        }
      else if (STACK_TOP_P (operands[1]))
        {
#if SYSV386_COMPAT
          p = "{\t%1, %0|r\t%0, %1}";
#else
          p = "r\t{%1, %0|%0, %1}";     /* st(r2) = st(0) op st(r2) */
#endif
          break;
        }
      else
        {
#if SYSV386_COMPAT
          p = "{r\t%2, %0|\t%0, %2}";
#else
          p = "\t{%2, %0|%0, %2}";      /* st(r1) = st(r1) op st(0) */
#endif
          break;
        }

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}
/* Output code to initialize control word copies used by
   trunc?f?i patterns.  NORMAL is set to the current control word, while
   ROUND_DOWN is set to a control word rounding downwards.  */

void
emit_i387_cw_initialization (normal, round_down)
     rtx normal, round_down;
{
  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (normal));
  emit_move_insn (reg, normal);
  if (!TARGET_PARTIAL_REG_STALL && !optimize_size
      && !TARGET_64BIT)
    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
  else
    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
  emit_move_insn (round_down, reg);
}
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (insn, operands)
     rtx insn;
     rtx *operands;
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (! STACK_TOP_P (operands[1]))
    abort ();

  if (GET_CODE (operands[0]) != MEM)
    abort ();

  output_asm_insn ("fldcw\t%3", operands);
  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z0\t%0", operands);
  else
    output_asm_insn ("fist%z0\t%0", operands);
  output_asm_insn ("fldcw\t%2", operands);

  return "";
}
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.  */

const char *
output_fp_compare (insn, operands, eflags_p, unordered_p)
     rtx insn;
     rtx *operands;
     int eflags_p, unordered_p;
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);

  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }
  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
        if (unordered_p)
          return "ucomiss\t{%1, %0|%0, %1}";
        else
          return "comiss\t{%1, %0|%0, %y}";
      else
        if (unordered_p)
          return "ucomisd\t{%1, %0|%0, %1}";
        else
          return "comisd\t{%1, %0|%0, %y}";
    }

  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack and the other operand die,
         then this must be a `fcompp' float compare.  */

      if (eflags_p == 1)
        {
          /* There is no double popping fcomi variant.  Fortunately,
             eflags is immune from the fstp's cc clobbering.  */
          if (unordered_p)
            output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
          else
            output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
          return "fstp\t%y0";
        }

      if (unordered_p)
        return "fucompp\n\tfnstsw\t%0";
      else
        return "fcompp\n\tfnstsw\t%0";
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[24] =
      {
        "fcomi\t{%y1, %0|%0, %y1}",
        "fcomip\t{%y1, %0|%0, %y1}",
        "fucomi\t{%y1, %0|%0, %y1}",
        "fucomip\t{%y1, %0|%0, %y1}",

        "fcom%z2\t%y2\n\tfnstsw\t%0",
        "fcomp%z2\t%y2\n\tfnstsw\t%0",
        "fucom%z2\t%y2\n\tfnstsw\t%0",
        "fucomp%z2\t%y2\n\tfnstsw\t%0",

        "ficom%z2\t%y2\n\tfnstsw\t%0",
        "ficomp%z2\t%y2\n\tfnstsw\t%0",
      };

      int mask;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;
void
ix86_output_addr_vec_elt (file, value)
     FILE *file;
     int value;
{
  const char *directive = ASM_LONG;

  if (TARGET_64BIT)
    directive = ASM_QUAD;

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

void
ix86_output_addr_diff_elt (file, value, rel)
     FILE *file;
     int value, rel;
{
  if (TARGET_64BIT)
    fprintf (file, "%s%s%d-%s%d\n",
             ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
  else
    asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
                 ASM_LONG, LPREFIX, value);
}
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate.  */

void
ix86_expand_clear (dest)
     rtx dest;
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  if (!reload_completed)
    abort ();

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
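
/* Hard register 17 above is the flags register; the CLOBBER is needed
   because "xor reg, reg", unlike "mov $0, reg", modifies EFLAGS.  */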
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

static rtx
maybe_get_pool_constant (x)
     rtx x;
{
  x = XEXP (x, 0);

  if (flag_pic)
    {
      if (GET_CODE (x) != PLUS)
        return NULL_RTX;
      if (XEXP (x, 0) != pic_offset_table_rtx)
        return NULL_RTX;
      x = XEXP (x, 1);
      if (GET_CODE (x) != CONST)
        return NULL_RTX;
      x = XEXP (x, 0);
      if (GET_CODE (x) != UNSPEC)
        return NULL_RTX;
      if (XINT (x, 1) != UNSPEC_GOTOFF)
        return NULL_RTX;
      x = XVECEXP (x, 0, 0);
    }

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}
void
ix86_expand_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  int strict = (reload_in_progress || reload_completed);
  rtx insn, op0, op1, tmp;

  op0 = operands[0];
  op1 = operands[1];

  /* ??? We have a slight problem.  We need to say that tls symbols are
     not legitimate constants so that reload does not helpfully reload
     these constants from a REG_EQUIV, which we cannot handle.  (Recall
     that general- and local-dynamic address resolution requires a
     function call.)

     However, if we say that tls symbols are not legitimate constants,
     then emit_move_insn helpfully drops them into the constant pool.

     It is far easier to work around emit_move_insn than reload.  Recognize
     the MEM that we would have created and extract the symbol_ref.  */

  if (GET_CODE (op1) == MEM
      && RTX_UNCHANGING_P (op1))
    {
      tmp = maybe_get_pool_constant (op1);
      /* Note that we only care about symbolic constants here, which
         unlike CONST_INT will always have a proper mode.  */
      if (tmp && GET_MODE (tmp) == Pmode)
        op1 = tmp;
    }

  if (tls_symbolic_operand (op1, Pmode))
    {
      op1 = legitimize_address (op1, op1, VOIDmode);
      if (GET_CODE (op0) == MEM)
        {
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
          op1 = tmp;
        }
    }
  else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
      if (GET_CODE (op0) == MEM)
        op1 = force_reg (Pmode, op1);
      else
        {
          rtx temp = op0;
          if (GET_CODE (temp) != REG)
            temp = gen_reg_rtx (Pmode);
          temp = legitimize_pic_address (op1, temp);
          if (temp == op0)
            return;
          op1 = temp;
        }
    }
  else
    {
      if (GET_CODE (op0) == MEM
          && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
              || !push_operand (op0, mode))
          && GET_CODE (op1) == MEM)
        op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
          && ! general_no_elim_operand (op1, mode))
        op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into a register
         to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
          && immediate_operand (op1, mode)
          && !x86_64_zero_extended_value (op1)
          && !register_operand (op0, mode)
          && optimize && !reload_completed && !reload_in_progress)
        op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
        {
          /* If we are loading a floating point constant to a register,
             force the value to memory now, since we'll get better code
             out the back end.  */

          if (strict)
            ;
          else if (GET_CODE (op1) == CONST_DOUBLE
                   && register_operand (op0, mode))
            op1 = validize_mem (force_const_mem (mode, op1));
        }
    }

  insn = gen_rtx_SET (VOIDmode, op0, op1);

  emit_insn (insn);
}
void
ix86_expand_vector_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (operands[0], mode)
      && CONSTANT_P (operands[1]))
    {
      rtx addr = gen_reg_rtx (Pmode);
      emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
      operands[1] = gen_rtx_MEM (mode, addr);
    }

  /* Make operand1 a register if it isn't already.  */
  if ((reload_in_progress | reload_completed) == 0
      && !register_operand (operands[0], mode)
      && !register_operand (operands[1], mode)
      && operands[1] != CONST0_RTX (mode))
    {
      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
      emit_move_insn (operands[0], temp);
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
}
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
          || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
        matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
               && rtx_equal_p (dst, src2))
        matching_memory = 2;
      else
        dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
        src2 = force_reg (mode, src2);
      else
        src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
        dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
        src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
        src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
        abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
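
/* Example of the constraint enforced below: with a memory destination the
   insn must be a two-address update such as

       a = a + b        (addl %ebx, a)

   whereas a = b + c with "a" in memory has no matching operand and must be
   computed in a register first and stored afterwards.  */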
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[3];
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutable, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
            || (GET_RTX_CLASS (code) == 'c'
                && rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutable and source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != 'c'
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;
  return 1;
}
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
        matching_memory = 1;
      else
        dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
        dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
        src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  */
      if (code != NOT)
        abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (code, mode, operands)
     enum rtx_code code ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[2] ATTRIBUTE_UNUSED;
{
  /* If one of the operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (insn, req_mode)
     rtx insn;
     enum machine_mode req_mode;
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
          && (req_mode != CCmode
              || XEXP (SET_SRC (set), 1) != const0_rtx))
        return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
        return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
        return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
        return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (code)
     enum rtx_code code ATTRIBUTE_UNUSED;
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
enum machine_mode
ix86_cc_mode (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:                    /* ZF=0 */
    case NE:                    /* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:                   /* CF=0 */
    case GTU:                   /* CF=0 & ZF=0 */
    case LTU:                   /* CF=1 */
    case LEU:                   /* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:                    /* SF=OF   or   SF=0 */
    case LT:                    /* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
        return CCGOCmode;
      else
        /* For other cases the Carry flag is not required.  */
        return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss the jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:                    /* ZF=0 & SF=OF */
    case LE:                    /* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
        return CCNOmode;
      else
        return CCGCmode;
      /* The strcmp pattern does (use flags) and combine may ask us for a
         proper mode.  */
    case USE:
      return CCmode;
    default:
      abort ();
    }
}
/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (code)
     enum rtx_code code ATTRIBUTE_UNUSED;
{
  enum rtx_code swapped_code = swap_condition (code);
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
          || (ix86_fp_comparison_cost (swapped_code)
              == ix86_fp_comparison_fcomi_cost (swapped_code)));
}
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (code, pop0, pop1)
     enum rtx_code code;
     rtx *pop0, *pop1;
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
          || op_mode == XFmode
          || op_mode == TFmode
          || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
         things around if they appear profitable, otherwise force op0
         into a register.  */

      if (standard_80387_constant_p (op0) == 0
          || (GET_CODE (op0) == MEM
              && ! (standard_80387_constant_p (op1) == 0
                    || GET_CODE (op1) == MEM)))
        {
          rtx tmp;
          tmp = op0, op0 = op1, op1 = tmp;
          code = swap_condition (code);
        }

      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
        {
          if (standard_80387_constant_p (op1))
            op1 = force_reg (op_mode, op1);
          else
            op1 = validize_mem (force_const_mem (op_mode, op1));
        }
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

static enum rtx_code
ix86_fp_compare_code_to_integer (code)
     enum rtx_code code;

/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for the branch that will
   branch around FIRST_CODE and SECOND_CODE.  If one of the branches
   is not required, its code is set to NIL.
   We never require more than two branches.  */

void
ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
     enum rtx_code code, *bypass_code, *first_code, *second_code;
{
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF PF CF
     >      0  0  0
     <      0  0  1
     =      1  0  0
     un     1  1  1 */

  switch (code)
    {
    case GT:                    /* GTU - CF=0 & ZF=0 */
    case GE:                    /* GEU - CF=0 */
    case ORDERED:               /* PF=0 */
    case UNORDERED:             /* PF=1 */
    case UNEQ:                  /* EQ - ZF=1 */
    case UNLT:                  /* LTU - CF=1 */
    case UNLE:                  /* LEU - CF=1 | ZF=1 */
    case LTGT:                  /* EQ - ZF=0 */
      break;
    case LT:                    /* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:                    /* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:                    /* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:                    /* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:                  /* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:                  /* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  if (!TARGET_IEEE_FP)
    {
      *second_code = NIL;
      *bypass_code = NIL;
    }
}
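
/* Example of the splitting done above when TARGET_IEEE_FP: a plain LT must
   not be true for unordered operands, so it is emitted as

       jp   bypass          ; PF set -> unordered, skip the test
       jb   target          ; CF set -> less than
     bypass:

   i.e. first_code = UNLT with bypass_code = UNORDERED, while NE, which must
   be true for unordered operands, instead gets second_code = UNORDERED.  */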
/* Return the cost of a comparison done using fcom plus arithmetic operations
   on AX.  All following functions use the number of instructions as the cost
   metric.  In the future this should be tweaked to compute bytes for
   optimize_size and take into account performance of various instructions on
   various CPUs.  */

static int
ix86_fp_comparison_arithmetics_cost (code)
     enum rtx_code code;
{
  if (!TARGET_IEEE_FP)
    return 4;
  /* The cost of code output by ix86_expand_fp_compare.  */
/* Return the cost of a comparison done using the fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */

static int
ix86_fp_comparison_fcomi_cost (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not supported -
     this prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != NIL || second_code != NIL) + 2;
}
/* Return the cost of a comparison done using the sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */

static int
ix86_fp_comparison_sahf_cost (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not preferred -
     this prevents gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != NIL || second_code != NIL) + 3;
}
/* Compute the cost of the comparison done using any method.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */

static int
ix86_fp_comparison_cost (code)
     enum rtx_code code;
{
  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
  int min;

  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
  sahf_cost = ix86_fp_comparison_sahf_cost (code);

  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
  if (min > sahf_cost)
    min = sahf_cost;
  if (min > fcomi_cost)
    min = fcomi_cost;
  return min;
}
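
/* In the fnstsw-based sequences below the FPU status word is examined in AH,
   where C0, C2 and C3 appear as bits 0x01, 0x04 and 0x40 respectively; the
   constant 0x45 therefore masks all three condition bits at once, and 0x05
   masks C0|C2.  After an fcom, "greater" leaves all three clear, "less" sets
   C0, "equal" sets C3, and "unordered" sets all of C0, C2 and C3.  */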
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

rtx
ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
     enum rtx_code code;
     rtx op0, op1, scratch;
     rtx *second_test;
     rtx *bypass_test;
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do the fcomi/sahf based test when profitable.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
        {
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                             tmp);
          emit_insn (tmp);
        }
      else
        {
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
          if (!scratch)
            scratch = gen_reg_rtx (HImode);
          emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
          emit_insn (gen_x86_sahf_1 (scratch));
        }

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
        *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
      if (second_code != NIL)
        *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
        {
        case GT:
        case UNGT:
          if (code == GT || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;
              code = GEU;
            }
          break;
        case LT:
        case UNLT:
          if (code == LT && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
              code = NE;
            }
          break;
        case GE:
        case UNGE:
          if (code == GE || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x01)));
              code = NE;
            }
          break;
        case LE:
        case UNLE:
          if (code == LE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = LTU;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = NE;
            }
          break;
        case EQ:
        case UNEQ:
          if (code == EQ && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = NE;
            }
          break;
        case NE:
        case LTGT:
          if (code == NE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x40)));
              code = NE;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = EQ;
            }
          break;

        case UNORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = NE;
          break;
        case ORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = EQ;
          break;

        default:
          abort ();
        }
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}
rtx
ix86_expand_compare (code, second_test, bypass_test)
     enum rtx_code code;
     rtx *second_test, *bypass_test;
{
  rtx op0, op1, ret;

  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
                                  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}
/* Return true if the CODE will result in a nontrivial jump sequence.  */

bool
ix86_fp_jump_nontrivial_p (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != NIL || second_code != NIL;
}
void
ix86_expand_branch (code, label)
     enum rtx_code code;
     rtx label;
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
    case TFmode:
      {
        rtvec vec;
        int use_fcomi;
        enum rtx_code bypass_code, first_code, second_code;

        code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
                                             &ix86_compare_op1);

        ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

        /* Check whether we will use the natural sequence with one jump.  If
           so, we can expand the jump early.  Otherwise delay expansion by
           creating a compound insn to not confuse optimizers.  */
        if (bypass_code == NIL && second_code == NIL)
          {
            ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx, NULL_RTX);
          }
        else
          {
            tmp = gen_rtx_fmt_ee (code, VOIDmode,
                                  ix86_compare_op0, ix86_compare_op1);
            tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                        gen_rtx_LABEL_REF (VOIDmode, label),
                                        pc_rtx);
            tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

            use_fcomi = ix86_use_fcomi_compare (code);
            vec = rtvec_alloc (3 + !use_fcomi);
            RTVEC_ELT (vec, 0) = tmp;
            RTVEC_ELT (vec, 1)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
            RTVEC_ELT (vec, 2)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
            if (! use_fcomi)
              RTVEC_ELT (vec, 3)
                = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

            emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
          }
        return;
      }

    case DImode:
      if (TARGET_64BIT)
        goto simple;
      /* Expand DImode branch into multiple compare+branch.  */
      {
        rtx lo[2], hi[2], label2;
        enum rtx_code code1, code2, code3;

        if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
          {
            tmp = ix86_compare_op0;
            ix86_compare_op0 = ix86_compare_op1;
            ix86_compare_op1 = tmp;
            code = swap_condition (code);
          }
        split_di (&ix86_compare_op0, 1, lo+0, hi+0);
        split_di (&ix86_compare_op1, 1, lo+1, hi+1);

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            && (!optimize_size
                || hi[1] == const0_rtx || lo[1] == const0_rtx))
          {
            rtx xor0, xor1;

            xor1 = hi[0];
            if (hi[1] != const0_rtx)
              xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            xor0 = lo[0];
            if (lo[1] != const0_rtx)
              xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (SImode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            ix86_compare_op0 = tmp;
            ix86_compare_op1 = const0_rtx;
            ix86_expand_branch (code, label);
            return;
          }

        /* Otherwise, if we are doing less-than or greater-or-equal-than,
           op1 is a constant and the low word is zero, then we can just
           examine the high word.  */

        if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
          switch (code)
            {
            case LT: case LTU: case GE: case GEU:
              ix86_compare_op0 = hi[0];
              ix86_compare_op1 = hi[1];
              ix86_expand_branch (code, label);
              return;
            default:
              break;
            }

        /* Otherwise, we need two or three jumps.  */

        label2 = gen_label_rtx ();

        code1 = code;
        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

        switch (code)
          {
          case LT: case GT: case LTU: case GTU:
            break;

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = NIL; code2 = NE;  break;
          case NE:   code2 = NIL; break;

          default:
            abort ();
          }

        /*
         * a < b =>
         *    if (hi(a) < hi(b)) goto true;
         *    if (hi(a) > hi(b)) goto false;
         *    if (lo(a) < lo(b)) goto true;
         *  false:
         */

        ix86_compare_op0 = hi[0];
        ix86_compare_op1 = hi[1];

        if (code1 != NIL)
          ix86_expand_branch (code1, label);
        if (code2 != NIL)
          ix86_expand_branch (code2, label2);

        ix86_compare_op0 = lo[0];
        ix86_compare_op1 = lo[1];
        ix86_expand_branch (code3, label);

        if (code2 != NIL)
          emit_label (label2);
        return;
      }

    default:
      abort ();
    }
}
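
/* For the functions below: ix86_expand_fp_compare may hand back up to two
   auxiliary tests.  BYPASS_TEST, when present, is a condition that must be
   checked first to branch around the main test (it catches the unordered
   case the main condition would get wrong); SECOND_TEST, when present, is an
   additional condition whose result must be OR'ed with the main test.  */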
/* Split a branch based on a floating point condition.  */

void
ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
     enum rtx_code code;
     rtx op1, op2, target1, target2, tmp;
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
                                      tmp, &second, &bypass);

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
         Assume the BYPASS and SECOND tests to always be a test
         for UNORDERED.  */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
         to be updated.  Later we may run some experiments and see
         if unordered values are more frequent in practice.  */
      if (bypass)
        bypass_probability = 1;
      if (second)
        second_probability = 1;
    }
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode,
                                                 bypass,
                                                 gen_rtx_LABEL_REF (VOIDmode,
                                                                    label),
                                                 pc_rtx)));
      if (bypass_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (bypass_probability),
                               REG_NOTES (i));
    }
  i = emit_jump_insn (gen_rtx_SET
                      (VOIDmode, pc_rtx,
                       gen_rtx_IF_THEN_ELSE (VOIDmode,
                                             condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
                           GEN_INT (probability),
                           REG_NOTES (i));
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
                                                 target2)));
      if (second_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (second_probability),
                               REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}
int
ix86_expand_setcc (code, dest)
     enum rtx_code code;
     rtx dest;
{
  rtx ret, tmp, tmpreg;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == DImode
      && !TARGET_64BIT)
    return 0; /* FAIL */

  if (GET_MODE (dest) != QImode)
    abort ();

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
        {
          if (second_test)
            abort ();
          test = bypass_test;
          bypass = 1;
          PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
        }
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
        emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
        emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  return 1; /* DONE */
}
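
/* The LTU/GEU fast path below relies on the x86 idiom

       cmp   op0, op1          ; sets carry iff op0 < op1 (unsigned)
       sbb   dest, dest        ; dest = dest - dest - CF  ->  0 or -1

   so after the sbb, DEST is -1 when the comparison was "below" and 0
   otherwise; the constant results are then built up with add/or/and/not.  */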
int
ix86_expand_int_movcc (operands)
     rtx operands[];
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
     When the comparison is done with an immediate, we can convert it to LTU
     or GEU by altering the integer.  */

  if ((code == LEU || code == GTU)
      && GET_CODE (ix86_compare_op1) == CONST_INT
      && INTVAL (ix86_compare_op1) != -1
      /* For x86-64, the immediate field in the instruction is 32-bit
         signed, so we can't increment a DImode value above 0x7fffffff.  */
      && (!TARGET_64BIT
          || GET_MODE (ix86_compare_op0) != DImode
          || INTVAL (ix86_compare_op1) != 0x7fffffff)
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      if (code == LEU)
        code = LTU;
      else
        code = GEU;
      ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
                                       GET_MODE (ix86_compare_op0));
    }

  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != DImode || TARGET_64BIT)
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      if ((compare_code == LTU || compare_code == GEU)
          && !second_test && !bypass_test)
        {
          /* Detect overlap between destination and compare sources.  */
          rtx tmp = out;

          /* To simplify the rest of the code, restrict to the GEU case.  */
          if (compare_code == LTU)
            {
              HOST_WIDE_INT tmp = ct;
              ct = cf;
              cf = tmp;
              compare_code = reverse_condition (compare_code);
              code = reverse_condition (code);
            }
          diff = ct - cf;

          if (reg_overlap_mentioned_p (out, ix86_compare_op0)
              || reg_overlap_mentioned_p (out, ix86_compare_op1))
            tmp = gen_reg_rtx (mode);

          emit_insn (compare_seq);
          if (mode == DImode)
            emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
          else
            emit_insn (gen_x86_movsicc_0_m1 (tmp));

          if (diff == 1)
            {
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           tmp, GEN_INT (ct),
                                           tmp, 1, OPTAB_DIRECT);
            }
          else if (cf == -1)
            {
              tmp = expand_simple_binop (mode, IOR,
                                         tmp, GEN_INT (ct),
                                         tmp, 1, OPTAB_DIRECT);
            }
          else if (diff == -1 && ct)
            {
              tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
              if (cf)
                tmp = expand_simple_binop (mode, PLUS,
                                           tmp, GEN_INT (cf),
                                           tmp, 1, OPTAB_DIRECT);
            }
          else
            {
              /*
               * [notl dest]
               * andl cf - ct, dest
               * [addl dest, ct]
               */
              if (cf == 0)
                {
                  cf = ct;
                  ct = 0;
                  tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
                }
              tmp = expand_simple_binop (mode, AND,
                                         tmp,
                                         gen_int_mode (cf - ct, mode),
                                         tmp, 1, OPTAB_DIRECT);
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           tmp, GEN_INT (ct),
                                           tmp, 1, OPTAB_DIRECT);
            }

          if (tmp != out)
            emit_move_insn (out, tmp);

          return 1; /* DONE */
        }

      diff = ct - cf;
      if (diff < 0)
        {
          HOST_WIDE_INT tmp;
          tmp = ct, ct = cf, cf = tmp;
          diff = -diff;
          if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
            {
              /* We may be reversing an unordered compare to a normal compare,
                 that is not valid in general (we may convert a non-trapping
                 condition to a trapping one), however on i386 we currently
                 emit all comparisons unordered.  */
              compare_code = reverse_condition_maybe_unordered (compare_code);
              code = reverse_condition_maybe_unordered (code);
            }
          else
            {
              compare_code = reverse_condition (compare_code);
              code = reverse_condition (code);
            }
        }

      compare_code = NIL;
      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
          && GET_CODE (ix86_compare_op1) == CONST_INT)
        {
          if (ix86_compare_op1 == const0_rtx
              && (code == LT || code == GE))
            compare_code = code;
          else if (ix86_compare_op1 == constm1_rtx)
            {
              if (code == LE)
                compare_code = LT;
              else if (code == GT)
                compare_code = GE;
            }
        }

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != NIL
          && GET_MODE (ix86_compare_op0) == GET_MODE (out)
          && (cf == -1 || ct == -1))
        {
          /* If the lea code below could be used, only optimize
             if it results in a 2 insn sequence.  */

          if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
                 || diff == 3 || diff == 5 || diff == 9)
              || (compare_code == LT && ct == -1)
              || (compare_code == GE && cf == -1))
            {
              /*
               * notl op1       (if necessary)
               * sarl $31, op1
               * orl cf, op1
               */
              if (ct != -1)
                {
                  cf = ct;
                  ct = -1;
                  code = reverse_condition (code);
                }

              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, -1);

              out = expand_simple_binop (mode, IOR,
                                         out, GEN_INT (cf),
                                         out, 1, OPTAB_DIRECT);
              if (out != operands[0])
                emit_move_insn (operands[0], out);

              return 1; /* DONE */
            }
        }

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
           || diff == 3 || diff == 5 || diff == 9)
          && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
        {
          /*
           * xorl dest,dest
           * cmpl op1,op2
           * setcc dest
           * lea cf(dest*(ct-cf)),dest
           *
           * This also catches the degenerate setcc-only case.
           */

          rtx out1;

          out = emit_store_flag (out, code, ix86_compare_op0,
                                 ix86_compare_op1, VOIDmode, 0, 1);

          /* On x86_64 the lea instruction operates on Pmode, so we need to
             get the arithmetic done in the proper mode to match.  */
          out1 = out;
          if (diff == 1)
            tmp = out1;
          else
            {
              tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
              if (diff & 1)
                tmp = gen_rtx_PLUS (mode, tmp, out1);
            }
          if (cf != 0)
            tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));

          if (tmp != out
              && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
            {
              rtx clob;

              clob = gen_rtx_REG (CCmode, FLAGS_REG);
              clob = gen_rtx_CLOBBER (VOIDmode, clob);

              tmp = gen_rtx_SET (VOIDmode, out, tmp);
              tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
              emit_insn (tmp);
            }
          else
            emit_insn (gen_rtx_SET (VOIDmode, out, tmp));

          if (out != operands[0])
            emit_move_insn (operands[0], out);

          return 1; /* DONE */
        }

      /*
       * General case:                  Jumpful:
       *   xorl dest,dest               cmpl op1, op2
       *   cmpl op1, op2                movl ct, dest
       *   setcc dest                   jcc 1f
       *   decl dest                    movl cf, dest
       *   andl (cf-ct),dest            1:
       *   addl ct,dest
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       *
       * %%% Parameterize branch_cost on the tuning architecture, then
       * use that.  The 80386 couldn't care less about mispredicts.
       */

      if (!optimize_size && !TARGET_CMOVE)
        {
          if (ct == 0)
            {
              ct = cf;
              cf = 0;
              if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
                /* We may be reversing an unordered compare to a normal
                   compare, that is not valid in general (we may convert a
                   non-trapping condition to a trapping one), however on i386
                   we currently emit all comparisons unordered.  */
                code = reverse_condition_maybe_unordered (code);
              else
                {
                  code = reverse_condition (code);
                  if (compare_code != NIL)
                    compare_code = reverse_condition (compare_code);
                }
            }

          if (compare_code != NIL)
            {
              /* notl op1       (if needed)
                 sarl $31, op1
                 andl (cf-ct), op1
                 addl ct, op1

                 For x < 0 (resp. x <= -1) there will be no notl,
                 so if possible swap the constants to get rid of the
                 complement.
                 True/false will be -1/0 while the code below (store flag
                 followed by decrement) is 0/-1, so the constants need
                 to be exchanged once more.  */

              if (compare_code == GE || !cf)
                {
                  code = reverse_condition (code);
                  compare_code = LT;
                }
              else
                {
                  HOST_WIDE_INT tmp = cf;
                  cf = ct;
                  ct = tmp;
                }

              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, -1);
            }
          else
            {
              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, 1);

              out = expand_simple_binop (mode, PLUS,
                                         out, constm1_rtx,
                                         out, 1, OPTAB_DIRECT);
            }

          out = expand_simple_binop (mode, AND,
                                     out,
                                     gen_int_mode (cf - ct, mode),
                                     out, 1, OPTAB_DIRECT);
          out = expand_simple_binop (mode, PLUS,
                                     out, GEN_INT (ct),
                                     out, 1, OPTAB_DIRECT);
          if (out != operands[0])
            emit_move_insn (operands[0], out);

          return 1; /* DONE */
        }
    }

  if (!TARGET_CMOVE)
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (optimize_size)
        return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load a
         constant with the above and mask it in with a logical operation.  */

      if (GET_CODE (operands[2]) == CONST_INT)
        {
          var = operands[3];
          if (INTVAL (operands[2]) == 0)
            operands[3] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[2]) == -1)
            operands[3] = const0_rtx, op = ior_optab;
          else
            return 0; /* FAIL */
        }
      else if (GET_CODE (operands[3]) == CONST_INT)
        {
          var = operands[2];
          if (INTVAL (operands[3]) == 0)
            operands[2] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[3]) == -1)
            operands[2] = const0_rtx, op = ior_optab;
          else
            return 0; /* FAIL */
        }
      else
        return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
        return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
                          OPTAB_WIDEN);
      if (out != orig_out)
        emit_move_insn (orig_out, out);

      return 1; /* DONE */
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }
  if (! register_operand (operands[2], VOIDmode)
      && ! register_operand (operands[3], VOIDmode))
    operands[2] = force_reg (mode, operands[2]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode,
                                                compare_op, operands[2],
                                                operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (mode,
                                                  bypass_test,
                                                  operands[3],
                                                  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (mode,
                                                  second_test,
                                                  operands[2],
                                                  operands[0])));

  return 1; /* DONE */
}
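
/* A note for the SSE min/max path in ix86_expand_fp_movcc below: minss/minsd
   and maxss/maxsd return their second source operand when the operands are
   unordered, which is why the transformation into a plain min/max is only
   attempted when the conditional-move operands match the comparison operands
   in the order the instruction expects; otherwise the generic cmov path is
   used.  */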
int
ix86_expand_fp_movcc (operands)
     rtx operands[];
{
  enum rtx_code code;
  rtx tmp;
  rtx compare_op, second_test, bypass_test;

  /* For SF/DFmode conditional moves based on comparisons
     in the same mode, we may want to use SSE min/max instructions.  */
  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
      && (!TARGET_IEEE_FP
          || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
      /* We may be called from the post-reload splitter.  */
      && (!REG_P (operands[0])
          || SSE_REG_P (operands[0])
          || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
    {
      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
      code = GET_CODE (operands[1]);

      /* See if we have a (cross) match between the comparison operands and
         the conditional move operands.  */
      if (rtx_equal_p (operands[2], op1))
        {
          rtx tmp = op0;
          op0 = op1;
          op1 = tmp;
          code = reverse_condition_maybe_unordered (code);
        }
      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
        {
          /* Check for min operation.  */
          if (code == LT)
            {
              operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
              if (memory_operand (op0, VOIDmode))
                op0 = force_reg (GET_MODE (operands[0]), op0);
              if (GET_MODE (operands[0]) == SFmode)
                emit_insn (gen_minsf3 (operands[0], op0, op1));
              else
                emit_insn (gen_mindf3 (operands[0], op0, op1));
              return 1;
            }
          /* Check for max operation.  */
          if (code == GT)
            {
              operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
              if (memory_operand (op0, VOIDmode))
                op0 = force_reg (GET_MODE (operands[0]), op0);
              if (GET_MODE (operands[0]) == SFmode)
                emit_insn (gen_maxsf3 (operands[0], op0, op1));
              else
                emit_insn (gen_maxdf3 (operands[0], op0, op1));
              return 1;
            }
        }
      /* Manage the condition to be an sse_comparison_operator.  In case we
         are in non-ieee mode, try to canonicalize the destination operand
         to be first in the comparison - this helps reload to avoid extra
         moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
          || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
        {
          rtx tmp = ix86_compare_op0;
          ix86_compare_op0 = ix86_compare_op1;
          ix86_compare_op1 = tmp;
          operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
                                        VOIDmode, ix86_compare_op0,
                                        ix86_compare_op1);
        }
      /* Similarly try to manage the result to be the first operand of the
         conditional move.  We also don't support the NE comparison on SSE,
         so try to avoid it.  */
      if ((rtx_equal_p (operands[0], operands[3])
           && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
          || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
        {
          rtx tmp = operands[2];
          operands[2] = operands[3];
          operands[3] = tmp;
          operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
                                          (GET_CODE (operands[1])),
                                        VOIDmode, ix86_compare_op0,
                                        ix86_compare_op1);
        }
      if (GET_MODE (operands[0]) == SFmode)
        emit_insn (gen_sse_movsfcc (operands[0], operands[1],
                                    operands[2], operands[3],
                                    ix86_compare_op0, ix86_compare_op1));
      else
        emit_insn (gen_sse_movdfcc (operands[0], operands[1],
                                    operands[2], operands[3],
                                    ix86_compare_op0, ix86_compare_op1));
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      if (second_test != NULL || bypass_test != NULL)
        abort ();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                                compare_op,
                                                operands[2],
                                                operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                                  bypass_test,
                                                  operands[3],
                                                  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                                  second_test,
                                                  operands[2],
                                                  operands[0])));

  return 1;
}
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  int size;

  if (!TARGET_64BIT)
    size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
        operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
        abort ();

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
        split_di (&operand, 1, &parts[0], &parts[1]);
      else
        {
          if (REG_P (operand))
            {
              if (!reload_completed)
                abort ();
              parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
              if (size == 3)
                parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, SImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, SImode, 4);
              if (size == 3)
                parts[2] = adjust_address (operand, SImode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              switch (mode)
                {
                case XFmode:
                case TFmode:
                  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case DFmode:
                  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
                  break;
                default:
                  abort ();
                }
              parts[1] = gen_int_mode (l[1], SImode);
              parts[0] = gen_int_mode (l[0], SImode);
            }
          else
            abort ();
        }
    }
  else
    {
      if (mode == TImode)
        split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
        {
          if (REG_P (operand))
            {
              if (!reload_completed)
                abort ();
              parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, DImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, SImode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[3];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
              /* Do not use shift by 32 to avoid warning on 32bit systems.  */
              if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[0]
                  = gen_int_mode
                      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
                       DImode);
              else
                parts[0] = immed_double_const (l[0], l[1], DImode);
              parts[1] = gen_int_mode (l[2], SImode);
            }
          else
            abort ();
        }
    }

  return size;
}
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

void
ix86_split_long_move (operands)
     rtx operands[];
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For a 64bit target this is a single move.  By hiding the fact
     here we simplify the i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
         fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
          && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
        operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
        {
          operands[0] = copy_rtx (operands[0]);
          PUT_MODE (operands[0], Pmode);
        }
      else
        operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
           && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting a push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
        part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
                                     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
                                   XEXP (part[1][1], 0));
    }

  /* We need to do the copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
        collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
        collisions++;
      if (nparts == 3
          && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
        collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
          && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
        {
          rtx tmp;
          tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
          tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
        }

      /* If there are more collisions, we can't handle it by reordering.
         Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
        {
          collisions = 1;
          emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
                                  XEXP (part[1][0], 0)));
          part[1][0] = change_address (part[1][0],
                                       TARGET_64BIT ? DImode : SImode,
                                       part[0][nparts - 1]);
          part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
          if (nparts == 3)
            part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
        }
    }

  if (push)
    {
      if (!TARGET_64BIT)
        {
          if (nparts == 3)
            {
              /* We use only the first 12 bytes of a TFmode value, but for
                 pushing we are required to adjust the stack as if we were
                 pushing a real 16 byte value.  */
              if (mode == TFmode && !TARGET_64BIT)
                emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                       GEN_INT (-4)));
              emit_move_insn (part[0][2], part[1][2]);
            }
        }
      else
        {
          /* In 64bit mode we don't have a 32bit push available.  In case this
             is a register, it is OK - we will just use the larger counterpart.
             We also retype memory - this comes from an attempt to avoid the
             REX prefix on moving the second half of a TFmode value.  */
          if (GET_MODE (part[1][1]) == SImode)
            {
              if (GET_CODE (part[1][1]) == MEM)
                part[1][1] = adjust_address (part[1][1], DImode, 0);
              else if (REG_P (part[1][1]))
                part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
              else
                abort ();
              if (GET_MODE (part[1][0]) == SImode)
                part[1][0] = part[1][1];
            }
        }
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose the correct order to not overwrite the source before it is
     copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
           || (nparts == 3
               && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
          && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      if (nparts == 3)
        {
          operands[2] = part[0][2];
          operands[3] = part[0][1];
          operands[4] = part[0][0];
          operands[5] = part[1][2];
          operands[6] = part[1][1];
          operands[7] = part[1][0];
        }
      else
        {
          operands[2] = part[0][1];
          operands[3] = part[0][0];
          operands[5] = part[1][1];
          operands[6] = part[1][0];
        }
    }
  else
    {
      if (nparts == 3)
        {
          operands[2] = part[0][0];
          operands[3] = part[0][1];
          operands[4] = part[0][2];
          operands[5] = part[1][0];
          operands[6] = part[1][1];
          operands[7] = part[1][2];
        }
      else
        {
          operands[2] = part[0][0];
          operands[3] = part[0][1];
          operands[5] = part[1][0];
          operands[6] = part[1][1];
        }
    }
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

ix86_split_ashldi (operands, scratch)
     rtx *operands, scratch;
  rtx low[2], high[2];

  if (GET_CODE (operands[2]) == CONST_INT)
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;
          emit_move_insn (high[0], low[1]);
          emit_move_insn (low[0], const0_rtx);
            emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
          emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
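          /* Added note (not in the original source): for a constant count
             below 32 this pairs the double-width shift SHLD, which computes
             high = (high << count) | (low >> (32 - count)), with a plain SHL
             of the low word, forming the 64-bit left shift in registers.  */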
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
          if (! no_new_pseudos)
            scratch = force_reg (SImode, const0_rtx);
          else
            emit_move_insn (scratch, const0_rtx);
          emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
                                          scratch));
        emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));

ix86_split_ashrdi (operands, scratch)
     rtx *operands, scratch;
  rtx low[2], high[2];

  if (GET_CODE (operands[2]) == CONST_INT)
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;
          emit_move_insn (low[0], high[1]);

          if (! reload_completed)
            emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
              emit_move_insn (high[0], low[0]);
              emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
            emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
          if (! no_new_pseudos)
            scratch = gen_reg_rtx (SImode);
          emit_move_insn (scratch, high[0]);
          emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));

ix86_split_lshrdi (operands, scratch)
     rtx *operands, scratch;
  rtx low[2], high[2];

  if (GET_CODE (operands[2]) == CONST_INT)
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;
          emit_move_insn (low[0], high[1]);
          emit_move_insn (high[0], const0_rtx);
            emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
          if (! no_new_pseudos)
            scratch = force_reg (SImode, const0_rtx);
          else
            emit_move_insn (scratch, const0_rtx);
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));

/* Helper function for the string operations below.  Check whether VARIABLE
   is aligned with respect to VALUE bytes; if it is, jump to the returned
   label.  */
ix86_expand_aligntest (variable, value)
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
                           1, label);

/* Adjust COUNTER by the VALUE.  */
ix86_adjust_counter (countreg, value)
     HOST_WIDE_INT value;
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));

/* Zero extend possibly SImode EXP to Pmode register.  */
ix86_zero_extend_to_Pmode (exp)
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));

/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrstr contains similar code.  */
ix86_expand_movstr (dst, src, count_exp, align_exp)
     rtx dst, src, count_exp, align_exp;
  rtx srcreg, destreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)

  if (GET_CODE (count_exp) == CONST_INT)
    count = INTVAL (count_exp);

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  if (counter_mode != SImode && counter_mode != DImode)

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */
  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
      countreg = ix86_zero_extend_to_Pmode (count_exp);
        emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
                                        destreg, srcreg, countreg));
        emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
                                  destreg, srcreg, countreg));

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */
  else if (count != 0
           && (align >= 8
               || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
               || optimize_size || count < (unsigned int) 64))
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      if (count & ~(size - 1))
          countreg = copy_to_mode_reg (counter_mode,
                                       GEN_INT ((count >> (size == 4 ? 2 : 3))
                                                & (TARGET_64BIT ? -1 : 0x3fffffff)));
          countreg = ix86_zero_extend_to_Pmode (countreg);
            emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
                                            destreg, srcreg, countreg));
            emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
                                      destreg, srcreg, countreg));
            emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
                                            destreg, srcreg, countreg));
      if (size == 8 && (count & 0x04))
        emit_insn (gen_strmovsi (destreg, srcreg));
        emit_insn (gen_strmovhi (destreg, srcreg));
        emit_insn (gen_strmovqi (destreg, srcreg));

  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
       allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
      int desired_alignment = (TARGET_PENTIUMPRO
                               && (count == 0 || count >= (unsigned int) 260)
                               ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
         library version, since it is usually equally fast and results in
         shorter code.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)

      if (TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
         than 4 bytes, because gcc is able to optimize such code better (in
         the case the destination or the count really is aligned, gcc is often
         able to predict the branches) and also it is friendlier to the
         hardware branch prediction.

         Using loops is beneficial for generic case, because we can
         handle small counts using the loops.  Many CPUs (such as Athlon)
         have large REP prefix setup costs.

         This is quite costly.  Maybe we can revisit this decision later or
         add some customizability to this code.  */

      if (count == 0 && align < desired_alignment)
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
                                   LEU, 0, counter_mode, 1, label);
          rtx label = ix86_expand_aligntest (destreg, 1);
          emit_insn (gen_strmovqi (destreg, srcreg));
          ix86_adjust_counter (countreg, 1);
          LABEL_NUSES (label) = 1;
          rtx label = ix86_expand_aligntest (destreg, 2);
          emit_insn (gen_strmovhi (destreg, srcreg));
          ix86_adjust_counter (countreg, 2);
          LABEL_NUSES (label) = 1;
      if (align <= 4 && desired_alignment > 4)
          rtx label = ix86_expand_aligntest (destreg, 4);
          emit_insn (gen_strmovsi (destreg, srcreg));
          ix86_adjust_counter (countreg, 4);
          LABEL_NUSES (label) = 1;

      if (label && desired_alignment > 4 && !TARGET_64BIT)
          LABEL_NUSES (label) = 1;
      if (!TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());
          emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
                                  GEN_INT (3)));
          emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
                                          destreg, srcreg, countreg2));
          emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
          emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
                                    destreg, srcreg, countreg2));
          LABEL_NUSES (label) = 1;
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
        emit_insn (gen_strmovsi (destreg, srcreg));
      if ((align <= 4 || count == 0) && TARGET_64BIT)
          rtx label = ix86_expand_aligntest (countreg, 4);
          emit_insn (gen_strmovsi (destreg, srcreg));
          LABEL_NUSES (label) = 1;
      if (align > 2 && count != 0 && (count & 2))
        emit_insn (gen_strmovhi (destreg, srcreg));
      if (align <= 2 || count == 0)
          rtx label = ix86_expand_aligntest (countreg, 2);
          emit_insn (gen_strmovhi (destreg, srcreg));
          LABEL_NUSES (label) = 1;
      if (align > 1 && count != 0 && (count & 1))
        emit_insn (gen_strmovqi (destreg, srcreg));
      if (align <= 1 || count == 0)
          rtx label = ix86_expand_aligntest (countreg, 1);
          emit_insn (gen_strmovqi (destreg, srcreg));
          LABEL_NUSES (label) = 1;

  insns = get_insns ();
  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);

/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  expand_movstr contains similar code.  */
ix86_expand_clrstr (src, count_exp, align_exp)
     rtx src, count_exp, align_exp;
  rtx destreg, zeroreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)

  if (GET_CODE (count_exp) == CONST_INT)
    count = INTVAL (count_exp);
  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; stosb instruction for
     counts not divisible by 4.  */
  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
        emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
                                         destreg, countreg));
        emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
                                   destreg, countreg));
  else if (count != 0
           && (align >= 8
               || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
               || optimize_size || count < (unsigned int) 64))
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
          countreg = copy_to_mode_reg (counter_mode,
                                       GEN_INT ((count >> (size == 4 ? 2 : 3))
                                                & (TARGET_64BIT ? -1 : 0x3fffffff)));
          countreg = ix86_zero_extend_to_Pmode (countreg);
            emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
                                             destreg, countreg));
            emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
                                       destreg, countreg));
            emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
                                             destreg, countreg));
      if (size == 8 && (count & 0x04))
        emit_insn (gen_strsetsi (destreg,
                                 gen_rtx_SUBREG (SImode, zeroreg, 0)));
        emit_insn (gen_strsethi (destreg,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
        emit_insn (gen_strsetqi (destreg,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));

      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
                               && (count == 0 || count >= (unsigned int) 260)
                               ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
         library version, since it is usually equally fast and results in
         shorter code.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)

      if (TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);

      if (count == 0 && align < desired_alignment)
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
                                   LEU, 0, counter_mode, 1, label);
          rtx label = ix86_expand_aligntest (destreg, 1);
          emit_insn (gen_strsetqi (destreg,
                                   gen_rtx_SUBREG (QImode, zeroreg, 0)));
          ix86_adjust_counter (countreg, 1);
          emit_label (label);
          LABEL_NUSES (label) = 1;
          rtx label = ix86_expand_aligntest (destreg, 2);
          emit_insn (gen_strsethi (destreg,
                                   gen_rtx_SUBREG (HImode, zeroreg, 0)));
          ix86_adjust_counter (countreg, 2);
          emit_label (label);
          LABEL_NUSES (label) = 1;
      if (align <= 4 && desired_alignment > 4)
          rtx label = ix86_expand_aligntest (destreg, 4);
          emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
                                             ? gen_rtx_SUBREG (SImode, zeroreg, 0)
          ix86_adjust_counter (countreg, 4);
          emit_label (label);
          LABEL_NUSES (label) = 1;

      if (label && desired_alignment > 4 && !TARGET_64BIT)
          emit_label (label);
          LABEL_NUSES (label) = 1;

      if (!TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());
          emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
                                  GEN_INT (3)));
          emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
                                           destreg, countreg2));
          emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
          emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
                                     destreg, countreg2));
          emit_label (label);
          LABEL_NUSES (label) = 1;

      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
        emit_insn (gen_strsetsi (destreg,
                                 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
          rtx label = ix86_expand_aligntest (countreg, 2);
          emit_insn (gen_strsetsi (destreg,
                                   gen_rtx_SUBREG (SImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
      if (align > 2 && count != 0 && (count & 2))
        emit_insn (gen_strsethi (destreg,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
          rtx label = ix86_expand_aligntest (countreg, 2);
          emit_insn (gen_strsethi (destreg,
                                   gen_rtx_SUBREG (HImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
      if (align > 1 && count != 0 && (count & 1))
        emit_insn (gen_strsetqi (destreg,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
          rtx label = ix86_expand_aligntest (countreg, 1);
          emit_insn (gen_strsetqi (destreg,
                                   gen_rtx_SUBREG (QImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;

/* Expand strlen.  */
ix86_expand_strlen (out, src, eoschar, align)
     rtx out, src, eoschar, align;
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid its expansion
     unless TARGET_INLINE_ALL_STRINGOPS.  */
  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      /* Well it seems that some optimizer does not combine a call like
         foo(strlen(bar), strlen(bar));
         when the move and the subtraction is done here.  It does calculate
         the length just once when these instructions are done inside of
         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
         often used and I use one fewer register for the lifetime of
         output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
         the string, like memchr(), so compute the length by subtracting
         the start address.  */
        emit_insn (gen_subdi3 (out, out, addr));
        emit_insn (gen_subsi3 (out, out, addr));

      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
          emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
                                         align, scratch4, scratch3));
          emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
          emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
          emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
                                     align, scratch4, scratch3));
          emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
          emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));

/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
             not aligned, otherwise undefined

   This is just the body.  It needs the initialisations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */
ix86_expand_strlensi_unroll_1 (out, align_rtx)
     rtx out, align_rtx;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);

  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte  */
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte  */

          /* Leave just the 3 lower bits.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
          emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
                                   Pmode, 1, align_2_label);
          emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
                                   Pmode, 1, align_3_label);
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check whether it is aligned to 4 bytes.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
                                    NULL_RTX, 0, OPTAB_WIDEN);
          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                               QImode, 1, end_0_label);

      /* Increment the address.  */
        emit_insn (gen_adddi3 (out, out, const1_rtx));
        emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2  */
          emit_label (align_2_label);
          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);
            emit_insn (gen_adddi3 (out, out, const1_rtx));
            emit_insn (gen_addsi3 (out, out, const1_rtx));
          emit_label (align_3_label);
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                               end_0_label);
        emit_insn (gen_adddi3 (out, out, const1_rtx));
        emit_insn (gen_addsi3 (out, out, const1_rtx));

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop; it only enlarges the program and does not help.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */
  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
                         gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
                           align_4_label);
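  /* Added note (not in the original source): for a word x the value
     (x - 0x01010101) & ~x & 0x80808080 is nonzero exactly when some byte of
     x is zero.  A zero byte borrows when 1 is subtracted, setting bit 7 of
     that byte, and ~x also has that bit set, so the masked bit survives; a
     nonzero byte either keeps bit 7 clear after the subtraction or has it
     cleared by ~x.  */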
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg, tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
                              gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
                              gen_rtx_IF_THEN_ELSE (Pmode, tmp,
                                                    reg2, out)));

      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
        emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
        emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));

  emit_label (end_0_label);

ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
     rtx retval, fnaddr, callarg1, callarg2, pop;
  rtx use = NULL, call;

  if (pop == const0_rtx)
  if (TARGET_64BIT && pop)

  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);

  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
    call = gen_rtx_SET (VOIDmode, retval, call);
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));

  call = emit_call_insn (call);
    CALL_INSN_FUNCTION_USAGE (call) = use;

/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */
static struct machine_function *
ix86_init_machine_status ()
  return ggc_alloc_cleared (sizeof (struct machine_function));

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */
assign_386_stack_local (mode, n)
     enum machine_mode mode;
  if (n < 0 || n >= MAX_386_STACK_LOCALS)

  if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
    ix86_stack_locals[(int) mode][n]
      = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  return ix86_stack_locals[(int) mode][n];

/* Construct the SYMBOL_REF for the tls_get_addr function.  */
static GTY(()) rtx ix86_tls_symbol;

ix86_tls_get_addr ()
  if (!ix86_tls_symbol)
      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
                                                    ? "___tls_get_addr"
                                                    : "__tls_get_addr"));

  return ix86_tls_symbol;

/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
memory_address_length (addr)
  struct ix86_address parts;
  rtx base, index, disp;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)

  if (! ix86_decompose_address (addr, &parts))

  index = parts.index;

  /* Register Indirect.  */
  if (base && !index && !disp)
      /* Special cases: ebp and esp need the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
          || addr == arg_pointer_rtx
          || addr == frame_pointer_rtx
          || addr == hard_frame_pointer_rtx)

  /* Direct Addressing.  */
  else if (disp && !base && !index)

      /* Find the length of the displacement constant.  */
      if (GET_CODE (disp) == CONST_INT
          && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))

      /* An index requires the two-byte modrm form.  */
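      /* Added note (rough reference, not in the original source): under these
         rules a plain register operand such as (%eax) needs no extra bytes,
         a short displacement like 8(%eax) adds one byte, a full displacement
         or absolute address adds four, and any scaled-index form such as
         (%eax,%ebx,4) also needs the SIB byte implied by the two-byte modrm
         form.  */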

/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */
ix86_attr_length_immediate_default (insn, shortform)
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
            && GET_CODE (recog_data.operand[i]) == CONST_INT
            && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
          switch (get_attr_mode (insn))
            /* Immediates for DImode instructions are encoded as 32bit sign
               extended values.  */
              fatal_insn ("unknown insn mode", insn);

/* Compute default value for "length_address" attribute.  */
ix86_attr_length_address_default (insn)
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
        return memory_address_length (XEXP (recog_data.operand[i], 0));

/* Return the maximum number of instructions a cpu can issue.  */
    case PROCESSOR_PENTIUM:
    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_ATHLON:

/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */
ix86_flags_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)

  if ((set = single_set (dep_insn)) != 0)
      set = SET_DEST (set);
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
           && XVECLEN (PATTERN (dep_insn), 0) == 2
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));

  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))

/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */
ix86_agi_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
  if (insn_type == TYPE_LEA
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
      else if (GET_CODE (addr) == PARALLEL
               && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
        addr = XVECEXP (addr, 0, 0);
      addr = SET_SRC (addr);

      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
        if (GET_CODE (recog_data.operand[i]) == MEM)
            addr = XEXP (recog_data.operand[i], 0);

  return modified_in_p (addr, dep_insn);

ix86_adjust_cost (insn, link, dep_insn, cost)
     rtx insn, link, dep_insn;
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory, dep_memory;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
          && get_attr_memory (insn) == MEMORY_STORE
          && !ix86_agi_dependant (insn, dep_insn, insn_type))

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      /* Since we can't represent delayed latencies of load+operation,
         increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
          && dep_insn_type != TYPE_FMOV
          && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
          && (set = single_set (dep_insn)) != NULL_RTX
          && (set2 = single_set (insn)) != NULL_RTX
          && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
          && GET_CODE (SET_DEST (set2)) == MEM)

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
          /* Claim moves to take one cycle, as core can issue one load
             at time and the next load can start cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)

      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);
      /* The esp dependency is resolved before the instruction is really
         finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))

      /* Since we can't represent delayed latencies of load+operation,
         increase the cost here for non-imov insns.  */
      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
        cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
          /* Claim moves to take one cycle, as core can issue one load
             at time and the next load can start cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)

    case PROCESSOR_ATHLON:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
          if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
          /* Claim moves to take one cycle, as core can issue one load
             at time and the next load can start cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
          else if (cost >= 3)

struct ppro_sched_data
  int issued_this_cycle;

static enum attr_ppro_uops
ix86_safe_ppro_uops (insn)
  if (recog_memoized (insn) >= 0)
    return get_attr_ppro_uops (insn);
  else
    return PPRO_UOPS_MANY;

ix86_dump_ppro_packet (dump)
  if (ix86_sched_data.ppro.decode[0])
      fprintf (dump, "PPRO packet: %d",
               INSN_UID (ix86_sched_data.ppro.decode[0]));
      if (ix86_sched_data.ppro.decode[1])
        fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
      if (ix86_sched_data.ppro.decode[2])
        fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
      fputc ('\n', dump);

/* We're beginning a new block.  Initialize data structures as necessary.  */
ix86_sched_init (dump, sched_verbose, veclen)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int veclen ATTRIBUTE_UNUSED;
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));

/* Shift INSN to SLOT, and shift everything else down.  */
ix86_reorder_insn (insnp, slot)
      insnp[0] = insnp[1];
    while (++insnp != slot);

ix86_sched_reorder_ppro (ready, e_ready)
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
      if (cur_uops == PPRO_UOPS_MANY)
          decode[0] = *insnp;

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
          if (insnp == ready)
          cur_uops = ix86_safe_ppro_uops (*--insnp);

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
        ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
        if (ready > e_ready)

        cur_uops = ix86_safe_ppro_uops (*insnp);
        while (cur_uops != PPRO_UOPS_ONE)
            if (insnp == ready)
            cur_uops = ix86_safe_ppro_uops (*--insnp);

        /* Found one.  Move it to the head of the queue and issue it.  */
        if (cur_uops == PPRO_UOPS_ONE)
            ix86_reorder_insn (insnp, e_ready);
            decode[i] = *e_ready--;
            issued_this_cycle++;

        /* ??? Didn't find one.  Ideally, here we would do a lazy split
           of 2-uop insns, issue one and queue the other.  */

  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;

/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */
ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int clock_var ATTRIBUTE_UNUSED;
  int n_ready = *n_readyp;
  rtx *e_ready = ready + n_ready - 1;

  /* Make sure to go ahead and initialize key items in
     ix86_sched_data if we are not going to bother trying to
     reorder the ready queue.  */
      ix86_sched_data.ppro.issued_this_cycle = 1;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);

  return ix86_issue_rate ();

/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     int can_issue_more;
      return can_issue_more - 1;
    case PROCESSOR_PENTIUMPRO:
        enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

        if (uops == PPRO_UOPS_MANY)
            ix86_dump_ppro_packet (dump);
            ix86_sched_data.ppro.decode[0] = insn;
            ix86_sched_data.ppro.decode[1] = NULL;
            ix86_sched_data.ppro.decode[2] = NULL;
            ix86_dump_ppro_packet (dump);
            ix86_sched_data.ppro.decode[0] = NULL;
        else if (uops == PPRO_UOPS_FEW)
            ix86_dump_ppro_packet (dump);
            ix86_sched_data.ppro.decode[0] = insn;
            ix86_sched_data.ppro.decode[1] = NULL;
            ix86_sched_data.ppro.decode[2] = NULL;
            for (i = 0; i < 3; ++i)
              if (ix86_sched_data.ppro.decode[i] == NULL)
                  ix86_sched_data.ppro.decode[i] = insn;
                ix86_dump_ppro_packet (dump);
                ix86_sched_data.ppro.decode[0] = NULL;
                ix86_sched_data.ppro.decode[1] = NULL;
                ix86_sched_data.ppro.decode[2] = NULL;
        return --ix86_sched_data.ppro.issued_this_cycle;

ia32_use_dfa_pipeline_interface ()
  if (ix86_cpu == PROCESSOR_PENTIUM)

/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */
ia32_multipass_dfa_lookahead ()
  if (ix86_cpu == PROCESSOR_PENTIUM)

/* Walk through INSNS and look for MEM references whose address is DSTREG or
   SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
   appropriate, respectively.  */
ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
     rtx dstref, srcref, dstreg, srcreg;
  for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
                                 dstreg, srcreg);

/* Subroutine of above to actually do the updating by recursively walking
   the rtx.  */
ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx dstref, srcref, dstreg, srcreg;
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);

  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
      if (*format_ptr == 'e')
        ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
                                   dstreg, srcreg);
      else if (*format_ptr == 'E')
        for (j = XVECLEN (x, i) - 1; j >= 0; j--)
          ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
                                     dstreg, srcreg);

/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */
ix86_constant_alignment (exp, align)
  if (TREE_CODE (exp) == REAL_CST)
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
  else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */
ix86_data_alignment (type, align)
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
          || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
      if (AGGREGATE_TYPE_P (type)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)

  if (TREE_CODE (type) == ARRAY_TYPE)
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
  else if (TREE_CODE (type) == COMPLEX_TYPE)
      if (TYPE_MODE (type) == DCmode && align < 64)
      if (TYPE_MODE (type) == XCmode && align < 128)
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
      if (TYPE_MODE (type) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)

/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */
ix86_local_alignment (type, align)
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
      if (AGGREGATE_TYPE_P (type)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
  if (TREE_CODE (type) == ARRAY_TYPE)
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
  else if (TREE_CODE (type) == COMPLEX_TYPE)
      if (TYPE_MODE (type) == DCmode && align < 64)
      if (TYPE_MODE (type) == XCmode && align < 128)
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
      if (TYPE_MODE (type) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
x86_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
                               plus_constant (tramp, 10),
                               NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
                      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
                      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
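      /* Added note (not in the original source): the four stores above lay
         down the 10-byte i386 trampoline b9 <cxt:4> e9 <disp:4>, i.e.
         "movl $cxt, %ecx; jmp fnaddr", where the jump displacement is taken
         relative to the end of the trampoline (tramp + 10), matching the
         offset used when DISP was computed.  */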
      /* Try to load address using shorter movl instead of movabs.
         We may want to support movq for kernel mode, but kernel does not use
         trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
          fnaddr = copy_to_mode_reg (DImode, fnaddr);
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                          gen_int_mode (0xbb41, HImode));
          emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
                          gen_lowpart (SImode, fnaddr));
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                          gen_int_mode (0xbb49, HImode));
          emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
                          fnaddr);
      /* Load static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
                      cxt);
      /* Jump to r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
                      gen_int_mode (0xe3, QImode));
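      /* Added note (not in the original source): byte picture of the 64-bit
         sequence built above (the HImode constants are stored little-endian,
         so 0xbb49 emits the bytes 49 bb, and so on):
             41 bb <imm32>    movl   $fnaddr, %r11d   (short form), or
             49 bb <imm64>    movabs $fnaddr, %r11
             49 ba <imm64>    movabs $cxt, %r10       (static chain)
             49 ff e3         jmp    *%r11  */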
      if (offset > TRAMPOLINE_SIZE)

#define def_builtin(MASK, NAME, TYPE, CODE)                             \
do {                                                                    \
  if ((MASK) & target_flags)                                            \
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL);       \
} while (0)

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};

/* Used for builtins that are enabled both by -msse and -msse2.  */
#define MASK_SSE1 (MASK_SSE | MASK_SSE2)
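
/* Added note (assumption, not in the original source): in the tables below
   the COMPARISON and FLAG fields encode the condition a builtin tests;
   GT/GE style builtins appear as LT/LE with FLAG set to 1, which presumably
   tells the expander to swap the two operands.  */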
11485 static const struct builtin_description bdesc_comi
[] =
11487 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, EQ
, 0 },
11488 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, LT
, 0 },
11489 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, LE
, 0 },
11490 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, LT
, 1 },
11491 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, LE
, 1 },
11492 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, NE
, 0 },
11493 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, EQ
, 0 },
11494 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, LT
, 0 },
11495 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, LE
, 0 },
11496 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, LT
, 1 },
11497 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, LE
, 1 },
11498 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, NE
, 0 },
11499 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, EQ
, 0 },
11500 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, LT
, 0 },
11501 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, LE
, 0 },
11502 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, LT
, 1 },
11503 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, LE
, 1 },
11504 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, NE
, 0 },
11505 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, EQ
, 0 },
11506 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, LT
, 0 },
11507 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, LE
, 0 },
11508 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, LT
, 1 },
11509 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, LE
, 1 },
11510 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, NE
, 0 },
11513 static const struct builtin_description bdesc_2arg
[] =
  { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },

  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },

  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },

  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
};
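
/* Illustrative example: a bdesc_2arg entry supplies everything the generic
   two-operand path needs: the feature mask that gates registration, the insn
   pattern to emit, the user-visible name, and the IX86_BUILTIN_* code.  The
   first SSE entry maps "__builtin_ia32_addps" onto CODE_FOR_addv4sf3, so a
   source-level call such as the sketch below (the typedef and function names
   are illustrative only) is registered by the bdesc_2arg loop in
   ix86_init_mmx_sse_builtins and expanded through ix86_expand_binop_builtin:

	typedef float v4sf __attribute__ ((mode (V4SF)));

	v4sf
	add4 (v4sf a, v4sf b)
	{
	  return __builtin_ia32_addps (a, b);
	}
   */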
static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
};
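
/* Illustrative example: bdesc_1arg entries drive the single-operand path.
   IX86_BUILTIN_SQRTPS, for instance, is bound to CODE_FOR_sqrtv4sf2 here and
   is registered under the name "__builtin_ia32_sqrtps" in
   ix86_init_mmx_sse_builtins, so a call along the lines of

	v4sf root = __builtin_ia32_sqrtps (x);	/* x: a V4SF value */

   (with "v4sf" standing for an illustrative V4SF-mode typedef as above) is
   expanded by the one-operand helper ix86_expand_unop_builtin.  */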
static void
ix86_init_builtins ()
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   portion of the builtins.  */
static void
ix86_init_mmx_sse_builtins ()
{
  const struct builtin_description * d;
  size_t i;
  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2SI_type_node, NULL_TREE);
  tree int_ftype_v4hi_int
    = build_function_type_list (integer_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int_int
    = build_function_type_list (V4HI_type_node, V4HI_type_node,
				integer_type_node, integer_type_node,
				NULL_TREE);
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node,
				integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, V4HI_type_node,
				integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, void_list_node);
  tree v4sf_ftype_void
    = build_function_type (V4SF_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
				V8QI_type_node, V8QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pfloat
    = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, pv2di_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
				pv2di_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
				pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
				pdi_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
				pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree ti_ftype_void
    = build_function_type (intTI_type_node, void_list_node);
  tree ti_ftype_ti_ti
    = build_function_type_list (intTI_type_node,
				intTI_type_node, intTI_type_node, NULL_TREE);
  tree void_ftype_pvoid
    = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
  tree v2di_ftype_di
    = build_function_type_list (V2DI_type_node,
				long_long_unsigned_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node,
				integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_pv2si
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v2df
    = build_function_type_list (void_type_node,
				pv2si_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
				pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
				pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
				V16QI_type_node, V16QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v2df_ftype_pdouble
    = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_double
    = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
  tree v2df_ftype_double_double
    = build_function_type_list (V2DF_type_node,
				double_type_node, double_type_node, NULL_TREE);
  tree int_ftype_v8hi_int
    = build_function_type_list (integer_type_node,
				V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int_int
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v2di
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v2di
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si_v4si;
	  break;
	case V2DImode:
	  type = v2di_ftype_v2di_v2di;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df_v2df;
	  break;
	case TImode:
	  type = ti_ftype_ti_ti;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  abort ();
	}

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
	  || d->icode == CODE_FOR_maskncmpv4sf3
	  || d->icode == CODE_FOR_vmmaskcmpv4sf3
	  || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_maskcmpv2df3
	  || d->icode == CODE_FOR_maskncmpv2df3
	  || d->icode == CODE_FOR_vmmaskcmpv2df3
	  || d->icode == CODE_FOR_vmmaskncmpv2df3)
	type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);

  def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);

  def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
}
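
/* Illustrative example: once ix86_init_mmx_sse_builtins has run, every name
   passed to def_builtin above is visible to user code when the corresponding
   feature is enabled (e.g. -msse for the MASK_SSE1 entries).  A rough sketch
   of how the load/arithmetic/store builtins combine (typedef, function and
   parameter names here are illustrative only):

	typedef float v4sf __attribute__ ((mode (V4SF)));

	void
	scale (float *dst, float *src)
	{
	  v4sf a = __builtin_ia32_loadups (src);
	  v4sf b = __builtin_ia32_loadups (dst);
	  __builtin_ia32_storeups (dst, __builtin_ia32_mulps (a, b));
	}
   */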
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (x, mode)
     rtx x;
     enum machine_mode mode;
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
			      : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
				: gen_rtx_SUBREG (V4SFmode, x, 0)));
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* In type_for_mode we restrict the ability to create TImode types
   to hosts with 64-bit H_W_I.  So we've defined the SSE logicals
   to have a V4SFmode signature.  Convert them in-place to TImode.  */

static rtx
ix86_expand_timode_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);

  op0 = gen_lowpart (TImode, op0);
  op1 = gen_lowpart (TImode, op1);
  target = gen_reg_rtx (TImode);

  if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
    op0 = copy_to_mode_reg (TImode, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
    op1 = copy_to_mode_reg (TImode, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (TImode, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);

  return gen_lowpart (V4SFmode, target);
}
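
/* Illustrative note: this TImode path serves the SSE logical builtins that
   were registered above with a V4SFmode signature, so that, for example, a
   call such as

	v4sf masked = __builtin_ia32_andps (v, m);	/* v, m: V4SF values */

   (with "v4sf" an illustrative V4SF-mode typedef) has both operands viewed
   as TImode via gen_lowpart, emits the TImode logical insn, and hands the
   result back as V4SF through gen_lowpart (V4SFmode, target).  */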
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (icode, arglist)
     enum insn_code icode;
     tree arglist;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));

  if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}
/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (icode, arglist, target, do_load)
     enum insn_code icode;
     tree arglist;
     rtx target;
     int do_load;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
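/* Editorial note: OP2 is the comparison rtx itself (a code such as LT built
   with gen_rtx_fmt_ee in MODE0) and is handed to the maskcmp pattern as its
   fourth operand.  When the builtin_description asks for it, the operands
   are swapped first so that, for instance, a "greater than" builtin can be
   emitted using the hardware's "less than" comparison.  */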
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          gen_rtx_REG (CCmode, FLAGS_REG),
                                          const0_rtx)));

  return SUBREG_REG (target);
}
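/* Editorial note: unlike the masked compares above, the (u)comi patterns
   only set the flags.  The boolean result is therefore materialized by
   clearing an SImode pseudo, storing the flag condition into its low QImode
   part through STRICT_LOW_PART, and returning the containing SImode
   register.  */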
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;
    case IX86_BUILTIN_PEXTRW:
    case IX86_BUILTIN_PEXTRW128:
      icode = (fcode == IX86_BUILTIN_PEXTRW
               ? CODE_FOR_mmx_pextrw
               : CODE_FOR_sse2_pextrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        {
          /* @@@ better error message */
          error ("selector must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
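/* Illustrative example (editorial addition): this case is reached from the
   extract intrinsics, e.g. _mm_extract_pi16 (m, 2), which expand to the
   __builtin_ia32_pextrw builtins.  The selector argument must fold to a
   CONST_INT accepted by the immediate predicate, hence the error above.  */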
    case IX86_BUILTIN_PINSRW:
    case IX86_BUILTIN_PINSRW128:
      icode = (fcode == IX86_BUILTIN_PINSRW
               ? CODE_FOR_mmx_pinsrw
               : CODE_FOR_sse2_pinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("selector must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
               ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
               : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;
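/* Editorial note: the builtin's arguments arrive as (data, mask, pointer)
   while the insn wants the pointer first; the reshuffling of arg0/arg1/arg2
   above is what the "arg order is different" comment refers to, with the
   trailing pointer argument becoming insn operand 0 and the first two
   arguments becoming operands 1 and 2.  */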
    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_ANDPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
                                               arglist, target);
    case IX86_BUILTIN_ANDNPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
                                               arglist, target);
    case IX86_BUILTIN_ORPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
                                               arglist, target);
    case IX86_BUILTIN_XORPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
                                               arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
               : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
               : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
               : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
    case IX86_BUILTIN_STOREHPD:
    case IX86_BUILTIN_STORELPD:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
               : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
               : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
               : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;
    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);
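/* Editorial note: both MXCSR builtins bounce through a scratch stack slot
   obtained from assign_386_stack_local, since the ldmxcsr/stmxcsr patterns
   operate on a memory operand; the value is moved into or out of that slot
   around the instruction.  */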
    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
               ? CODE_FOR_sse_shufps
               : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
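/* Illustrative example (editorial addition): a typical source form is
   _mm_shuffle_ps (a, b, _MM_SHUFFLE (3, 2, 1, 0)), whose third argument must
   survive folding as a CONST_INT so it can be encoded as the insn's 8-bit
   immediate; otherwise the "mask must be an immediate" error above is
   issued.  */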
    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
               : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
               : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
               : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADAPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);

    case IX86_BUILTIN_STOREAPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_LOADSD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);

    case IX86_BUILTIN_STORESD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
    case IX86_BUILTIN_SETPD1:
      target = assign_386_stack_local (DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, DFmode, 0),
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
      emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
      return op0;

    case IX86_BUILTIN_SETPD:
      target = assign_386_stack_local (V2DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      emit_move_insn (adjust_address (target, DFmode, 0),
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, DFmode, 8),
                      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_movapd (op0, target));
      return op0;

    case IX86_BUILTIN_LOADRPD:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
                                         gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
      return target;

    case IX86_BUILTIN_LOADPD1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
                                         gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPD1:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STORERPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
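/* Editorial note: the SETPD1/LOADPD1 cases above build a two-element double
   vector from a single scalar by loading it into the low half (sse2_loadsd)
   and then duplicating it into the high half with a shufpd whose selector
   is 0; LOADRPD instead uses selector 1 to swap the two halves after a full
   movapd load.  */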
    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      icode = CODE_FOR_sse2_clflush;
      mode0 = insn_data[icode].operand[0].mode;
      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    default:
      break;
    }
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
        /* Compares are treated specially.  */
        if (d->icode == CODE_FOR_maskcmpv4sf3
            || d->icode == CODE_FOR_vmmaskcmpv4sf3
            || d->icode == CODE_FOR_maskncmpv4sf3
            || d->icode == CODE_FOR_vmmaskncmpv4sf3
            || d->icode == CODE_FOR_maskcmpv2df3
            || d->icode == CODE_FOR_vmmaskcmpv2df3
            || d->icode == CODE_FOR_maskncmpv2df3
            || d->icode == CODE_FOR_vmmaskncmpv2df3)
          return ix86_expand_sse_compare (d, arglist, target);

        return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (
                      gen_rtx_SET (VOIDmode,
                                   gen_rtx_MEM (DImode,
                                                gen_rtx_PRE_DEC (DImode,
                                                        stack_pointer_rtx)),
                                   operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_di (&operand, 1, operands, operands + 1);
            emit_insn (
                        gen_rtx_SET (VOIDmode,
                                     gen_rtx_MEM (SImode,
                                                  gen_rtx_PRE_DEC (Pmode,
                                                          stack_pointer_rtx)),
                                     operands[1]));
            emit_insn (
                        gen_rtx_SET (VOIDmode,
                                     gen_rtx_MEM (SImode,
                                                  gen_rtx_PRE_DEC (Pmode,
                                                          stack_pointer_rtx)),
                                     operands[0]));
          }
          break;
        case HImode:
          /* It is better to store HImodes as SImodes.  */
          if (!TARGET_PARTIAL_REG_STALL)
            operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (
                      gen_rtx_SET (VOIDmode,
                                   gen_rtx_MEM (GET_MODE (operand),
                                                gen_rtx_PRE_DEC (SImode,
                                                        stack_pointer_rtx)),
                                   operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
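/* Editorial note: in the 64-bit red-zone case above the slot lives below the
   stack pointer (at sp - RED_ZONE_SIZE) and no pointer adjustment is needed;
   the remaining paths emit push-style PRE_DEC stores and hand back a MEM at
   the new stack pointer, which ix86_free_from_memory below releases
   again.  */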
/* Free operand from the memory.  */
void
ix86_free_from_memory (mode)
     enum machine_mode mode;
{
  if (!TARGET_64BIT || !TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
        size = 2;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
         to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}
/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (x, class)
     rtx x;
     enum reg_class class;
{
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
        return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
        {
          /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
          if (MAYBE_SSE_CLASS_P (class))
            return (reg_class_subset_p (class, GENERAL_REGS)
                    ? GENERAL_REGS : FLOAT_REGS);
          else
            return class;
        }
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
        return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
        return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}
/* If we are copying between general and FP registers, we need a memory
   location. The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */
int
ix86_secondary_memory_needed (class1, class2, mode, strict)
     enum reg_class class1, class2;
     enum machine_mode mode;
     int strict;
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
        abort ();
      else
        return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
          || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
              && (mode) != SImode)
          || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
              && (mode) != SImode));
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */
int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In case of copying from general_purpose_register we may emit
     multiple stores followed by single load causing memory size mismatch
     stall.  Count this as arbitrarily high cost of 20.  */
  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int add_cost = 0;

      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
        add_cost = 20;
      return (MEMORY_MOVE_COST (mode, class1, 0)
              + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
    }
  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return VALID_SSE_REG_MODE (mode);
  if (MMX_REGNO_P (regno))
    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
  /* We handle both integer and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care for QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */
int
ix86_memory_move_cost (mode, class, in)
     enum machine_mode mode;
     enum reg_class class;
     int in;
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
        {
        case SFmode:
          index = 0;
          break;
        case DFmode:
          index = 1;
          break;
        case XFmode:
        case TFmode:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        case 16:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
        return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
                : ix86_cost->movzbl_load);
      else
        return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
                : ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
        mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
              * (int) GET_MODE_SIZE (mode) / 4);
    }
}
#ifdef DO_GLOBAL_CTORS_BODY
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc ()
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
       reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
       reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}
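/* Editorial note: the resulting allocation order is call-clobbered integer
   registers first, then call-saved integer registers, then whichever FP bank
   (x87 or SSE) is preferred for floating-point math, with MMX registers
   last; any trailing slots of reg_alloc_order are simply zero-filled.  */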
void
x86_output_mi_thunk (file, delta, function)
     FILE *file;
     int delta;
     tree function;
{
  tree parm;
  rtx xops[3];

  if (ix86_regparm > 0)
    parm = TYPE_ARG_TYPES (TREE_TYPE (function));
  else
    parm = NULL_TREE;
  for (; parm; parm = TREE_CHAIN (parm))
    if (TREE_VALUE (parm) == void_type_node)
      break;

  xops[0] = GEN_INT (delta);
  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
      xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
      output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
      if (flag_pic)
        {
          fprintf (file, "\tjmp *");
          assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
          fprintf (file, "@GOTPCREL(%%rip)\n");
        }
      else
        {
          fprintf (file, "\tjmp ");
          assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
          fprintf (file, "\n");
        }
    }
  else
    {
      if (parm)
        xops[1] = gen_rtx_REG (SImode, 0);
      else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
        xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
      else
        xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
      output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);

      if (flag_pic)
        {
          xops[0] = pic_offset_table_rtx;
          xops[1] = gen_label_rtx ();
          xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");

          if (ix86_regparm > 2)
            abort ();
          output_asm_insn ("push{l}\t%0", xops);
          output_asm_insn ("call\t%P1", xops);
          ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
          output_asm_insn ("pop{l}\t%0", xops);
          output_asm_insn
            ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
          xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
          output_asm_insn
            ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
          asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
          asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
        }
      else
        {
          fprintf (file, "\tjmp ");
          assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
          fprintf (file, "\n");
        }
    }
}
int
x86_field_alignment (field, computed)
     tree field;
     int computed;
{
  enum machine_mode mode;

  if (TARGET_64BIT || DECL_USER_ALIGN (field) || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (TREE_TYPE (field)) == ARRAY_TYPE
                    ? get_inner_array_type (field) : TREE_TYPE (field));
  if ((mode == DFmode || mode == DCmode
       || mode == DImode || mode == CDImode)
      && !TARGET_ALIGN_DOUBLE)
    return MIN (32, computed);
  return computed;
}

#include "gt-i386.h"