1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-flags.h"
35 #include "insn-attr.h"
42 #include "basic-block.h"
45 #ifndef CHECK_STACK_LIMIT
46 #define CHECK_STACK_LIMIT -1
49 /* Processor costs (relative to an add) */
50 struct processor_costs i386_cost
= { /* 386 specific costs */
51 1, /* cost of an add instruction */
52 1, /* cost of a lea instruction */
53 3, /* variable shift costs */
54 2, /* constant shift costs */
55 6, /* cost of starting a multiply */
56 1, /* cost of multiply per each bit set */
57 23, /* cost of a divide/mod */
58 15, /* "large" insn */
/* NOTE(review): an initializer (the MOVE_RATIO entry) appears to have been
   lost from this copy between the "large" insn line and the next one --
   confirm against the original file. */
60 4, /* cost for loading QImode using movzbl */
61 {2, 4, 2}, /* cost of loading integer registers
62 in QImode, HImode and SImode.
63 Relative to reg-reg move (2). */
64 {2, 4, 2}, /* cost of storing integer registers */
65 2, /* cost of reg,reg fld/fst */
66 {8, 8, 8}, /* cost of loading fp registers
67 in SFmode, DFmode and XFmode */
68 {8, 8, 8}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
69 2, /* cost of moving MMX register */
70 {4, 8}, /* cost of loading MMX registers
71 in SImode and DImode */
72 {4, 8}, /* cost of storing MMX registers
73 in SImode and DImode */
74 2, /* cost of moving SSE register */
75 {4, 8, 16}, /* cost of loading SSE registers
76 in SImode, DImode and TImode */
77 {4, 8, 16}, /* cost of storing SSE registers
78 in SImode, DImode and TImode */
79 3, /* MMX or SSE register to integer */
/* NOTE(review): the closing "};" of this initializer appears to have been
   lost from this copy. */
82 struct processor_costs i486_cost
= { /* 486 specific costs */
83 1, /* cost of an add instruction */
84 1, /* cost of a lea instruction */
85 3, /* variable shift costs */
86 2, /* constant shift costs */
87 12, /* cost of starting a multiply */
88 1, /* cost of multiply per each bit set */
89 40, /* cost of a divide/mod */
90 15, /* "large" insn */
/* NOTE(review): the MOVE_RATIO initializer and the closing "};" of this
   aggregate appear to have been lost from this copy -- confirm against the
   original file. */
92 4, /* cost for loading QImode using movzbl */
93 {2, 4, 2}, /* cost of loading integer registers
94 in QImode, HImode and SImode.
95 Relative to reg-reg move (2). */
96 {2, 4, 2}, /* cost of storing integer registers */
97 2, /* cost of reg,reg fld/fst */
98 {8, 8, 8}, /* cost of loading fp registers
99 in SFmode, DFmode and XFmode */
100 {8, 8, 8}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
101 2, /* cost of moving MMX register */
102 {4, 8}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {4, 8}, /* cost of storing MMX registers
105 in SImode and DImode */
106 2, /* cost of moving SSE register */
107 {4, 8, 16}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {4, 8, 16}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3 /* MMX or SSE register to integer */
114 struct processor_costs pentium_cost
= {
115 1, /* cost of an add instruction */
116 1, /* cost of a lea instruction */
117 4, /* variable shift costs */
118 1, /* constant shift costs */
119 11, /* cost of starting a multiply */
120 0, /* cost of multiply per each bit set */
121 25, /* cost of a divide/mod */
122 8, /* "large" insn */
/* NOTE(review): the MOVE_RATIO initializer and the closing "};" of this
   aggregate appear to have been lost from this copy -- confirm against the
   original file. */
124 6, /* cost for loading QImode using movzbl */
125 {2, 4, 2}, /* cost of loading integer registers
126 in QImode, HImode and SImode.
127 Relative to reg-reg move (2). */
128 {2, 4, 2}, /* cost of storing integer registers */
129 2, /* cost of reg,reg fld/fst */
130 {2, 2, 6}, /* cost of loading fp registers
131 in SFmode, DFmode and XFmode */
132 {4, 4, 6}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
133 8, /* cost of moving MMX register */
134 {8, 8}, /* cost of loading MMX registers
135 in SImode and DImode */
136 {8, 8}, /* cost of storing MMX registers
137 in SImode and DImode */
138 2, /* cost of moving SSE register */
139 {4, 8, 16}, /* cost of loading SSE registers
140 in SImode, DImode and TImode */
141 {4, 8, 16}, /* cost of storing SSE registers
142 in SImode, DImode and TImode */
143 3 /* MMX or SSE register to integer */
146 struct processor_costs pentiumpro_cost
= {
147 1, /* cost of an add instruction */
148 1, /* cost of a lea instruction */
149 1, /* variable shift costs */
150 1, /* constant shift costs */
151 4, /* cost of starting a multiply */
152 0, /* cost of multiply per each bit set */
153 17, /* cost of a divide/mod */
154 8, /* "large" insn */
/* NOTE(review): the MOVE_RATIO initializer and the closing "};" of this
   aggregate appear to have been lost from this copy -- confirm against the
   original file. */
156 2, /* cost for loading QImode using movzbl */
157 {4, 4, 4}, /* cost of loading integer registers
158 in QImode, HImode and SImode.
159 Relative to reg-reg move (2). */
160 {2, 2, 2}, /* cost of storing integer registers */
161 2, /* cost of reg,reg fld/fst */
162 {2, 2, 6}, /* cost of loading fp registers
163 in SFmode, DFmode and XFmode */
164 {4, 4, 6}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
165 2, /* cost of moving MMX register */
166 {2, 2}, /* cost of loading MMX registers
167 in SImode and DImode */
168 {2, 2}, /* cost of storing MMX registers
169 in SImode and DImode */
170 2, /* cost of moving SSE register */
171 {2, 2, 8}, /* cost of loading SSE registers
172 in SImode, DImode and TImode */
173 {2, 2, 8}, /* cost of storing SSE registers
174 in SImode, DImode and TImode */
175 3 /* MMX or SSE register to integer */
178 struct processor_costs k6_cost
= {
179 1, /* cost of an add instruction */
180 2, /* cost of a lea instruction */
181 1, /* variable shift costs */
182 1, /* constant shift costs */
183 3, /* cost of starting a multiply */
184 0, /* cost of multiply per each bit set */
185 18, /* cost of a divide/mod */
186 8, /* "large" insn */
/* NOTE(review): the MOVE_RATIO initializer and the closing "};" of this
   aggregate appear to have been lost from this copy -- confirm against the
   original file. */
188 3, /* cost for loading QImode using movzbl */
189 {4, 5, 4}, /* cost of loading integer registers
190 in QImode, HImode and SImode.
191 Relative to reg-reg move (2). */
192 {2, 3, 2}, /* cost of storing integer registers */
193 4, /* cost of reg,reg fld/fst */
194 {6, 6, 6}, /* cost of loading fp registers
195 in SFmode, DFmode and XFmode */
196 {4, 4, 4}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
197 2, /* cost of moving MMX register */
198 {2, 2}, /* cost of loading MMX registers
199 in SImode and DImode */
200 {2, 2}, /* cost of storing MMX registers
201 in SImode and DImode */
202 2, /* cost of moving SSE register */
203 {2, 2, 8}, /* cost of loading SSE registers
204 in SImode, DImode and TImode */
205 {2, 2, 8}, /* cost of storing SSE registers
206 in SImode, DImode and TImode */
207 6 /* MMX or SSE register to integer */
210 struct processor_costs athlon_cost
= {
211 1, /* cost of an add instruction */
212 2, /* cost of a lea instruction */
213 1, /* variable shift costs */
214 1, /* constant shift costs */
215 5, /* cost of starting a multiply */
216 0, /* cost of multiply per each bit set */
217 42, /* cost of a divide/mod */
218 8, /* "large" insn */
/* NOTE(review): the MOVE_RATIO initializer and the closing "};" of this
   aggregate appear to have been lost from this copy -- confirm against the
   original file. */
220 4, /* cost for loading QImode using movzbl */
221 {4, 5, 4}, /* cost of loading integer registers
222 in QImode, HImode and SImode.
223 Relative to reg-reg move (2). */
224 {2, 3, 2}, /* cost of storing integer registers */
225 4, /* cost of reg,reg fld/fst */
226 {6, 6, 20}, /* cost of loading fp registers
227 in SFmode, DFmode and XFmode */
228 {4, 4, 16}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
229 2, /* cost of moving MMX register */
230 {2, 2}, /* cost of loading MMX registers
231 in SImode and DImode */
232 {2, 2}, /* cost of storing MMX registers
233 in SImode and DImode */
234 2, /* cost of moving SSE register */
235 {2, 2, 8}, /* cost of loading SSE registers
236 in SImode, DImode and TImode */
237 {2, 2, 8}, /* cost of storing SSE registers
238 in SImode, DImode and TImode */
239 6 /* MMX or SSE register to integer */
242 struct processor_costs pentium4_cost
= {
243 1, /* cost of an add instruction */
244 1, /* cost of a lea instruction */
245 8, /* variable shift costs */
246 8, /* constant shift costs */
247 30, /* cost of starting a multiply */
248 0, /* cost of multiply per each bit set */
249 112, /* cost of a divide/mod */
250 16, /* "large" insn */
/* NOTE(review): the MOVE_RATIO initializer and the closing "};" of this
   aggregate appear to have been lost from this copy -- confirm against the
   original file. */
252 2, /* cost for loading QImode using movzbl */
253 {4, 5, 4}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 3, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
261 2, /* cost of moving MMX register */
262 {2, 2}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {2, 2}, /* cost of storing MMX registers
265 in SImode and DImode */
266 12, /* cost of moving SSE register */
267 {12, 12, 12}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {2, 2, 8}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 10, /* MMX or SSE register to integer */
/* The active cost table.  Defaults to the Pentium table; reassigned from
   processor_target_table[ix86_cpu].cost once the -mcpu= option has been
   processed (see the option-handling code below).  */
274 struct processor_costs
*ix86_cost
= &pentium_cost
;
276 /* Processor feature/optimization bitmasks. */
277 #define m_386 (1<<PROCESSOR_I386)
278 #define m_486 (1<<PROCESSOR_I486)
279 #define m_PENT (1<<PROCESSOR_PENTIUM)
280 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
281 #define m_K6 (1<<PROCESSOR_K6)
282 #define m_ATHLON (1<<PROCESSOR_ATHLON)
283 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
285 const int x86_use_leave
= m_386
| m_K6
| m_ATHLON
;
286 const int x86_push_memory
= m_386
| m_K6
| m_ATHLON
| m_PENT4
;
287 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
288 const int x86_movx
= m_ATHLON
| m_PPRO
| m_PENT4
/* m_386 | m_K6 */;
289 const int x86_double_with_add
= ~m_386
;
290 const int x86_use_bit_test
= m_386
;
291 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON
| m_K6
;
292 const int x86_cmove
= m_PPRO
| m_ATHLON
| m_PENT4
;
293 const int x86_deep_branch
= m_PPRO
| m_K6
| m_ATHLON
| m_PENT4
;
294 const int x86_use_sahf
= m_PPRO
| m_K6
| m_PENT4
;
295 const int x86_partial_reg_stall
= m_PPRO
;
296 const int x86_use_loop
= m_K6
;
297 const int x86_use_fiop
= ~(m_PPRO
| m_ATHLON
| m_PENT
);
298 const int x86_use_mov0
= m_K6
;
299 const int x86_use_cltd
= ~(m_PENT
| m_K6
);
300 const int x86_read_modify_write
= ~m_PENT
;
301 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
302 const int x86_split_long_moves
= m_PPRO
;
303 const int x86_promote_QImode
= m_K6
| m_PENT
| m_386
| m_486
;
304 const int x86_single_stringop
= m_386
| m_PENT4
;
305 const int x86_qimode_math
= ~(0);
306 const int x86_promote_qi_regs
= 0;
307 const int x86_himode_math
= ~(m_PPRO
);
308 const int x86_promote_hi_regs
= m_PPRO
;
309 const int x86_sub_esp_4
= m_ATHLON
| m_PPRO
| m_PENT4
;
310 const int x86_sub_esp_8
= m_ATHLON
| m_PPRO
| m_386
| m_486
| m_PENT4
;
311 const int x86_add_esp_4
= m_ATHLON
| m_K6
| m_PENT4
;
312 const int x86_add_esp_8
= m_ATHLON
| m_PPRO
| m_K6
| m_386
| m_486
| m_PENT4
;
313 const int x86_integer_DFmode_moves
= ~(m_ATHLON
| m_PENT4
);
314 const int x86_partial_reg_dependency
= m_ATHLON
| m_PENT4
;
315 const int x86_memory_mismatch_stall
= m_ATHLON
| m_PENT4
;
317 #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
319 const char * const hi_reg_name
[] = HI_REGISTER_NAMES
;
320 const char * const qi_reg_name
[] = QI_REGISTER_NAMES
;
321 const char * const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
323 /* Array of the smallest class containing reg number REGNO, indexed by
324 REGNO. Used by REGNO_REG_CLASS in i386.h. */
326 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
329 AREG
, DREG
, CREG
, BREG
,
331 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
333 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
334 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
337 /* flags, fpsr, dirflag, frame */
338 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
339 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
341 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
343 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
344 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
345 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
349 /* The "default" register map used in 32bit mode. */
351 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
353 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
354 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
355 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
356 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
357 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
358 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
359 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* NOTE(review): the opening "{" and closing "};" of this initializer appear
   to have been lost from this copy -- confirm against the original file. */
362 /* The "default" register map used in 64bit mode. */
363 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
365 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
366 33, 34, 35, 36, 37, 38, 39, 40 /* fp regs */
367 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
368 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
369 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
370 8,9,10,11,12,13,14,15, /* extended integer registers */
371 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
374 /* Define the register numbers to be used in Dwarf debugging information.
375 The SVR4 reference port C compiler uses the following register numbers
376 in its Dwarf output code:
377 0 for %eax (gcc regno = 0)
378 1 for %ecx (gcc regno = 2)
379 2 for %edx (gcc regno = 1)
380 3 for %ebx (gcc regno = 3)
381 4 for %esp (gcc regno = 7)
382 5 for %ebp (gcc regno = 6)
383 6 for %esi (gcc regno = 4)
384 7 for %edi (gcc regno = 5)
385 The following three DWARF register numbers are never generated by
386 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
387 believes these numbers have these meanings.
388 8 for %eip (no gcc equivalent)
389 9 for %eflags (gcc regno = 17)
390 10 for %trapno (no gcc equivalent)
391 It is not at all clear how we should number the FP stack registers
392 for the x86 architecture. If the version of SDB on x86/svr4 were
393 a bit less brain dead with respect to floating-point then we would
394 have a precedent to follow with respect to DWARF register numbers
395 for x86 FP registers, but the SDB on x86/svr4 is so completely
396 broken with respect to FP registers that it is hardly worth thinking
397 of it as something to strive for compatibility with.
398 The version of x86/svr4 SDB I have at the moment does (partially)
399 seem to believe that DWARF register number 11 is associated with
400 the x86 register %st(0), but that's about all. Higher DWARF
401 register numbers don't seem to be associated with anything in
402 particular, and even for DWARF regno 11, SDB only seems to under-
403 stand that it should say that a variable lives in %st(0) (when
404 asked via an `=' command) if we said it was in DWARF regno 11,
405 but SDB still prints garbage when asked for the value of the
406 variable in question (via a `/' command).
407 (Also note that the labels SDB prints for various FP stack regs
408 when doing an `x' command are all wrong.)
409 Note that these problems generally don't affect the native SVR4
410 C compiler because it doesn't allow the use of -O with -g and
411 because when it is *not* optimizing, it allocates a memory
412 location for each floating-point variable, and the memory
413 location is what gets described in the DWARF AT_location
414 attribute for the variable in question.
415 Regardless of the severe mental illness of the x86/svr4 SDB, we
416 do something sensible here and we use the following DWARF
417 register numbers. Note that these are all stack-top-relative
419 11 for %st(0) (gcc regno = 8)
420 12 for %st(1) (gcc regno = 9)
421 13 for %st(2) (gcc regno = 10)
422 14 for %st(3) (gcc regno = 11)
423 15 for %st(4) (gcc regno = 12)
424 16 for %st(5) (gcc regno = 13)
425 17 for %st(6) (gcc regno = 14)
426 18 for %st(7) (gcc regno = 15)
428 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
430 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
431 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
432 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
433 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
434 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
435 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
436 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* NOTE(review): the opening "{" and closing "};" of this initializer appear
   to have been lost from this copy -- confirm against the original file. */
439 /* Test and compare insns in i386.md store the information needed to
440 generate branch and scc insns here. */
442 struct rtx_def
*ix86_compare_op0
= NULL_RTX
;
443 struct rtx_def
*ix86_compare_op1
= NULL_RTX
;
445 #define MAX_386_STACK_LOCALS 2
447 /* Define the structure for the machine field in struct function. */
448 struct machine_function
450 rtx stack_locals
[(int) MAX_MACHINE_MODE
][MAX_386_STACK_LOCALS
];
451 int accesses_prev_frame
;
454 #define ix86_stack_locals (cfun->machine->stack_locals)
456 /* Structure describing stack frame layout.
457 Stack grows downward:
463 saved frame pointer if frame_pointer_needed
464 <- HARD_FRAME_POINTER
470 > to_allocate <- FRAME_POINTER
481 int outgoing_arguments_size
;
483 HOST_WIDE_INT to_allocate
;
484 /* The offsets relative to ARG_POINTER. */
485 HOST_WIDE_INT frame_pointer_offset
;
486 HOST_WIDE_INT hard_frame_pointer_offset
;
487 HOST_WIDE_INT stack_pointer_offset
;
490 /* Code model option as passed by user. */
491 const char *ix86_cmodel_string
;
493 enum cmodel ix86_cmodel
;
495 /* which cpu are we scheduling for */
496 enum processor_type ix86_cpu
;
498 /* which instruction set architecture to use. */
501 /* Strings to hold which cpu and instruction set architecture to use. */
502 const char *ix86_cpu_string
; /* for -mcpu=<xxx> */
503 const char *ix86_arch_string
; /* for -march=<xxx> */
505 /* # of registers to use to pass arguments. */
506 const char *ix86_regparm_string
;
508 /* ix86_regparm_string as a number */
511 /* Alignment to use for loops and jumps: */
513 /* Power of two alignment for loops. */
514 const char *ix86_align_loops_string
;
516 /* Power of two alignment for non-loop jumps. */
517 const char *ix86_align_jumps_string
;
519 /* Power of two alignment for stack boundary in bytes. */
520 const char *ix86_preferred_stack_boundary_string
;
522 /* Preferred alignment for stack boundary in bits. */
523 int ix86_preferred_stack_boundary
;
525 /* Values 1-5: see jump.c */
526 int ix86_branch_cost
;
527 const char *ix86_branch_cost_string
;
529 /* Power of two alignment for functions. */
530 int ix86_align_funcs
;
531 const char *ix86_align_funcs_string
;
533 /* Power of two alignment for loops. */
534 int ix86_align_loops
;
536 /* Power of two alignment for non-loop jumps. */
537 int ix86_align_jumps
;
539 static void output_pic_addr_const
PARAMS ((FILE *, rtx
, int));
540 static void put_condition_code
PARAMS ((enum rtx_code
, enum machine_mode
,
542 static rtx ix86_expand_int_compare
PARAMS ((enum rtx_code
, rtx
, rtx
));
543 static enum rtx_code ix86_prepare_fp_compare_args
PARAMS ((enum rtx_code
,
545 static rtx gen_push
PARAMS ((rtx
));
546 static int memory_address_length
PARAMS ((rtx addr
));
547 static int ix86_flags_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
548 static int ix86_agi_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
549 static int ix86_safe_length
PARAMS ((rtx
));
550 static enum attr_memory ix86_safe_memory
PARAMS ((rtx
));
551 static enum attr_pent_pair ix86_safe_pent_pair
PARAMS ((rtx
));
552 static enum attr_ppro_uops ix86_safe_ppro_uops
PARAMS ((rtx
));
553 static void ix86_dump_ppro_packet
PARAMS ((FILE *));
554 static void ix86_reorder_insn
PARAMS ((rtx
*, rtx
*));
555 static rtx
* ix86_pent_find_pair
PARAMS ((rtx
*, rtx
*, enum attr_pent_pair
,
557 static void ix86_init_machine_status
PARAMS ((struct function
*));
558 static void ix86_mark_machine_status
PARAMS ((struct function
*));
559 static void ix86_free_machine_status
PARAMS ((struct function
*));
560 static int ix86_split_to_parts
PARAMS ((rtx
, rtx
*, enum machine_mode
));
561 static int ix86_safe_length_prefix
PARAMS ((rtx
));
562 static int ix86_nsaved_regs
PARAMS((void));
563 static void ix86_emit_save_regs
PARAMS((void));
564 static void ix86_emit_restore_regs_using_mov
PARAMS ((rtx
, int));
565 static void ix86_emit_epilogue_esp_adjustment
PARAMS((int));
566 static void ix86_set_move_mem_attrs_1
PARAMS ((rtx
, rtx
, rtx
, rtx
, rtx
));
567 static void ix86_sched_reorder_pentium
PARAMS((rtx
*, rtx
*));
568 static void ix86_sched_reorder_ppro
PARAMS((rtx
*, rtx
*));
569 static HOST_WIDE_INT ix86_GOT_alias_set
PARAMS ((void));
573 rtx base
, index
, disp
;
577 static int ix86_decompose_address
PARAMS ((rtx
, struct ix86_address
*));
579 struct builtin_description
;
580 static rtx ix86_expand_sse_comi
PARAMS ((struct builtin_description
*, tree
,
582 static rtx ix86_expand_sse_compare
PARAMS ((struct builtin_description
*, tree
,
584 static rtx ix86_expand_unop1_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
585 static rtx ix86_expand_unop_builtin
PARAMS ((enum insn_code
, tree
, rtx
, int));
586 static rtx ix86_expand_binop_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
587 static rtx ix86_expand_store_builtin
PARAMS ((enum insn_code
, tree
, int));
588 static rtx safe_vector_operand
PARAMS ((rtx
, enum machine_mode
));
589 static enum rtx_code ix86_fp_compare_code_to_integer
PARAMS ((enum rtx_code
));
590 static void ix86_fp_comparison_codes
PARAMS ((enum rtx_code code
,
594 static rtx ix86_expand_fp_compare
PARAMS ((enum rtx_code
, rtx
, rtx
, rtx
,
596 static int ix86_fp_comparison_arithmetics_cost
PARAMS ((enum rtx_code code
));
597 static int ix86_fp_comparison_fcomi_cost
PARAMS ((enum rtx_code code
));
598 static int ix86_fp_comparison_sahf_cost
PARAMS ((enum rtx_code code
));
599 static int ix86_fp_comparison_cost
PARAMS ((enum rtx_code code
));
600 static int ix86_save_reg
PARAMS ((int));
601 static void ix86_compute_frame_layout
PARAMS ((struct ix86_frame
*));
603 /* Sometimes certain combinations of command options do not make
604 sense on a particular target machine. You can define a macro
605 `OVERRIDE_OPTIONS' to take account of this. This macro, if
606 defined, is executed once just after all the command options have
609 Don't use this macro to turn on various extra optimizations for
610 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
616 /* Comes from final.c -- no real reason to change it. */
617 #define MAX_CODE_ALIGN 16
621 struct processor_costs
*cost
; /* Processor costs */
622 int target_enable
; /* Target flags to enable. */
623 int target_disable
; /* Target flags to disable. */
624 int align_loop
; /* Default alignments. */
629 const processor_target_table
[PROCESSOR_max
] =
631 {&i386_cost
, 0, 0, 2, 2, 2, 1},
632 {&i486_cost
, 0, 0, 4, 4, 4, 1},
633 {&pentium_cost
, 0, 0, -4, -4, -4, 1},
634 {&pentiumpro_cost
, 0, 0, 4, -4, 4, 1},
635 {&k6_cost
, 0, 0, -5, -5, 4, 1},
636 {&athlon_cost
, 0, 0, 4, -4, 4, 1},
637 {&pentium4_cost
, 0, 0, 2, 2, 2, 1}
642 const char *name
; /* processor name or nickname. */
643 enum processor_type processor
;
645 const processor_alias_table
[] =
647 {"i386", PROCESSOR_I386
},
648 {"i486", PROCESSOR_I486
},
649 {"i586", PROCESSOR_PENTIUM
},
650 {"pentium", PROCESSOR_PENTIUM
},
651 {"i686", PROCESSOR_PENTIUMPRO
},
652 {"pentiumpro", PROCESSOR_PENTIUMPRO
},
653 {"k6", PROCESSOR_K6
},
654 {"athlon", PROCESSOR_ATHLON
},
655 {"pentium4", PROCESSOR_PENTIUM4
},
658 int const pta_size
= sizeof (processor_alias_table
) / sizeof (struct pta
);
660 #ifdef SUBTARGET_OVERRIDE_OPTIONS
661 SUBTARGET_OVERRIDE_OPTIONS
;
664 ix86_arch
= PROCESSOR_I386
;
665 ix86_cpu
= (enum processor_type
) TARGET_CPU_DEFAULT
;
667 if (ix86_cmodel_string
!= 0)
669 if (!strcmp (ix86_cmodel_string
, "small"))
670 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
672 sorry ("Code model %s not supported in PIC mode", ix86_cmodel_string
);
673 else if (!strcmp (ix86_cmodel_string
, "32"))
675 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
676 ix86_cmodel
= CM_KERNEL
;
677 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
678 ix86_cmodel
= CM_MEDIUM
;
679 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
680 ix86_cmodel
= CM_LARGE
;
682 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
688 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
690 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
691 error ("Code model `%s' not supported in the %s bit mode.",
692 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
693 if (ix86_cmodel
== CM_LARGE
)
694 sorry ("Code model `large' not supported yet.");
696 if (ix86_arch_string
!= 0)
698 for (i
= 0; i
< pta_size
; i
++)
699 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
701 ix86_arch
= processor_alias_table
[i
].processor
;
702 /* Default cpu tuning to the architecture. */
703 ix86_cpu
= ix86_arch
;
708 error ("bad value (%s) for -march= switch", ix86_arch_string
);
711 if (ix86_cpu_string
!= 0)
713 for (i
= 0; i
< pta_size
; i
++)
714 if (! strcmp (ix86_cpu_string
, processor_alias_table
[i
].name
))
716 ix86_cpu
= processor_alias_table
[i
].processor
;
720 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string
);
723 ix86_cost
= processor_target_table
[ix86_cpu
].cost
;
724 target_flags
|= processor_target_table
[ix86_cpu
].target_enable
;
725 target_flags
&= ~processor_target_table
[ix86_cpu
].target_disable
;
727 /* Arrange to set up i386_stack_locals for all functions. */
728 init_machine_status
= ix86_init_machine_status
;
729 mark_machine_status
= ix86_mark_machine_status
;
730 free_machine_status
= ix86_free_machine_status
;
732 /* Validate -mregparm= value. */
733 if (ix86_regparm_string
)
735 i
= atoi (ix86_regparm_string
);
736 if (i
< 0 || i
> REGPARM_MAX
)
737 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
742 /* Validate -malign-loops= value, or provide default. */
743 ix86_align_loops
= processor_target_table
[ix86_cpu
].align_loop
;
744 if (ix86_align_loops_string
)
746 i
= atoi (ix86_align_loops_string
);
747 if (i
< 0 || i
> MAX_CODE_ALIGN
)
748 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
750 ix86_align_loops
= i
;
753 /* Validate -malign-jumps= value, or provide default. */
754 ix86_align_jumps
= processor_target_table
[ix86_cpu
].align_jump
;
755 if (ix86_align_jumps_string
)
757 i
= atoi (ix86_align_jumps_string
);
758 if (i
< 0 || i
> MAX_CODE_ALIGN
)
759 error ("-malign-jumps=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
761 ix86_align_jumps
= i
;
764 /* Validate -malign-functions= value, or provide default. */
765 ix86_align_funcs
= processor_target_table
[ix86_cpu
].align_func
;
766 if (ix86_align_funcs_string
)
768 i
= atoi (ix86_align_funcs_string
);
769 if (i
< 0 || i
> MAX_CODE_ALIGN
)
770 error ("-malign-functions=%d is not between 0 and %d",
773 ix86_align_funcs
= i
;
776 /* Validate -mpreferred-stack-boundary= value, or provide default.
777 The default of 128 bits is for Pentium III's SSE __m128. */
778 ix86_preferred_stack_boundary
= 128;
779 if (ix86_preferred_stack_boundary_string
)
781 i
= atoi (ix86_preferred_stack_boundary_string
);
783 error ("-mpreferred-stack-boundary=%d is not between 2 and 31", i
);
785 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
788 /* Validate -mbranch-cost= value, or provide default. */
789 ix86_branch_cost
= processor_target_table
[ix86_cpu
].branch_cost
;
790 if (ix86_branch_cost_string
)
792 i
= atoi (ix86_branch_cost_string
);
794 error ("-mbranch-cost=%d is not between 0 and 5", i
);
796 ix86_branch_cost
= i
;
799 /* Keep nonleaf frame pointers. */
800 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
801 flag_omit_frame_pointer
= 1;
803 /* If we're doing fast math, we don't care about comparison order
804 wrt NaNs. This lets us use a shorter comparison sequence. */
805 if (flag_unsafe_math_optimizations
)
806 target_flags
&= ~MASK_IEEE_FP
;
808 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
811 target_flags
|= MASK_MMX
;
815 optimization_options (level
, size
)
817 int size ATTRIBUTE_UNUSED
;
819 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
820 make the problem with not enough registers even worse. */
821 #ifdef INSN_SCHEDULING
823 flag_schedule_insns
= 0;
827 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
828 attribute for DECL. The attributes in ATTRIBUTES have previously been
832 ix86_valid_decl_attribute_p (decl
, attributes
, identifier
, args
)
833 tree decl ATTRIBUTE_UNUSED
;
834 tree attributes ATTRIBUTE_UNUSED
;
835 tree identifier ATTRIBUTE_UNUSED
;
836 tree args ATTRIBUTE_UNUSED
;
841 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
842 attribute for TYPE. The attributes in ATTRIBUTES have previously been
846 ix86_valid_type_attribute_p (type
, attributes
, identifier
, args
)
848 tree attributes ATTRIBUTE_UNUSED
;
852 if (TREE_CODE (type
) != FUNCTION_TYPE
853 && TREE_CODE (type
) != METHOD_TYPE
854 && TREE_CODE (type
) != FIELD_DECL
855 && TREE_CODE (type
) != TYPE_DECL
)
858 /* Stdcall attribute says callee is responsible for popping arguments
859 if they are not variable. */
860 if (is_attribute_p ("stdcall", identifier
))
861 return (args
== NULL_TREE
);
863 /* Cdecl attribute says the callee is a normal C declaration. */
864 if (is_attribute_p ("cdecl", identifier
))
865 return (args
== NULL_TREE
);
867 /* Regparm attribute specifies how many integer arguments are to be
868 passed in registers. */
869 if (is_attribute_p ("regparm", identifier
))
873 if (! args
|| TREE_CODE (args
) != TREE_LIST
874 || TREE_CHAIN (args
) != NULL_TREE
875 || TREE_VALUE (args
) == NULL_TREE
)
878 cst
= TREE_VALUE (args
);
879 if (TREE_CODE (cst
) != INTEGER_CST
)
882 if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
891 /* Return 0 if the attributes for two types are incompatible, 1 if they
892 are compatible, and 2 if they are nearly compatible (which causes a
893 warning to be generated). */
896 ix86_comp_type_attributes (type1
, type2
)
900 /* Check for mismatch of non-default calling convention. */
901 const char *rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
903 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
906 /* Check for mismatched return types (cdecl vs stdcall). */
907 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
908 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
913 /* Value is the number of bytes of arguments automatically
914 popped when returning from a subroutine call.
915 FUNDECL is the declaration node of the function (as a tree),
916 FUNTYPE is the data type of the function (as a tree),
917 or for a library call it is an identifier node for the subroutine name.
918 SIZE is the number of bytes of arguments passed on the stack.
920 On the 80386, the RTD insn may be used to pop them if the number
921 of args is fixed, but if the number is variable then the caller
922 must pop them all. RTD can't be used for library calls now
923 because the library is compiled with the Unix compiler.
924 Use of RTD is a selectable option, since it is incompatible with
925 standard Unix calling sequences. If the option is not selected,
926 the caller must always pop the args.
928 The attribute stdcall is equivalent to RTD on a per module basis. */
931 ix86_return_pops_args (fundecl
, funtype
, size
)
936 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
938 /* Cdecl functions override -mrtd, and never pop the stack. */
939 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
941 /* Stdcall functions will pop the stack if not variable args. */
942 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
)))
946 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
947 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
952 /* Lose any fake structure return argument. */
953 if (aggregate_value_p (TREE_TYPE (funtype
)))
954 return GET_MODE_SIZE (Pmode
);
959 /* Argument support functions. */
961 /* Initialize a variable CUM of type CUMULATIVE_ARGS
962 for a call to a function whose data type is FNTYPE.
963 For a library call, FNTYPE is 0. */
966 init_cumulative_args (cum
, fntype
, libname
)
967 CUMULATIVE_ARGS
*cum
; /* Argument info to initialize */
968 tree fntype
; /* tree ptr for function decl */
969 rtx libname
; /* SYMBOL_REF of library name or 0 */
971 static CUMULATIVE_ARGS zero_cum
;
972 tree param
, next_param
;
974 if (TARGET_DEBUG_ARG
)
976 fprintf (stderr
, "\ninit_cumulative_args (");
978 fprintf (stderr
, "fntype code = %s, ret code = %s",
979 tree_code_name
[(int) TREE_CODE (fntype
)],
980 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
982 fprintf (stderr
, "no fntype");
985 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
990 /* Set up the number of registers to use for passing arguments. */
991 cum
->nregs
= ix86_regparm
;
994 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype
));
997 cum
->nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1000 /* Determine if this function has variable arguments. This is
1001 indicated by the last argument being 'void_type_mode' if there
1002 are no variable arguments. If there are variable arguments, then
1003 we won't pass anything in registers */
1007 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
1008 param
!= 0; param
= next_param
)
1010 next_param
= TREE_CHAIN (param
);
1011 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
1016 if (TARGET_DEBUG_ARG
)
1017 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
1022 /* Update the data in CUM to advance over an argument
1023 of mode MODE and data type TYPE.
1024 (TYPE is null for libcalls where that information may not be available.) */
1027 function_arg_advance (cum
, mode
, type
, named
)
1028 CUMULATIVE_ARGS
*cum
; /* current arg information */
1029 enum machine_mode mode
; /* current arg mode */
1030 tree type
; /* type of the argument or 0 if lib support */
1031 int named
; /* whether or not the argument was named */
1034 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1035 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1037 if (TARGET_DEBUG_ARG
)
1039 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
1040 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
1041 if (TARGET_SSE
&& mode
== TImode
)
1043 cum
->sse_words
+= words
;
1044 cum
->sse_nregs
-= 1;
1045 cum
->sse_regno
+= 1;
1046 if (cum
->sse_nregs
<= 0)
1054 cum
->words
+= words
;
1055 cum
->nregs
-= words
;
1056 cum
->regno
+= words
;
1058 if (cum
->nregs
<= 0)
1067 /* Define where to put the arguments to a function.
1068 Value is zero to push the argument on the stack,
1069 or a hard register in which to store the argument.
1071 MODE is the argument's machine mode.
1072 TYPE is the data type of the argument (as a tree).
1073 This is null for libcalls where that information may
1075 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1076 the preceding args and about the function being called.
1077 NAMED is nonzero if this argument is a named parameter
1078 (otherwise it is an extra parameter matching an ellipsis). */
1081 function_arg (cum
, mode
, type
, named
)
1082 CUMULATIVE_ARGS
*cum
; /* current arg information */
1083 enum machine_mode mode
; /* current arg mode */
1084 tree type
; /* type of the argument or 0 if lib support */
1085 int named
; /* != 0 for normal args, == 0 for ... args */
1089 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1090 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1094 /* For now, pass fp/complex values on the stack. */
1103 if (words
<= cum
->nregs
)
1104 ret
= gen_rtx_REG (mode
, cum
->regno
);
1108 ret
= gen_rtx_REG (mode
, cum
->sse_regno
);
1112 if (TARGET_DEBUG_ARG
)
1115 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
1116 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
1119 fprintf (stderr
, ", reg=%%e%s", reg_names
[ REGNO(ret
) ]);
1121 fprintf (stderr
, ", stack");
1123 fprintf (stderr
, " )\n");
1130 /* Return nonzero if OP is general operand representable on x86_64. */
1133 x86_64_general_operand (op
, mode
)
1135 enum machine_mode mode
;
1138 return general_operand (op
, mode
);
1139 if (nonimmediate_operand (op
, mode
))
1141 return x86_64_sign_extended_value (op
);
1144 /* Return nonzero if OP is general operand representable on x86_64
1145 as eighter sign extended or zero extended constant. */
1148 x86_64_szext_general_operand (op
, mode
)
1150 enum machine_mode mode
;
1153 return general_operand (op
, mode
);
1154 if (nonimmediate_operand (op
, mode
))
1156 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
1159 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
1162 x86_64_nonmemory_operand (op
, mode
)
1164 enum machine_mode mode
;
1167 return nonmemory_operand (op
, mode
);
1168 if (register_operand (op
, mode
))
1170 return x86_64_sign_extended_value (op
);
1173 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
1176 x86_64_movabs_operand (op
, mode
)
1178 enum machine_mode mode
;
1180 if (!TARGET_64BIT
|| !flag_pic
)
1181 return nonmemory_operand (op
, mode
);
1182 if (register_operand (op
, mode
) || x86_64_sign_extended_value (op
))
1184 if (CONSTANT_P (op
) && !symbolic_reference_mentioned_p (op
))
1189 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
1192 x86_64_szext_nonmemory_operand (op
, mode
)
1194 enum machine_mode mode
;
1197 return nonmemory_operand (op
, mode
);
1198 if (register_operand (op
, mode
))
1200 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
1203 /* Return nonzero if OP is immediate operand representable on x86_64. */
1206 x86_64_immediate_operand (op
, mode
)
1208 enum machine_mode mode
;
1211 return immediate_operand (op
, mode
);
1212 return x86_64_sign_extended_value (op
);
1215 /* Return nonzero if OP is immediate operand representable on x86_64. */
1218 x86_64_zext_immediate_operand (op
, mode
)
1220 enum machine_mode mode ATTRIBUTE_UNUSED
;
1222 return x86_64_zero_extended_value (op
);
1225 /* Return nonzero if OP is (const_int 1), else return zero. */
1228 const_int_1_operand (op
, mode
)
1230 enum machine_mode mode ATTRIBUTE_UNUSED
;
1232 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) == 1);
1235 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
1236 reference and a constant. */
1239 symbolic_operand (op
, mode
)
1241 enum machine_mode mode ATTRIBUTE_UNUSED
;
1243 switch (GET_CODE (op
))
1251 if (GET_CODE (op
) == SYMBOL_REF
1252 || GET_CODE (op
) == LABEL_REF
1253 || (GET_CODE (op
) == UNSPEC
1254 && XINT (op
, 1) >= 6
1255 && XINT (op
, 1) <= 7))
1257 if (GET_CODE (op
) != PLUS
1258 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
1262 if (GET_CODE (op
) == SYMBOL_REF
1263 || GET_CODE (op
) == LABEL_REF
)
1265 /* Only @GOTOFF gets offsets. */
1266 if (GET_CODE (op
) != UNSPEC
1267 || XINT (op
, 1) != 7)
1270 op
= XVECEXP (op
, 0, 0);
1271 if (GET_CODE (op
) == SYMBOL_REF
1272 || GET_CODE (op
) == LABEL_REF
)
1281 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
1284 pic_symbolic_operand (op
, mode
)
1286 enum machine_mode mode ATTRIBUTE_UNUSED
;
1288 if (GET_CODE (op
) == CONST
)
1291 if (GET_CODE (op
) == UNSPEC
)
1293 if (GET_CODE (op
) != PLUS
1294 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
1297 if (GET_CODE (op
) == UNSPEC
)
1303 /* Test for a valid operand for a call instruction. Don't allow the
1304 arg pointer register or virtual regs since they may decay into
1305 reg + const, which the patterns can't handle. */
1308 call_insn_operand (op
, mode
)
1310 enum machine_mode mode ATTRIBUTE_UNUSED
;
1312 /* Disallow indirect through a virtual register. This leads to
1313 compiler aborts when trying to eliminate them. */
1314 if (GET_CODE (op
) == REG
1315 && (op
== arg_pointer_rtx
1316 || op
== frame_pointer_rtx
1317 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
1318 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
1321 /* Disallow `call 1234'. Due to varying assembler lameness this
1322 gets either rejected or translated to `call .+1234'. */
1323 if (GET_CODE (op
) == CONST_INT
)
1326 /* Explicitly allow SYMBOL_REF even if pic. */
1327 if (GET_CODE (op
) == SYMBOL_REF
)
1330 /* Half-pic doesn't allow anything but registers and constants.
1331 We've just taken care of the later. */
1333 return register_operand (op
, Pmode
);
1335 /* Otherwise we can allow any general_operand in the address. */
1336 return general_operand (op
, Pmode
);
1340 constant_call_address_operand (op
, mode
)
1342 enum machine_mode mode ATTRIBUTE_UNUSED
;
1344 if (GET_CODE (op
) == CONST
1345 && GET_CODE (XEXP (op
, 0)) == PLUS
1346 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
1347 op
= XEXP (XEXP (op
, 0), 0);
1348 return GET_CODE (op
) == SYMBOL_REF
;
1351 /* Match exactly zero and one. */
1354 const0_operand (op
, mode
)
1356 enum machine_mode mode
;
1358 return op
== CONST0_RTX (mode
);
1362 const1_operand (op
, mode
)
1364 enum machine_mode mode ATTRIBUTE_UNUSED
;
1366 return op
== const1_rtx
;
1369 /* Match 2, 4, or 8. Used for leal multiplicands. */
1372 const248_operand (op
, mode
)
1374 enum machine_mode mode ATTRIBUTE_UNUSED
;
1376 return (GET_CODE (op
) == CONST_INT
1377 && (INTVAL (op
) == 2 || INTVAL (op
) == 4 || INTVAL (op
) == 8));
1380 /* True if this is a constant appropriate for an increment or decremenmt. */
1383 incdec_operand (op
, mode
)
1385 enum machine_mode mode
;
1387 /* On Pentium4, the inc and dec operations causes extra dependancy on flag
1388 registers, since carry flag is not set. */
1389 if (TARGET_PENTIUM4
&& !optimize_size
)
1391 if (op
== const1_rtx
|| op
== constm1_rtx
)
1393 if (GET_CODE (op
) != CONST_INT
)
1395 if (mode
== SImode
&& INTVAL (op
) == (HOST_WIDE_INT
) 0xffffffff)
1397 if (mode
== HImode
&& INTVAL (op
) == (HOST_WIDE_INT
) 0xffff)
1399 if (mode
== QImode
&& INTVAL (op
) == (HOST_WIDE_INT
) 0xff)
1404 /* Return false if this is the stack pointer, or any other fake
1405 register eliminable to the stack pointer. Otherwise, this is
1408 This is used to prevent esp from being used as an index reg.
1409 Which would only happen in pathological cases. */
1412 reg_no_sp_operand (op
, mode
)
1414 enum machine_mode mode
;
1417 if (GET_CODE (t
) == SUBREG
)
1419 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
)
1422 return register_operand (op
, mode
);
1426 mmx_reg_operand (op
, mode
)
1428 enum machine_mode mode ATTRIBUTE_UNUSED
;
1430 return MMX_REG_P (op
);
1433 /* Return false if this is any eliminable register. Otherwise
1437 general_no_elim_operand (op
, mode
)
1439 enum machine_mode mode
;
1442 if (GET_CODE (t
) == SUBREG
)
1444 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
1445 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
1446 || t
== virtual_stack_dynamic_rtx
)
1449 return general_operand (op
, mode
);
1452 /* Return false if this is any eliminable register. Otherwise
1453 register_operand or const_int. */
1456 nonmemory_no_elim_operand (op
, mode
)
1458 enum machine_mode mode
;
1461 if (GET_CODE (t
) == SUBREG
)
1463 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
1464 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
1465 || t
== virtual_stack_dynamic_rtx
)
1468 return GET_CODE (op
) == CONST_INT
|| register_operand (op
, mode
);
1471 /* Return true if op is a Q_REGS class register. */
1474 q_regs_operand (op
, mode
)
1476 enum machine_mode mode
;
1478 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
1480 if (GET_CODE (op
) == SUBREG
)
1481 op
= SUBREG_REG (op
);
1482 return QI_REG_P (op
);
1485 /* Return true if op is a NON_Q_REGS class register. */
1488 non_q_regs_operand (op
, mode
)
1490 enum machine_mode mode
;
1492 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
1494 if (GET_CODE (op
) == SUBREG
)
1495 op
= SUBREG_REG (op
);
1496 return NON_QI_REG_P (op
);
1499 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
1502 sse_comparison_operator (op
, mode
)
1504 enum machine_mode mode ATTRIBUTE_UNUSED
;
1506 enum rtx_code code
= GET_CODE (op
);
1509 /* Operations supported directly. */
1519 /* These are equivalent to ones above in non-IEEE comparisons. */
1526 return !TARGET_IEEE_FP
;
1531 /* Return 1 if OP is a valid comparison operator in valid mode. */
1533 ix86_comparison_operator (op
, mode
)
1535 enum machine_mode mode
;
1537 enum machine_mode inmode
;
1538 enum rtx_code code
= GET_CODE (op
);
1539 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
1541 if (GET_RTX_CLASS (code
) != '<')
1543 inmode
= GET_MODE (XEXP (op
, 0));
1545 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
1547 enum rtx_code second_code
, bypass_code
;
1548 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
1549 return (bypass_code
== NIL
&& second_code
== NIL
);
1556 if (inmode
== CCmode
|| inmode
== CCGCmode
1557 || inmode
== CCGOCmode
|| inmode
== CCNOmode
)
1560 case LTU
: case GTU
: case LEU
: case ORDERED
: case UNORDERED
: case GEU
:
1561 if (inmode
== CCmode
)
1565 if (inmode
== CCmode
|| inmode
== CCGCmode
|| inmode
== CCNOmode
)
1573 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
1576 fcmov_comparison_operator (op
, mode
)
1578 enum machine_mode mode
;
1580 enum machine_mode inmode
;
1581 enum rtx_code code
= GET_CODE (op
);
1582 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
1584 if (GET_RTX_CLASS (code
) != '<')
1586 inmode
= GET_MODE (XEXP (op
, 0));
1587 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
1589 enum rtx_code second_code
, bypass_code
;
1590 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
1591 if (bypass_code
!= NIL
|| second_code
!= NIL
)
1593 code
= ix86_fp_compare_code_to_integer (code
);
1595 /* i387 supports just limited amount of conditional codes. */
1598 case LTU
: case GTU
: case LEU
: case GEU
:
1599 if (inmode
== CCmode
|| inmode
== CCFPmode
|| inmode
== CCFPUmode
)
1602 case ORDERED
: case UNORDERED
:
1610 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1613 promotable_binary_operator (op
, mode
)
1615 enum machine_mode mode ATTRIBUTE_UNUSED
;
1617 switch (GET_CODE (op
))
1620 /* Modern CPUs have same latency for HImode and SImode multiply,
1621 but 386 and 486 do HImode multiply faster. */
1622 return ix86_cpu
> PROCESSOR_I486
;
1634 /* Nearly general operand, but accept any const_double, since we wish
1635 to be able to drop them into memory rather than have them get pulled
1639 cmp_fp_expander_operand (op
, mode
)
1641 enum machine_mode mode
;
1643 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
1645 if (GET_CODE (op
) == CONST_DOUBLE
)
1647 return general_operand (op
, mode
);
1650 /* Match an SI or HImode register for a zero_extract. */
1653 ext_register_operand (op
, mode
)
1655 enum machine_mode mode ATTRIBUTE_UNUSED
;
1657 if (GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
1659 return register_operand (op
, VOIDmode
);
1662 /* Return 1 if this is a valid binary floating-point operation.
1663 OP is the expression matched, and MODE is its mode. */
1666 binary_fp_operator (op
, mode
)
1668 enum machine_mode mode
;
1670 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
1673 switch (GET_CODE (op
))
1679 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
1687 mult_operator(op
, mode
)
1689 enum machine_mode mode ATTRIBUTE_UNUSED
;
1691 return GET_CODE (op
) == MULT
;
1695 div_operator(op
, mode
)
1697 enum machine_mode mode ATTRIBUTE_UNUSED
;
1699 return GET_CODE (op
) == DIV
;
1703 arith_or_logical_operator (op
, mode
)
1705 enum machine_mode mode
;
1707 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
1708 && (GET_RTX_CLASS (GET_CODE (op
)) == 'c'
1709 || GET_RTX_CLASS (GET_CODE (op
)) == '2'));
1712 /* Returns 1 if OP is memory operand with a displacement. */
1715 memory_displacement_operand (op
, mode
)
1717 enum machine_mode mode
;
1719 struct ix86_address parts
;
1721 if (! memory_operand (op
, mode
))
1724 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
1727 return parts
.disp
!= NULL_RTX
;
1730 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
1731 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1733 ??? It seems likely that this will only work because cmpsi is an
1734 expander, and no actual insns use this. */
1737 cmpsi_operand (op
, mode
)
1739 enum machine_mode mode
;
1741 if (general_operand (op
, mode
))
1744 if (GET_CODE (op
) == AND
1745 && GET_MODE (op
) == SImode
1746 && GET_CODE (XEXP (op
, 0)) == ZERO_EXTRACT
1747 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
1748 && GET_CODE (XEXP (XEXP (op
, 0), 2)) == CONST_INT
1749 && INTVAL (XEXP (XEXP (op
, 0), 1)) == 8
1750 && INTVAL (XEXP (XEXP (op
, 0), 2)) == 8
1751 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
1757 /* Returns 1 if OP is memory operand that can not be represented by the
1761 long_memory_operand (op
, mode
)
1763 enum machine_mode mode
;
1765 if (! memory_operand (op
, mode
))
1768 return memory_address_length (op
) != 0;
1771 /* Return nonzero if the rtx is known aligned. */
1774 aligned_operand (op
, mode
)
1776 enum machine_mode mode
;
1778 struct ix86_address parts
;
1780 if (!general_operand (op
, mode
))
1783 /* Registers and immediate operands are always "aligned". */
1784 if (GET_CODE (op
) != MEM
)
1787 /* Don't even try to do any aligned optimizations with volatiles. */
1788 if (MEM_VOLATILE_P (op
))
1793 /* Pushes and pops are only valid on the stack pointer. */
1794 if (GET_CODE (op
) == PRE_DEC
1795 || GET_CODE (op
) == POST_INC
)
1798 /* Decode the address. */
1799 if (! ix86_decompose_address (op
, &parts
))
1802 /* Look for some component that isn't known to be aligned. */
1806 && REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 32)
1811 if (REGNO_POINTER_ALIGN (REGNO (parts
.base
)) < 32)
1816 if (GET_CODE (parts
.disp
) != CONST_INT
1817 || (INTVAL (parts
.disp
) & 3) != 0)
1821 /* Didn't find one -- this must be an aligned address. */
1825 /* Return true if the constant is something that can be loaded with
1826 a special instruction. Only handle 0.0 and 1.0; others are less
1830 standard_80387_constant_p (x
)
1833 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
1835 /* Note that on the 80387, other constants, such as pi, that we should support
1836 too. On some machines, these are much slower to load as standard constant,
1837 than to load from doubles in memory. */
1838 if (x
== CONST0_RTX (GET_MODE (x
)))
1840 if (x
== CONST1_RTX (GET_MODE (x
)))
1845 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
1848 standard_sse_constant_p (x
)
1851 if (GET_CODE (x
) != CONST_DOUBLE
)
1853 return (x
== CONST0_RTX (GET_MODE (x
)));
1856 /* Returns 1 if OP contains a symbol reference */
1859 symbolic_reference_mentioned_p (op
)
1862 register const char *fmt
;
1865 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
1868 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
1869 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
1875 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
1876 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
1880 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
1887 /* Return 1 if it is appropriate to emit `ret' instructions in the
1888 body of a function. Do this only if the epilogue is simple, needing a
1889 couple of insns. Prior to reloading, we can't tell how many registers
1890 must be saved, so return 0 then. Return 0 if there is no frame
1891 marker to de-allocate.
1893 If NON_SAVING_SETJMP is defined and true, then it is not possible
1894 for the epilogue to be simple, so return 0. This is a special case
1895 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1896 until final, but jump_optimize may need to know sooner if a
1900 ix86_can_use_return_insn_p ()
1902 struct ix86_frame frame
;
1904 #ifdef NON_SAVING_SETJMP
1905 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
1908 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
1909 if (profile_block_flag
== 2)
1913 if (! reload_completed
|| frame_pointer_needed
)
1916 /* Don't allow more than 32 pop, since that's all we can do
1917 with one instruction. */
1918 if (current_function_pops_args
1919 && current_function_args_size
>= 32768)
1922 ix86_compute_frame_layout (&frame
);
1923 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
1926 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
1928 x86_64_sign_extended_value (value
)
1931 switch (GET_CODE (value
))
1933 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
1934 to be at least 32 and this all acceptable constants are
1935 represented as CONST_INT. */
1937 if (HOST_BITS_PER_WIDE_INT
== 32)
1941 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (value
), DImode
);
1942 return trunc_int_for_mode (val
, SImode
) == val
;
1946 /* For certain code models, the symbolic references are known to fit. */
1948 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_KERNEL
;
1950 /* For certain code models, the code is near as well. */
1952 return ix86_cmodel
!= CM_LARGE
&& ix86_cmodel
!= CM_SMALL_PIC
;
1954 /* We also may accept the offsetted memory references in certain special
1957 if (GET_CODE (XEXP (value
, 0)) == UNSPEC
1958 && XVECLEN (XEXP (value
, 0), 0) == 1
1959 && XINT (XEXP (value
, 0), 1) == 15)
1961 else if (GET_CODE (XEXP (value
, 0)) == PLUS
)
1963 rtx op1
= XEXP (XEXP (value
, 0), 0);
1964 rtx op2
= XEXP (XEXP (value
, 0), 1);
1965 HOST_WIDE_INT offset
;
1967 if (ix86_cmodel
== CM_LARGE
)
1969 if (GET_CODE (op2
) != CONST_INT
)
1971 offset
= trunc_int_for_mode (INTVAL (op2
), DImode
);
1972 switch (GET_CODE (op1
))
1975 /* For CM_SMALL assume that latest object is 1MB before
1976 end of 31bits boundary. We may also accept pretty
1977 large negative constants knowing that all objects are
1978 in the positive half of address space. */
1979 if (ix86_cmodel
== CM_SMALL
1980 && offset
< 1024*1024*1024
1981 && trunc_int_for_mode (offset
, SImode
) == offset
)
1983 /* For CM_KERNEL we know that all object resist in the
1984 negative half of 32bits address space. We may not
1985 accept negative offsets, since they may be just off
1986 and we may accept pretty large possitive ones. */
1987 if (ix86_cmodel
== CM_KERNEL
1989 && trunc_int_for_mode (offset
, SImode
) == offset
)
1993 /* These conditions are similar to SYMBOL_REF ones, just the
1994 constraints for code models differ. */
1995 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
1996 && offset
< 1024*1024*1024
1997 && trunc_int_for_mode (offset
, SImode
) == offset
)
1999 if (ix86_cmodel
== CM_KERNEL
2001 && trunc_int_for_mode (offset
, SImode
) == offset
)
2014 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
2016 x86_64_zero_extended_value (value
)
2019 switch (GET_CODE (value
))
2022 if (HOST_BITS_PER_WIDE_INT
== 32)
2023 return (GET_MODE (value
) == VOIDmode
2024 && !CONST_DOUBLE_HIGH (value
));
2028 if (HOST_BITS_PER_WIDE_INT
== 32)
2029 return INTVAL (value
) >= 0;
2031 return !(INTVAL (value
) & ~(HOST_WIDE_INT
)0xffffffff);
2034 /* For certain code models, the symbolic references are known to fit. */
2036 return ix86_cmodel
== CM_SMALL
;
2038 /* For certain code models, the code is near as well. */
2040 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
;
2042 /* We also may accept the offsetted memory references in certain special
2045 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
2047 rtx op1
= XEXP (XEXP (value
, 0), 0);
2048 rtx op2
= XEXP (XEXP (value
, 0), 1);
2050 if (ix86_cmodel
== CM_LARGE
)
2052 switch (GET_CODE (op1
))
2056 /* For small code model we may accept pretty large possitive
2057 offsets, since one bit is available for free. Negative
2058 offsets are limited by the size of NULL pointer area
2059 specified by the ABI. */
2060 if (ix86_cmodel
== CM_SMALL
2061 && GET_CODE (op2
) == CONST_INT
2062 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
2063 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
2066 /* ??? For the kernel, we may accept adjustment of
2067 -0x10000000, since we know that it will just convert
2068 negative address space to possitive, but perhaps this
2069 is not worthwhile. */
2072 /* These conditions are similar to SYMBOL_REF ones, just the
2073 constraints for code models differ. */
2074 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
2075 && GET_CODE (op2
) == CONST_INT
2076 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
2077 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
2091 /* Value should be nonzero if functions must have frame pointers.
2092 Zero means the frame pointer need not be set up (and parms may
2093 be accessed via the stack pointer) in functions that seem suitable. */
2096 ix86_frame_pointer_required ()
2098 /* If we accessed previous frames, then the generated code expects
2099 to be able to access the saved ebp value in our frame. */
2100 if (cfun
->machine
->accesses_prev_frame
)
2103 /* Several x86 os'es need a frame pointer for other reasons,
2104 usually pertaining to setjmp. */
2105 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
2108 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
2109 the frame pointer by default. Turn it back on now if we've not
2110 got a leaf function. */
2111 if (TARGET_OMIT_LEAF_FRAME_POINTER
&& ! leaf_function_p ())
2117 /* Record that the current function accesses previous call frames. */
2120 ix86_setup_frame_addresses ()
2122 cfun
->machine
->accesses_prev_frame
= 1;
2125 static char pic_label_name
[32];
2127 /* This function generates code for -fpic that loads %ebx with
2128 the return address of the caller and then returns. */
2131 ix86_asm_file_end (file
)
2136 if (! TARGET_DEEP_BRANCH_PREDICTION
|| pic_label_name
[0] == 0)
2139 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
2140 to updating relocations to a section being discarded such that this
2141 doesn't work. Ought to detect this at configure time. */
2142 #if 0 && defined (ASM_OUTPUT_SECTION_NAME)
2143 /* The trick here is to create a linkonce section containing the
2144 pic label thunk, but to refer to it with an internal label.
2145 Because the label is internal, we don't have inter-dso name
2146 binding issues on hosts that don't support ".hidden".
2148 In order to use these macros, however, we must create a fake
2151 tree decl
= build_decl (FUNCTION_DECL
,
2152 get_identifier ("i686.get_pc_thunk"),
2154 DECL_ONE_ONLY (decl
) = 1;
2155 UNIQUE_SECTION (decl
, 0);
2156 named_section (decl
, NULL
, 0);
2162 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
2163 internal (non-global) label that's being emitted, it didn't make
2164 sense to have .type information for local labels. This caused
2165 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
2166 me debug info for a label that you're declaring non-global?) this
2167 was changed to call ASM_OUTPUT_LABEL() instead. */
2169 ASM_OUTPUT_LABEL (file
, pic_label_name
);
2171 xops
[0] = pic_offset_table_rtx
;
2172 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
2173 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
2174 output_asm_insn ("ret", xops
);
2178 load_pic_register ()
2182 gotsym
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
2184 if (TARGET_DEEP_BRANCH_PREDICTION
)
2186 if (! pic_label_name
[0])
2187 ASM_GENERATE_INTERNAL_LABEL (pic_label_name
, "LPR", 0);
2188 pclab
= gen_rtx_MEM (QImode
, gen_rtx_SYMBOL_REF (Pmode
, pic_label_name
));
2192 pclab
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
2195 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx
, pclab
));
2197 if (! TARGET_DEEP_BRANCH_PREDICTION
)
2198 emit_insn (gen_popsi1 (pic_offset_table_rtx
));
2200 emit_insn (gen_prologue_set_got (pic_offset_table_rtx
, gotsym
, pclab
));
2203 /* Generate an SImode "push" pattern for input ARG. */
2209 return gen_rtx_SET (VOIDmode
,
2210 gen_rtx_MEM (SImode
,
2211 gen_rtx_PRE_DEC (SImode
,
2212 stack_pointer_rtx
)),
2216 /* Return 1 if we need to save REGNO. */
2218 ix86_save_reg (regno
)
2221 int pic_reg_used
= flag_pic
&& (current_function_uses_pic_offset_table
2222 || current_function_uses_const_pool
);
2223 return ((regs_ever_live
[regno
] && !call_used_regs
[regno
]
2224 && !fixed_regs
[regno
]
2225 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
))
2226 || (regno
== PIC_OFFSET_TABLE_REGNUM
&& pic_reg_used
));
2230 /* Return number of registers to be saved on the stack. */
2238 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
2239 if (ix86_save_reg (regno
))
2244 /* Return the offset between two registers, one to be eliminated, and the other
2245 its replacement, at the start of a routine. */
2248 ix86_initial_elimination_offset (from
, to
)
2252 struct ix86_frame frame
;
2253 ix86_compute_frame_layout (&frame
);
2255 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
2256 return frame
.hard_frame_pointer_offset
;
2257 else if (from
== FRAME_POINTER_REGNUM
2258 && to
== HARD_FRAME_POINTER_REGNUM
)
2259 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
2262 if (to
!= STACK_POINTER_REGNUM
)
2264 else if (from
== ARG_POINTER_REGNUM
)
2265 return frame
.stack_pointer_offset
;
2266 else if (from
!= FRAME_POINTER_REGNUM
)
2269 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
2273 /* Fill structure ix86_frame about frame of currently computed function. */
2276 ix86_compute_frame_layout (frame
)
2277 struct ix86_frame
*frame
;
2279 HOST_WIDE_INT total_size
;
2280 int stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
2282 int preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
2283 HOST_WIDE_INT size
= get_frame_size ();
2285 frame
->nregs
= ix86_nsaved_regs ();
2288 /* Skip return value and save base pointer. */
2289 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
2291 frame
->hard_frame_pointer_offset
= offset
;
2293 /* Do some sanity checking of stack_alignment_needed and
2294 preferred_alignment, since i386 port is the only using those features
2295 that may break easilly. */
2297 if (size
&& !stack_alignment_needed
)
2299 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
2301 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
2303 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
2306 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
2307 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
2309 /* Register save area */
2310 offset
+= frame
->nregs
* UNITS_PER_WORD
;
2312 /* Align start of frame for local function. */
2313 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
2314 & -stack_alignment_needed
) - offset
;
2316 offset
+= frame
->padding1
;
2318 /* Frame pointer points here. */
2319 frame
->frame_pointer_offset
= offset
;
2323 /* Add outgoing arguments area. */
2324 if (ACCUMULATE_OUTGOING_ARGS
)
2326 offset
+= current_function_outgoing_args_size
;
2327 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
2330 frame
->outgoing_arguments_size
= 0;
2332 /* Align stack boundary. */
2333 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
2334 & -preferred_alignment
) - offset
;
2336 offset
+= frame
->padding2
;
2338 /* We've reached end of stack frame. */
2339 frame
->stack_pointer_offset
= offset
;
2341 /* Size prologue needs to allocate. */
2342 frame
->to_allocate
=
2343 (size
+ frame
->padding1
+ frame
->padding2
2344 + frame
->outgoing_arguments_size
);
2347 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
2348 fprintf (stderr
, "size: %i\n", size
);
2349 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
2350 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
2351 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
2352 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
2353 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
2354 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
2355 frame
->hard_frame_pointer_offset
);
2356 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
2360 /* Emit code to save registers in the prologue. */
2363 ix86_emit_save_regs ()
2368 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
2369 if (ix86_save_reg (regno
))
2371 insn
= emit_insn (gen_push (gen_rtx_REG (SImode
, regno
)));
2372 RTX_FRAME_RELATED_P (insn
) = 1;
2376 /* Expand the prologue into a bunch of separate insns. */
2379 ix86_expand_prologue ()
2382 int pic_reg_used
= flag_pic
&& (current_function_uses_pic_offset_table
2383 || current_function_uses_const_pool
);
2384 struct ix86_frame frame
;
2386 ix86_compute_frame_layout (&frame
);
2388 /* Note: AT&T enter does NOT have reversed args. Enter is probably
2389 slower on all targets. Also sdb doesn't like it. */
2391 if (frame_pointer_needed
)
2393 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
2394 RTX_FRAME_RELATED_P (insn
) = 1;
2396 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
2397 RTX_FRAME_RELATED_P (insn
) = 1;
2400 ix86_emit_save_regs ();
2402 if (frame
.to_allocate
== 0)
2404 else if (! TARGET_STACK_PROBE
|| frame
.to_allocate
< CHECK_STACK_LIMIT
)
2406 if (frame_pointer_needed
)
2407 insn
= emit_insn (gen_pro_epilogue_adjust_stack
2408 (stack_pointer_rtx
, stack_pointer_rtx
,
2409 GEN_INT (-frame
.to_allocate
), hard_frame_pointer_rtx
));
2411 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
2412 GEN_INT (-frame
.to_allocate
)));
2413 RTX_FRAME_RELATED_P (insn
) = 1;
2417 /* ??? Is this only valid for Win32? */
2421 arg0
= gen_rtx_REG (SImode
, 0);
2422 emit_move_insn (arg0
, GEN_INT (frame
.to_allocate
));
2424 sym
= gen_rtx_MEM (FUNCTION_MODE
,
2425 gen_rtx_SYMBOL_REF (Pmode
, "_alloca"));
2426 insn
= emit_call_insn (gen_call (sym
, const0_rtx
));
2428 CALL_INSN_FUNCTION_USAGE (insn
)
2429 = gen_rtx_EXPR_LIST (VOIDmode
, gen_rtx_USE (VOIDmode
, arg0
),
2430 CALL_INSN_FUNCTION_USAGE (insn
));
2433 #ifdef SUBTARGET_PROLOGUE
2438 load_pic_register ();
2440 /* If we are profiling, make sure no instructions are scheduled before
2441 the call to mcount. However, if -fpic, the above call will have
2443 if ((profile_flag
|| profile_block_flag
) && ! pic_reg_used
)
2444 emit_insn (gen_blockage ());
2447 /* Emit code to add TSIZE to esp value. Use POP instruction when
2451 ix86_emit_epilogue_esp_adjustment (tsize
)
2454 /* If a frame pointer is present, we must be sure to tie the sp
2455 to the fp so that we don't mis-schedule. */
2456 if (frame_pointer_needed
)
2457 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
2460 hard_frame_pointer_rtx
));
2462 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
2466 /* Emit code to restore saved registers using MOV insns. First register
2467 is restored from POINTER + OFFSET. */
2469 ix86_emit_restore_regs_using_mov (pointer
, offset
)
2475 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
2476 if (ix86_save_reg (regno
))
2478 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
2479 adj_offsettable_operand (gen_rtx_MEM (Pmode
,
2482 offset
+= UNITS_PER_WORD
;
2486 /* Restore function stack, frame, and registers. */
2489 ix86_expand_epilogue (emit_return
)
2493 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
2494 struct ix86_frame frame
;
2495 HOST_WIDE_INT offset
;
2497 ix86_compute_frame_layout (&frame
);
2499 /* Calculate start of saved registers relative to ebp. */
2500 offset
= -frame
.nregs
* UNITS_PER_WORD
;
2502 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
2503 if (profile_block_flag
== 2)
2505 FUNCTION_BLOCK_PROFILER_EXIT
;
2509 /* If we're only restoring one register and sp is not valid then
2510 using a move instruction to restore the register since it's
2511 less work than reloading sp and popping the register.
2513 The default code result in stack adjustment using add/lea instruction,
2514 while this code results in LEAVE instruction (or discrete equivalent),
2515 so it is profitable in some other cases as well. Especially when there
2516 are no registers to restore. We also use this code when TARGET_USE_LEAVE
2517 and there is exactly one register to pop. This heruistic may need some
2518 tuning in future. */
2519 if ((!sp_valid
&& frame
.nregs
<= 1)
2520 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
2521 || (frame_pointer_needed
&& TARGET_USE_LEAVE
&& !optimize_size
2522 && frame
.nregs
== 1))
2524 /* Restore registers. We can use ebp or esp to address the memory
2525 locations. If both are available, default to ebp, since offsets
2526 are known to be small. Only exception is esp pointing directly to the
2527 end of block of saved registers, where we may simplify addressing
2530 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
2531 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
2533 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
, offset
);
2535 if (!frame_pointer_needed
)
2536 ix86_emit_epilogue_esp_adjustment (frame
.to_allocate
2537 + frame
.nregs
* UNITS_PER_WORD
);
2538 /* If not an i386, mov & pop is faster than "leave". */
2539 else if (TARGET_USE_LEAVE
|| optimize_size
)
2540 emit_insn (gen_leave ());
2543 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
2544 hard_frame_pointer_rtx
,
2546 hard_frame_pointer_rtx
));
2547 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
2552 /* First step is to deallocate the stack frame so that we can
2553 pop the registers. */
2556 if (!frame_pointer_needed
)
2558 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
2559 hard_frame_pointer_rtx
,
2561 hard_frame_pointer_rtx
));
2563 else if (frame
.to_allocate
)
2564 ix86_emit_epilogue_esp_adjustment (frame
.to_allocate
);
2566 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
2567 if (ix86_save_reg (regno
))
2568 emit_insn (gen_popsi1 (gen_rtx_REG (SImode
, regno
)));
2569 if (frame_pointer_needed
)
2570 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
2573 /* Sibcall epilogues don't want a return instruction. */
2577 if (current_function_pops_args
&& current_function_args_size
)
2579 rtx popc
= GEN_INT (current_function_pops_args
);
2581 /* i386 can only pop 64K bytes. If asked to pop more, pop
2582 return address, do explicit add, and jump indirectly to the
2585 if (current_function_pops_args
>= 65536)
2587 rtx ecx
= gen_rtx_REG (SImode
, 2);
2589 emit_insn (gen_popsi1 (ecx
));
2590 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
2591 emit_jump_insn (gen_return_indirect_internal (ecx
));
2594 emit_jump_insn (gen_return_pop_internal (popc
));
2597 emit_jump_insn (gen_return_internal ());
2600 /* Extract the parts of an RTL expression that is a valid memory address
2601 for an instruction. Return false if the structure of the address is
2605 ix86_decompose_address (addr
, out
)
2607 struct ix86_address
*out
;
2609 rtx base
= NULL_RTX
;
2610 rtx index
= NULL_RTX
;
2611 rtx disp
= NULL_RTX
;
2612 HOST_WIDE_INT scale
= 1;
2613 rtx scale_rtx
= NULL_RTX
;
2615 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == SUBREG
)
2617 else if (GET_CODE (addr
) == PLUS
)
2619 rtx op0
= XEXP (addr
, 0);
2620 rtx op1
= XEXP (addr
, 1);
2621 enum rtx_code code0
= GET_CODE (op0
);
2622 enum rtx_code code1
= GET_CODE (op1
);
2624 if (code0
== REG
|| code0
== SUBREG
)
2626 if (code1
== REG
|| code1
== SUBREG
)
2627 index
= op0
, base
= op1
; /* index + base */
2629 base
= op0
, disp
= op1
; /* base + displacement */
2631 else if (code0
== MULT
)
2633 index
= XEXP (op0
, 0);
2634 scale_rtx
= XEXP (op0
, 1);
2635 if (code1
== REG
|| code1
== SUBREG
)
2636 base
= op1
; /* index*scale + base */
2638 disp
= op1
; /* index*scale + disp */
2640 else if (code0
== PLUS
&& GET_CODE (XEXP (op0
, 0)) == MULT
)
2642 index
= XEXP (XEXP (op0
, 0), 0); /* index*scale + base + disp */
2643 scale_rtx
= XEXP (XEXP (op0
, 0), 1);
2644 base
= XEXP (op0
, 1);
2647 else if (code0
== PLUS
)
2649 index
= XEXP (op0
, 0); /* index + base + disp */
2650 base
= XEXP (op0
, 1);
2656 else if (GET_CODE (addr
) == MULT
)
2658 index
= XEXP (addr
, 0); /* index*scale */
2659 scale_rtx
= XEXP (addr
, 1);
2661 else if (GET_CODE (addr
) == ASHIFT
)
2665 /* We're called for lea too, which implements ashift on occasion. */
2666 index
= XEXP (addr
, 0);
2667 tmp
= XEXP (addr
, 1);
2668 if (GET_CODE (tmp
) != CONST_INT
)
2670 scale
= INTVAL (tmp
);
2671 if ((unsigned HOST_WIDE_INT
) scale
> 3)
2676 disp
= addr
; /* displacement */
2678 /* Extract the integral value of scale. */
2681 if (GET_CODE (scale_rtx
) != CONST_INT
)
2683 scale
= INTVAL (scale_rtx
);
2686 /* Allow arg pointer and stack pointer as index if there is not scaling */
2687 if (base
&& index
&& scale
== 1
2688 && (index
== arg_pointer_rtx
|| index
== frame_pointer_rtx
2689 || index
== stack_pointer_rtx
))
2696 /* Special case: %ebp cannot be encoded as a base without a displacement. */
2697 if ((base
== hard_frame_pointer_rtx
2698 || base
== frame_pointer_rtx
2699 || base
== arg_pointer_rtx
) && !disp
)
2702 /* Special case: on K6, [%esi] makes the instruction vector decoded.
2703 Avoid this by transforming to [%esi+0]. */
2704 if (ix86_cpu
== PROCESSOR_K6
&& !optimize_size
2705 && base
&& !index
&& !disp
2707 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
2710 /* Special case: encode reg+reg instead of reg*2. */
2711 if (!base
&& index
&& scale
&& scale
== 2)
2712 base
= index
, scale
= 1;
2714 /* Special case: scaling cannot be encoded without base or displacement. */
2715 if (!base
&& !disp
&& index
&& scale
!= 1)
2726 /* Return cost of the memory address x.
2727 For i386, it is better to use a complex address than let gcc copy
2728 the address into a reg and make a new pseudo. But not if the address
2729 requires to two regs - that would mean more pseudos with longer
2732 ix86_address_cost (x
)
2735 struct ix86_address parts
;
2738 if (!ix86_decompose_address (x
, &parts
))
2741 /* More complex memory references are better. */
2742 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
2745 /* Attempt to minimize number of registers in the address. */
2747 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
2749 && (!REG_P (parts
.index
)
2750 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
2754 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
2756 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
2757 && parts
.base
!= parts
.index
)
2760 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
2761 since it's predecode logic can't detect the length of instructions
2762 and it degenerates to vector decoded. Increase cost of such
2763 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
2764 to split such addresses or even refuse such addresses at all.
2766 Following addressing modes are affected:
2771 The first and last case may be avoidable by explicitly coding the zero in
2772 memory address, but I don't have AMD-K6 machine handy to check this
2776 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
2777 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
2778 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
2784 /* If X is a machine specific address (i.e. a symbol or label being
2785 referenced as a displacement from the GOT implemented using an
2786 UNSPEC), then return the base term. Otherwise return X. */
2789 ix86_find_base_term (x
)
2794 if (GET_CODE (x
) != PLUS
2795 || XEXP (x
, 0) != pic_offset_table_rtx
2796 || GET_CODE (XEXP (x
, 1)) != CONST
)
2799 term
= XEXP (XEXP (x
, 1), 0);
2801 if (GET_CODE (term
) == PLUS
&& GET_CODE (XEXP (term
, 1)) == CONST_INT
)
2802 term
= XEXP (term
, 0);
2804 if (GET_CODE (term
) != UNSPEC
2805 || XVECLEN (term
, 0) != 1
2806 || XINT (term
, 1) != 7)
2809 term
= XVECEXP (term
, 0, 0);
2811 if (GET_CODE (term
) != SYMBOL_REF
2812 && GET_CODE (term
) != LABEL_REF
)
2818 /* Determine if a given CONST RTX is a valid memory displacement
2822 legitimate_pic_address_disp_p (disp
)
2825 if (GET_CODE (disp
) != CONST
)
2827 disp
= XEXP (disp
, 0);
2829 if (GET_CODE (disp
) == PLUS
)
2831 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
2833 disp
= XEXP (disp
, 0);
2836 if (GET_CODE (disp
) != UNSPEC
2837 || XVECLEN (disp
, 0) != 1)
2840 /* Must be @GOT or @GOTOFF. */
2841 if (XINT (disp
, 1) != 6
2842 && XINT (disp
, 1) != 7)
2845 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
2846 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
2852 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2853 memory address for an instruction. The MODE argument is the machine mode
2854 for the MEM expression that wants to use this address.
2856 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
2857 convert common non-canonical forms to canonical form so that they will
2861 legitimate_address_p (mode
, addr
, strict
)
2862 enum machine_mode mode
;
2866 struct ix86_address parts
;
2867 rtx base
, index
, disp
;
2868 HOST_WIDE_INT scale
;
2869 const char *reason
= NULL
;
2870 rtx reason_rtx
= NULL_RTX
;
2872 if (TARGET_DEBUG_ADDR
)
2875 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
2876 GET_MODE_NAME (mode
), strict
);
2880 if (! ix86_decompose_address (addr
, &parts
))
2882 reason
= "decomposition failed";
2887 index
= parts
.index
;
2889 scale
= parts
.scale
;
2891 /* Validate base register.
2893 Don't allow SUBREG's here, it can lead to spill failures when the base
2894 is one word out of a two word structure, which is represented internally
2901 if (GET_CODE (base
) != REG
)
2903 reason
= "base is not a register";
2907 if (GET_MODE (base
) != Pmode
)
2909 reason
= "base is not in Pmode";
2913 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (base
))
2914 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (base
)))
2916 reason
= "base is not valid";
2921 /* Validate index register.
2923 Don't allow SUBREG's here, it can lead to spill failures when the index
2924 is one word out of a two word structure, which is represented internally
2931 if (GET_CODE (index
) != REG
)
2933 reason
= "index is not a register";
2937 if (GET_MODE (index
) != Pmode
)
2939 reason
= "index is not in Pmode";
2943 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (index
))
2944 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (index
)))
2946 reason
= "index is not valid";
2951 /* Validate scale factor. */
2954 reason_rtx
= GEN_INT (scale
);
2957 reason
= "scale without index";
2961 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
2963 reason
= "scale is not a valid multiplier";
2968 /* Validate displacement. */
2973 if (!CONSTANT_ADDRESS_P (disp
))
2975 reason
= "displacement is not constant";
2979 if (GET_CODE (disp
) == CONST_DOUBLE
)
2981 reason
= "displacement is a const_double";
2985 if (flag_pic
&& SYMBOLIC_CONST (disp
))
2987 if (! legitimate_pic_address_disp_p (disp
))
2989 reason
= "displacement is an invalid pic construct";
2993 /* This code used to verify that a symbolic pic displacement
2994 includes the pic_offset_table_rtx register.
2996 While this is good idea, unfortunately these constructs may
2997 be created by "adds using lea" optimization for incorrect
3006 This code is nonsensical, but results in addressing
3007 GOT table with pic_offset_table_rtx base. We can't
3008 just refuse it easilly, since it gets matched by
3009 "addsi3" pattern, that later gets split to lea in the
3010 case output register differs from input. While this
3011 can be handled by separate addsi pattern for this case
3012 that never results in lea, this seems to be easier and
3013 correct fix for crash to disable this test. */
3015 else if (HALF_PIC_P ())
3017 if (! HALF_PIC_ADDRESS_P (disp
)
3018 || (base
!= NULL_RTX
|| index
!= NULL_RTX
))
3020 reason
= "displacement is an invalid half-pic reference";
3026 /* Everything looks valid. */
3027 if (TARGET_DEBUG_ADDR
)
3028 fprintf (stderr
, "Success.\n");
3032 if (TARGET_DEBUG_ADDR
)
3034 fprintf (stderr
, "Error: %s\n", reason
);
3035 debug_rtx (reason_rtx
);
3040 /* Return an unique alias set for the GOT. */
3042 static HOST_WIDE_INT
3043 ix86_GOT_alias_set ()
3045 static HOST_WIDE_INT set
= -1;
3047 set
= new_alias_set ();
3051 /* Return a legitimate reference for ORIG (an address) using the
3052 register REG. If REG is 0, a new pseudo is generated.
3054 There are two types of references that must be handled:
3056 1. Global data references must load the address from the GOT, via
3057 the PIC reg. An insn is emitted to do this load, and the reg is
3060 2. Static data references, constant pool addresses, and code labels
3061 compute the address as an offset from the GOT, whose base is in
3062 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
3063 differentiate them from global data objects. The returned
3064 address is the PIC reg + an unspec constant.
3066 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
3067 reg also appears in the address. */
3070 legitimize_pic_address (orig
, reg
)
3078 if (GET_CODE (addr
) == LABEL_REF
3079 || (GET_CODE (addr
) == SYMBOL_REF
3080 && (CONSTANT_POOL_ADDRESS_P (addr
)
3081 || SYMBOL_REF_FLAG (addr
))))
3083 /* This symbol may be referenced via a displacement from the PIC
3084 base address (@GOTOFF). */
3086 current_function_uses_pic_offset_table
= 1;
3087 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), 7);
3088 new = gen_rtx_CONST (Pmode
, new);
3089 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
3093 emit_move_insn (reg
, new);
3097 else if (GET_CODE (addr
) == SYMBOL_REF
)
3099 /* This symbol must be referenced via a load from the
3100 Global Offset Table (@GOT). */
3102 current_function_uses_pic_offset_table
= 1;
3103 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), 6);
3104 new = gen_rtx_CONST (Pmode
, new);
3105 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
3106 new = gen_rtx_MEM (Pmode
, new);
3107 RTX_UNCHANGING_P (new) = 1;
3108 MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();
3111 reg
= gen_reg_rtx (Pmode
);
3112 emit_move_insn (reg
, new);
3117 if (GET_CODE (addr
) == CONST
)
3119 addr
= XEXP (addr
, 0);
3120 if (GET_CODE (addr
) == UNSPEC
)
3122 /* Check that the unspec is one of the ones we generate? */
3124 else if (GET_CODE (addr
) != PLUS
)
3127 if (GET_CODE (addr
) == PLUS
)
3129 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
3131 /* Check first to see if this is a constant offset from a @GOTOFF
3132 symbol reference. */
3133 if ((GET_CODE (op0
) == LABEL_REF
3134 || (GET_CODE (op0
) == SYMBOL_REF
3135 && (CONSTANT_POOL_ADDRESS_P (op0
)
3136 || SYMBOL_REF_FLAG (op0
))))
3137 && GET_CODE (op1
) == CONST_INT
)
3139 current_function_uses_pic_offset_table
= 1;
3140 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
), 7);
3141 new = gen_rtx_PLUS (Pmode
, new, op1
);
3142 new = gen_rtx_CONST (Pmode
, new);
3143 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
3147 emit_move_insn (reg
, new);
3153 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
3154 new = legitimize_pic_address (XEXP (addr
, 1),
3155 base
== reg
? NULL_RTX
: reg
);
3157 if (GET_CODE (new) == CONST_INT
)
3158 new = plus_constant (base
, INTVAL (new));
3161 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
3163 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
3164 new = XEXP (new, 1);
3166 new = gen_rtx_PLUS (Pmode
, base
, new);
3174 /* Try machine-dependent ways of modifying an illegitimate address
3175 to be legitimate. If we find one, return the new, valid address.
3176 This macro is used in only one place: `memory_address' in explow.c.
3178 OLDX is the address as it was before break_out_memory_refs was called.
3179 In some cases it is useful to look at this to decide what needs to be done.
3181 MODE and WIN are passed so that this macro can use
3182 GO_IF_LEGITIMATE_ADDRESS.
3184 It is always safe for this macro to do nothing. It exists to recognize
3185 opportunities to optimize the output.
3187 For the 80386, we handle X+REG by loading X into a register R and
3188 using R+REG. R will go in a general reg and indexing will be used.
3189 However, if REG is a broken-out memory address or multiplication,
3190 nothing needs to be done because REG can certainly go in a general reg.
3192 When -fpic is used, special handling is needed for symbolic references.
3193 See comments by legitimize_pic_address in i386.c for details. */
3196 legitimize_address (x
, oldx
, mode
)
3198 register rtx oldx ATTRIBUTE_UNUSED
;
3199 enum machine_mode mode
;
3204 if (TARGET_DEBUG_ADDR
)
3206 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
3207 GET_MODE_NAME (mode
));
3211 if (flag_pic
&& SYMBOLIC_CONST (x
))
3212 return legitimize_pic_address (x
, 0);
3214 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
3215 if (GET_CODE (x
) == ASHIFT
3216 && GET_CODE (XEXP (x
, 1)) == CONST_INT
3217 && (log
= (unsigned)exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
3220 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
3221 GEN_INT (1 << log
));
3224 if (GET_CODE (x
) == PLUS
)
3226 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
3228 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
3229 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
3230 && (log
= (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
3233 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
3234 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
3235 GEN_INT (1 << log
));
3238 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
3239 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
3240 && (log
= (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
3243 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
3244 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
3245 GEN_INT (1 << log
));
3248 /* Put multiply first if it isn't already. */
3249 if (GET_CODE (XEXP (x
, 1)) == MULT
)
3251 rtx tmp
= XEXP (x
, 0);
3252 XEXP (x
, 0) = XEXP (x
, 1);
3257 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
3258 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
3259 created by virtual register instantiation, register elimination, and
3260 similar optimizations. */
3261 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
3264 x
= gen_rtx_PLUS (Pmode
,
3265 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
3266 XEXP (XEXP (x
, 1), 0)),
3267 XEXP (XEXP (x
, 1), 1));
3271 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3272 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
3273 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
3274 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
3275 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
3276 && CONSTANT_P (XEXP (x
, 1)))
3279 rtx other
= NULL_RTX
;
3281 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
3283 constant
= XEXP (x
, 1);
3284 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
3286 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
3288 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
3289 other
= XEXP (x
, 1);
3297 x
= gen_rtx_PLUS (Pmode
,
3298 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
3299 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
3300 plus_constant (other
, INTVAL (constant
)));
3304 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
3307 if (GET_CODE (XEXP (x
, 0)) == MULT
)
3310 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
3313 if (GET_CODE (XEXP (x
, 1)) == MULT
)
3316 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
3320 && GET_CODE (XEXP (x
, 1)) == REG
3321 && GET_CODE (XEXP (x
, 0)) == REG
)
3324 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
3327 x
= legitimize_pic_address (x
, 0);
3330 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
3333 if (GET_CODE (XEXP (x
, 0)) == REG
)
3335 register rtx temp
= gen_reg_rtx (Pmode
);
3336 register rtx val
= force_operand (XEXP (x
, 1), temp
);
3338 emit_move_insn (temp
, val
);
3344 else if (GET_CODE (XEXP (x
, 1)) == REG
)
3346 register rtx temp
= gen_reg_rtx (Pmode
);
3347 register rtx val
= force_operand (XEXP (x
, 0), temp
);
3349 emit_move_insn (temp
, val
);
3359 /* Print an integer constant expression in assembler syntax. Addition
3360 and subtraction are the only arithmetic that may appear in these
3361 expressions. FILE is the stdio stream to write to, X is the rtx, and
3362 CODE is the operand print code from the output string. */
3365 output_pic_addr_const (file
, x
, code
)
3372 switch (GET_CODE (x
))
3382 assemble_name (file
, XSTR (x
, 0));
3383 if (code
== 'P' && ! SYMBOL_REF_FLAG (x
))
3384 fputs ("@PLT", file
);
3391 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
3392 assemble_name (asm_out_file
, buf
);
3396 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
3400 /* This used to output parentheses around the expression,
3401 but that does not work on the 386 (either ATT or BSD assembler). */
3402 output_pic_addr_const (file
, XEXP (x
, 0), code
);
3406 if (GET_MODE (x
) == VOIDmode
)
3408 /* We can use %d if the number is <32 bits and positive. */
3409 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
3410 fprintf (file
, "0x%lx%08lx",
3411 (unsigned long) CONST_DOUBLE_HIGH (x
),
3412 (unsigned long) CONST_DOUBLE_LOW (x
));
3414 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
3417 /* We can't handle floating point constants;
3418 PRINT_OPERAND must handle them. */
3419 output_operand_lossage ("floating constant misused");
3423 /* Some assemblers need integer constants to appear first. */
3424 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
3426 output_pic_addr_const (file
, XEXP (x
, 0), code
);
3428 output_pic_addr_const (file
, XEXP (x
, 1), code
);
3430 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
3432 output_pic_addr_const (file
, XEXP (x
, 1), code
);
3434 output_pic_addr_const (file
, XEXP (x
, 0), code
);
3441 putc (ASSEMBLER_DIALECT
? '(' : '[', file
);
3442 output_pic_addr_const (file
, XEXP (x
, 0), code
);
3444 output_pic_addr_const (file
, XEXP (x
, 1), code
);
3445 putc (ASSEMBLER_DIALECT
? ')' : ']', file
);
3449 if (XVECLEN (x
, 0) != 1)
3451 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
3452 switch (XINT (x
, 1))
3455 fputs ("@GOT", file
);
3458 fputs ("@GOTOFF", file
);
3461 fputs ("@PLT", file
);
3464 output_operand_lossage ("invalid UNSPEC as operand");
3470 output_operand_lossage ("invalid expression as operand");
3474 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
3475 We need to handle our special PIC relocations. */
3478 i386_dwarf_output_addr_const (file
, x
)
3482 fprintf (file
, "%s", INT_ASM_OP
);
3484 output_pic_addr_const (file
, x
, '\0');
3486 output_addr_const (file
, x
);
3490 /* In the name of slightly smaller debug output, and to cater to
3491 general assembler losage, recognize PIC+GOTOFF and turn it back
3492 into a direct symbol reference. */
3495 i386_simplify_dwarf_addr (orig_x
)
3500 if (GET_CODE (x
) != PLUS
3501 || GET_CODE (XEXP (x
, 0)) != REG
3502 || GET_CODE (XEXP (x
, 1)) != CONST
)
3505 x
= XEXP (XEXP (x
, 1), 0);
3506 if (GET_CODE (x
) == UNSPEC
3507 && (XINT (x
, 1) == 6
3508 || XINT (x
, 1) == 7))
3509 return XVECEXP (x
, 0, 0);
3511 if (GET_CODE (x
) == PLUS
3512 && GET_CODE (XEXP (x
, 0)) == UNSPEC
3513 && GET_CODE (XEXP (x
, 1)) == CONST_INT
3514 && (XINT (XEXP (x
, 0), 1) == 6
3515 || XINT (XEXP (x
, 0), 1) == 7))
3516 return gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
3522 put_condition_code (code
, mode
, reverse
, fp
, file
)
3524 enum machine_mode mode
;
3530 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
3532 enum rtx_code second_code
, bypass_code
;
3533 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3534 if (bypass_code
!= NIL
|| second_code
!= NIL
)
3536 code
= ix86_fp_compare_code_to_integer (code
);
3540 code
= reverse_condition (code
);
3551 if (mode
!= CCmode
&& mode
!= CCNOmode
&& mode
!= CCGCmode
)
3556 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
3557 Those same assemblers have the same but opposite losage on cmov. */
3560 suffix
= fp
? "nbe" : "a";
3563 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
3565 else if (mode
== CCmode
|| mode
== CCGCmode
)
3576 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
3578 else if (mode
== CCmode
|| mode
== CCGCmode
)
3587 suffix
= fp
? "nb" : "ae";
3590 if (mode
!= CCmode
&& mode
!= CCGCmode
&& mode
!= CCNOmode
)
3600 suffix
= fp
? "u" : "p";
3603 suffix
= fp
? "nu" : "np";
3608 fputs (suffix
, file
);
3612 print_reg (x
, code
, file
)
3617 if (REGNO (x
) == ARG_POINTER_REGNUM
3618 || REGNO (x
) == FRAME_POINTER_REGNUM
3619 || REGNO (x
) == FLAGS_REG
3620 || REGNO (x
) == FPSR_REG
)
3623 if (ASSEMBLER_DIALECT
== 0 || USER_LABEL_PREFIX
[0] == 0)
3628 else if (code
== 'b')
3630 else if (code
== 'k')
3632 else if (code
== 'q')
3634 else if (code
== 'y')
3636 else if (code
== 'h')
3638 else if (code
== 'm' || MMX_REG_P (x
))
3641 code
= GET_MODE_SIZE (GET_MODE (x
));
3643 /* Irritatingly, AMD extended registers use different naming convention
3644 from the normal registers. */
3645 if (REX_INT_REG_P (x
))
3650 error ("Extended registers have no high halves\n");
3653 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
3656 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
3659 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
3662 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
3665 error ("Unsupported operand size for extended register.\n");
3673 fputs (hi_reg_name
[REGNO (x
)], file
);
3676 if (STACK_TOP_P (x
))
3678 fputs ("st(0)", file
);
3685 if (! ANY_FP_REG_P (x
))
3686 putc (code
== 8 ? 'r' : 'e', file
);
3690 fputs (hi_reg_name
[REGNO (x
)], file
);
3693 fputs (qi_reg_name
[REGNO (x
)], file
);
3696 fputs (qi_high_reg_name
[REGNO (x
)], file
);
3704 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
3705 C -- print opcode suffix for set/cmov insn.
3706 c -- like C, but print reversed condition
3707 R -- print the prefix for register names.
3708 z -- print the opcode suffix for the size of the current operand.
3709 * -- print a star (in certain assembler syntax)
3710 A -- print an absolute memory reference.
3711 w -- print the operand as if it's a "word" (HImode) even if it isn't.
3712 s -- print a shift double count, followed by the assemblers argument
3714 b -- print the QImode name of the register for the indicated operand.
3715 %b0 would print %al if operands[0] is reg 0.
3716 w -- likewise, print the HImode name of the register.
3717 k -- likewise, print the SImode name of the register.
3718 q -- likewise, print the DImode name of the register.
3719 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
3720 y -- print "st(0)" instead of "st" as a register.
3721 m -- print "st(n)" as an mmx register.
3722 D -- print condition for SSE cmp instruction.
3726 print_operand (file
, x
, code
)
3736 if (ASSEMBLER_DIALECT
== 0)
3741 if (ASSEMBLER_DIALECT
== 0)
3743 else if (ASSEMBLER_DIALECT
== 1)
3745 /* Intel syntax. For absolute addresses, registers should not
3746 be surrounded by braces. */
3747 if (GET_CODE (x
) != REG
)
3750 PRINT_OPERAND (file
, x
, 0);
3756 PRINT_OPERAND (file
, x
, 0);
3761 if (ASSEMBLER_DIALECT
== 0)
3766 if (ASSEMBLER_DIALECT
== 0)
3771 if (ASSEMBLER_DIALECT
== 0)
3776 if (ASSEMBLER_DIALECT
== 0)
3781 if (ASSEMBLER_DIALECT
== 0)
3786 if (ASSEMBLER_DIALECT
== 0)
3791 /* 387 opcodes don't get size suffixes if the operands are
3794 if (STACK_REG_P (x
))
3797 /* this is the size of op from size of operand */
3798 switch (GET_MODE_SIZE (GET_MODE (x
)))
3801 #ifdef HAVE_GAS_FILDS_FISTS
3807 if (GET_MODE (x
) == SFmode
)
3822 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
3824 #ifdef GAS_MNEMONICS
3851 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
3853 PRINT_OPERAND (file
, x
, 0);
3859 /* Little bit of braindamage here. The SSE compare instructions
3860 does use completely different names for the comparisons that the
3861 fp conditional moves. */
3862 switch (GET_CODE (x
))
3877 fputs ("unord", file
);
3881 fputs ("neq", file
);
3885 fputs ("nlt", file
);
3889 fputs ("nle", file
);
3892 fputs ("ord", file
);
3900 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
3903 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
3906 /* Like above, but reverse condition */
3908 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
3911 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
3917 sprintf (str
, "invalid operand code `%c'", code
);
3918 output_operand_lossage (str
);
3923 if (GET_CODE (x
) == REG
)
3925 PRINT_REG (x
, code
, file
);
3928 else if (GET_CODE (x
) == MEM
)
3930 /* No `byte ptr' prefix for call instructions. */
3931 if (ASSEMBLER_DIALECT
!= 0 && code
!= 'X' && code
!= 'P')
3934 switch (GET_MODE_SIZE (GET_MODE (x
)))
3936 case 1: size
= "BYTE"; break;
3937 case 2: size
= "WORD"; break;
3938 case 4: size
= "DWORD"; break;
3939 case 8: size
= "QWORD"; break;
3940 case 12: size
= "XWORD"; break;
3941 case 16: size
= "XMMWORD"; break;
3946 /* Check for explicit size override (codes 'b', 'w' and 'k') */
3949 else if (code
== 'w')
3951 else if (code
== 'k')
3955 fputs (" PTR ", file
);
3959 if (flag_pic
&& CONSTANT_ADDRESS_P (x
))
3960 output_pic_addr_const (file
, x
, code
);
3965 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
3970 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3971 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
3973 if (ASSEMBLER_DIALECT
== 0)
3975 fprintf (file
, "0x%lx", l
);
3978 /* These float cases don't actually occur as immediate operands. */
3979 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
3984 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3985 REAL_VALUE_TO_DECIMAL (r
, "%.22e", dstr
);
3986 fprintf (file
, "%s", dstr
);
3989 else if (GET_CODE (x
) == CONST_DOUBLE
3990 && (GET_MODE (x
) == XFmode
|| GET_MODE (x
) == TFmode
))
3995 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3996 REAL_VALUE_TO_DECIMAL (r
, "%.22e", dstr
);
3997 fprintf (file
, "%s", dstr
);
4003 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
4005 if (ASSEMBLER_DIALECT
== 0)
4008 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
4009 || GET_CODE (x
) == LABEL_REF
)
4011 if (ASSEMBLER_DIALECT
== 0)
4014 fputs ("OFFSET FLAT:", file
);
4017 if (GET_CODE (x
) == CONST_INT
)
4018 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
4020 output_pic_addr_const (file
, x
, code
);
4022 output_addr_const (file
, x
);
4026 /* Print a memory operand whose address is ADDR. */
4029 print_operand_address (file
, addr
)
4033 struct ix86_address parts
;
4034 rtx base
, index
, disp
;
4037 if (! ix86_decompose_address (addr
, &parts
))
4041 index
= parts
.index
;
4043 scale
= parts
.scale
;
4045 if (!base
&& !index
)
4047 /* Displacement only requires special attention. */
4049 if (GET_CODE (disp
) == CONST_INT
)
4051 if (ASSEMBLER_DIALECT
!= 0)
4053 if (USER_LABEL_PREFIX
[0] == 0)
4055 fputs ("ds:", file
);
4057 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (addr
));
4060 output_pic_addr_const (file
, addr
, 0);
4062 output_addr_const (file
, addr
);
4066 if (ASSEMBLER_DIALECT
== 0)
4071 output_pic_addr_const (file
, disp
, 0);
4072 else if (GET_CODE (disp
) == LABEL_REF
)
4073 output_asm_label (disp
);
4075 output_addr_const (file
, disp
);
4080 PRINT_REG (base
, 0, file
);
4084 PRINT_REG (index
, 0, file
);
4086 fprintf (file
, ",%d", scale
);
4092 rtx offset
= NULL_RTX
;
4096 /* Pull out the offset of a symbol; print any symbol itself. */
4097 if (GET_CODE (disp
) == CONST
4098 && GET_CODE (XEXP (disp
, 0)) == PLUS
4099 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
4101 offset
= XEXP (XEXP (disp
, 0), 1);
4102 disp
= gen_rtx_CONST (VOIDmode
,
4103 XEXP (XEXP (disp
, 0), 0));
4107 output_pic_addr_const (file
, disp
, 0);
4108 else if (GET_CODE (disp
) == LABEL_REF
)
4109 output_asm_label (disp
);
4110 else if (GET_CODE (disp
) == CONST_INT
)
4113 output_addr_const (file
, disp
);
4119 PRINT_REG (base
, 0, file
);
4122 if (INTVAL (offset
) >= 0)
4124 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
4128 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
4135 PRINT_REG (index
, 0, file
);
4137 fprintf (file
, "*%d", scale
);
4144 /* Split one or more DImode RTL references into pairs of SImode
4145 references. The RTL can be REG, offsettable MEM, integer constant, or
4146 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
4147 split and "num" is its length. lo_half and hi_half are output arrays
4148 that parallel "operands". */
4151 split_di (operands
, num
, lo_half
, hi_half
)
4154 rtx lo_half
[], hi_half
[];
4158 rtx op
= operands
[num
];
4159 if (CONSTANT_P (op
))
4160 split_double (op
, &lo_half
[num
], &hi_half
[num
]);
4161 else if (! reload_completed
)
4163 lo_half
[num
] = gen_lowpart (SImode
, op
);
4164 hi_half
[num
] = gen_highpart (SImode
, op
);
4166 else if (GET_CODE (op
) == REG
)
4168 lo_half
[num
] = gen_rtx_REG (SImode
, REGNO (op
));
4169 hi_half
[num
] = gen_rtx_REG (SImode
, REGNO (op
) + 1);
4171 else if (offsettable_memref_p (op
))
4173 rtx lo_addr
= XEXP (op
, 0);
4174 rtx hi_addr
= XEXP (adj_offsettable_operand (op
, 4), 0);
4175 lo_half
[num
] = change_address (op
, SImode
, lo_addr
);
4176 hi_half
[num
] = change_address (op
, SImode
, hi_addr
);
4183 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
4184 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
4185 is the expression of the binary operation. The output may either be
4186 emitted here, or returned to the caller, like all output_* functions.
4188 There is no guarantee that the operands are the same mode, as they
4189 might be within FLOAT or FLOAT_EXTEND expressions. */
4191 #ifndef SYSV386_COMPAT
4192 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
4193 wants to fix the assemblers because that causes incompatibility
4194 with gcc. No-one wants to fix gcc because that causes
4195 incompatibility with assemblers... You can use the option of
4196 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
4197 #define SYSV386_COMPAT 1
4201 output_387_binary_op (insn
, operands
)
4205 static char buf
[30];
4208 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]) | SSE_REG_P (operands
[2]);
4210 #ifdef ENABLE_CHECKING
4211 /* Even if we do not want to check the inputs, this documents input
4212 constraints. Which helps in understanding the following code. */
4213 if (STACK_REG_P (operands
[0])
4214 && ((REG_P (operands
[1])
4215 && REGNO (operands
[0]) == REGNO (operands
[1])
4216 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
4217 || (REG_P (operands
[2])
4218 && REGNO (operands
[0]) == REGNO (operands
[2])
4219 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
4220 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
4226 switch (GET_CODE (operands
[3]))
4229 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
4230 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
4238 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
4239 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
4247 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
4248 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
4256 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
4257 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
4271 if (GET_MODE (operands
[0]) == SFmode
)
4272 strcat (buf
, "ss\t{%2, %0|%0, %2}");
4274 strcat (buf
, "sd\t{%2, %0|%0, %2}");
4279 switch (GET_CODE (operands
[3]))
4283 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
4285 rtx temp
= operands
[2];
4286 operands
[2] = operands
[1];
4290 /* know operands[0] == operands[1]. */
4292 if (GET_CODE (operands
[2]) == MEM
)
4298 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
4300 if (STACK_TOP_P (operands
[0]))
4301 /* How is it that we are storing to a dead operand[2]?
4302 Well, presumably operands[1] is dead too. We can't
4303 store the result to st(0) as st(0) gets popped on this
4304 instruction. Instead store to operands[2] (which I
4305 think has to be st(1)). st(1) will be popped later.
4306 gcc <= 2.8.1 didn't have this check and generated
4307 assembly code that the Unixware assembler rejected. */
4308 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
4310 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
4314 if (STACK_TOP_P (operands
[0]))
4315 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
4317 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
4322 if (GET_CODE (operands
[1]) == MEM
)
4328 if (GET_CODE (operands
[2]) == MEM
)
4334 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
4337 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
4338 derived assemblers, confusingly reverse the direction of
4339 the operation for fsub{r} and fdiv{r} when the
4340 destination register is not st(0). The Intel assembler
4341 doesn't have this brain damage. Read !SYSV386_COMPAT to
4342 figure out what the hardware really does. */
4343 if (STACK_TOP_P (operands
[0]))
4344 p
= "{p\t%0, %2|rp\t%2, %0}";
4346 p
= "{rp\t%2, %0|p\t%0, %2}";
4348 if (STACK_TOP_P (operands
[0]))
4349 /* As above for fmul/fadd, we can't store to st(0). */
4350 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
4352 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
4357 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
4360 if (STACK_TOP_P (operands
[0]))
4361 p
= "{rp\t%0, %1|p\t%1, %0}";
4363 p
= "{p\t%1, %0|rp\t%0, %1}";
4365 if (STACK_TOP_P (operands
[0]))
4366 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
4368 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
4373 if (STACK_TOP_P (operands
[0]))
4375 if (STACK_TOP_P (operands
[1]))
4376 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
4378 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
4381 else if (STACK_TOP_P (operands
[1]))
4384 p
= "{\t%1, %0|r\t%0, %1}";
4386 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
4392 p
= "{r\t%2, %0|\t%0, %2}";
4394 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
4407 /* Output code for INSN to convert a float to a signed int. OPERANDS
4408 are the insn operands. The output may be [HSD]Imode and the input
4409 operand may be [SDX]Fmode. */
4412 output_fix_trunc (insn
, operands
)
4416 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
4417 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
4420 /* Jump through a hoop or two for DImode, since the hardware has no
4421 non-popping instruction. We used to do this a different way, but
4422 that was somewhat fragile and broke with post-reload splitters. */
4423 if (dimode_p
&& !stack_top_dies
)
4424 output_asm_insn ("fld\t%y1", operands
);
4426 if (! STACK_TOP_P (operands
[1]))
4429 xops
[0] = GEN_INT (12);
4430 xops
[1] = adj_offsettable_operand (operands
[2], 1);
4431 xops
[1] = change_address (xops
[1], QImode
, NULL_RTX
);
4433 xops
[2] = operands
[0];
4434 if (GET_CODE (operands
[0]) != MEM
)
4435 xops
[2] = operands
[3];
4437 output_asm_insn ("fnstcw\t%2", operands
);
4438 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands
);
4439 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops
);
4440 output_asm_insn ("fldcw\t%2", operands
);
4441 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands
);
4443 if (stack_top_dies
|| dimode_p
)
4444 output_asm_insn ("fistp%z2\t%2", xops
);
4446 output_asm_insn ("fist%z2\t%2", xops
);
4448 output_asm_insn ("fldcw\t%2", operands
);
4450 if (GET_CODE (operands
[0]) != MEM
)
4454 split_di (operands
+0, 1, xops
+0, xops
+1);
4455 split_di (operands
+3, 1, xops
+2, xops
+3);
4456 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
4457 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops
);
4459 else if (GET_MODE (operands
[0]) == SImode
)
4460 output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands
);
4462 output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands
);
4468 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
4469 should be used and 2 when fnstsw should be used. UNORDERED_P is true
4470 when fucom should be used. */
4473 output_fp_compare (insn
, operands
, eflags_p
, unordered_p
)
4476 int eflags_p
, unordered_p
;
4479 rtx cmp_op0
= operands
[0];
4480 rtx cmp_op1
= operands
[1];
4481 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]);
4486 cmp_op1
= operands
[2];
4490 if (GET_MODE (operands
[0]) == SFmode
)
4492 return "ucomiss\t{%1, %0|%0, %1}";
4494 return "comiss\t{%1, %0|%0, %y}";
4497 return "ucomisd\t{%1, %0|%0, %1}";
4499 return "comisd\t{%1, %0|%0, %y}";
4502 if (! STACK_TOP_P (cmp_op0
))
4505 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
4507 if (STACK_REG_P (cmp_op1
)
4509 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
4510 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
4512 /* If both the top of the 387 stack dies, and the other operand
4513 is also a stack register that dies, then this must be a
4514 `fcompp' float compare */
4518 /* There is no double popping fcomi variant. Fortunately,
4519 eflags is immune from the fstp's cc clobbering. */
4521 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
4523 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
4531 return "fucompp\n\tfnstsw\t%0";
4533 return "fcompp\n\tfnstsw\t%0";
4546 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
4548 static const char * const alt
[24] =
4560 "fcomi\t{%y1, %0|%0, %y1}",
4561 "fcomip\t{%y1, %0|%0, %y1}",
4562 "fucomi\t{%y1, %0|%0, %y1}",
4563 "fucomip\t{%y1, %0|%0, %y1}",
4570 "fcom%z2\t%y2\n\tfnstsw\t%0",
4571 "fcomp%z2\t%y2\n\tfnstsw\t%0",
4572 "fucom%z2\t%y2\n\tfnstsw\t%0",
4573 "fucomp%z2\t%y2\n\tfnstsw\t%0",
4575 "ficom%z2\t%y2\n\tfnstsw\t%0",
4576 "ficomp%z2\t%y2\n\tfnstsw\t%0",
4584 mask
= eflags_p
<< 3;
4585 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
4586 mask
|= unordered_p
<< 1;
4587 mask
|= stack_top_dies
;
4599 /* Output assembler code to FILE to initialize basic-block profiling.
4601 If profile_block_flag == 2
4603 Output code to call the subroutine `__bb_init_trace_func'
4604 and pass two parameters to it. The first parameter is
4605 the address of a block allocated in the object module.
4606 The second parameter is the number of the first basic block
4609 The name of the block is a local symbol made with this statement:
4611 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4613 Of course, since you are writing the definition of
4614 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4615 can take a short cut in the definition of this macro and use the
4616 name that you know will result.
4618 The number of the first basic block of the function is
4619 passed to the macro in BLOCK_OR_LABEL.
4621 If described in a virtual assembler language the code to be
4625 parameter2 <- BLOCK_OR_LABEL
4626 call __bb_init_trace_func
4628 else if profile_block_flag != 0
4630 Output code to call the subroutine `__bb_init_func'
4631 and pass one single parameter to it, which is the same
4632 as the first parameter to `__bb_init_trace_func'.
4634 The first word of this parameter is a flag which will be nonzero if
4635 the object module has already been initialized. So test this word
4636 first, and do not call `__bb_init_func' if the flag is nonzero.
4637 Note: When profile_block_flag == 2 the test need not be done
4638 but `__bb_init_trace_func' *must* be called.
4640 BLOCK_OR_LABEL may be used to generate a label number as a
4641 branch destination in case `__bb_init_func' will not be called.
4643 If described in a virtual assembler language the code to be
4654 ix86_output_function_block_profiler (file
, block_or_label
)
4658 static int num_func
= 0;
4660 char block_table
[80], false_label
[80];
4662 ASM_GENERATE_INTERNAL_LABEL (block_table
, "LPBX", 0);
4664 xops
[1] = gen_rtx_SYMBOL_REF (VOIDmode
, block_table
);
4665 xops
[5] = stack_pointer_rtx
;
4666 xops
[7] = gen_rtx_REG (Pmode
, 0); /* eax */
4668 CONSTANT_POOL_ADDRESS_P (xops
[1]) = TRUE
;
4670 switch (profile_block_flag
)
4673 xops
[2] = GEN_INT (block_or_label
);
4674 xops
[3] = gen_rtx_MEM (Pmode
,
4675 gen_rtx_SYMBOL_REF (VOIDmode
, "__bb_init_trace_func"));
4676 xops
[6] = GEN_INT (8);
4678 output_asm_insn ("push{l}\t%2", xops
);
4680 output_asm_insn ("push{l}\t%1", xops
);
4683 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops
);
4684 output_asm_insn ("push{l}\t%7", xops
);
4686 output_asm_insn ("call\t%P3", xops
);
4687 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops
);
4691 ASM_GENERATE_INTERNAL_LABEL (false_label
, "LPBZ", num_func
);
4693 xops
[0] = const0_rtx
;
4694 xops
[2] = gen_rtx_MEM (Pmode
,
4695 gen_rtx_SYMBOL_REF (VOIDmode
, false_label
));
4696 xops
[3] = gen_rtx_MEM (Pmode
,
4697 gen_rtx_SYMBOL_REF (VOIDmode
, "__bb_init_func"));
4698 xops
[4] = gen_rtx_MEM (Pmode
, xops
[1]);
4699 xops
[6] = GEN_INT (4);
4701 CONSTANT_POOL_ADDRESS_P (xops
[2]) = TRUE
;
4703 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops
);
4704 output_asm_insn ("jne\t%2", xops
);
4707 output_asm_insn ("push{l}\t%1", xops
);
4710 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops
);
4711 output_asm_insn ("push{l}\t%7", xops
);
4713 output_asm_insn ("call\t%P3", xops
);
4714 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops
);
4715 ASM_OUTPUT_INTERNAL_LABEL (file
, "LPBZ", num_func
);
4721 /* Output assembler code to FILE to increment a counter associated
4722 with basic block number BLOCKNO.
4724 If profile_block_flag == 2
4726 Output code to initialize the global structure `__bb' and
4727 call the function `__bb_trace_func' which will increment the
4730 `__bb' consists of two words. In the first word the number
4731 of the basic block has to be stored. In the second word
4732 the address of a block allocated in the object module
4735 The basic block number is given by BLOCKNO.
4737 The address of the block is given by the label created with
4739 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4741 by FUNCTION_BLOCK_PROFILER.
4743 Of course, since you are writing the definition of
4744 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4745 can take a short cut in the definition of this macro and use the
4746 name that you know will result.
4748 If described in a virtual assembler language the code to be
4751 move BLOCKNO -> (__bb)
4752 move LPBX0 -> (__bb+4)
4753 call __bb_trace_func
4755 Note that function `__bb_trace_func' must not change the
4756 machine state, especially the flag register. To grant
4757 this, you must output code to save and restore registers
4758 either in this macro or in the macros MACHINE_STATE_SAVE
4759 and MACHINE_STATE_RESTORE. The last two macros will be
4760 used in the function `__bb_trace_func', so you must make
4761 sure that the function prologue does not change any
4762 register prior to saving it with MACHINE_STATE_SAVE.
4764 else if profile_block_flag != 0
4766 Output code to increment the counter directly.
4767 Basic blocks are numbered separately from zero within each
4768 compiled object module. The count associated with block number
4769 BLOCKNO is at index BLOCKNO in an array of words; the name of
4770 this array is a local symbol made with this statement:
4772 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
4774 Of course, since you are writing the definition of
4775 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4776 can take a short cut in the definition of this macro and use the
4777 name that you know will result.
4779 If described in a virtual assembler language the code to be
4782 inc (LPBX2+4*BLOCKNO)
4786 ix86_output_block_profiler (file
, blockno
)
4787 FILE *file ATTRIBUTE_UNUSED
;
4790 rtx xops
[8], cnt_rtx
;
4792 char *block_table
= counts
;
4794 switch (profile_block_flag
)
4797 ASM_GENERATE_INTERNAL_LABEL (block_table
, "LPBX", 0);
4799 xops
[1] = gen_rtx_SYMBOL_REF (VOIDmode
, block_table
);
4800 xops
[2] = GEN_INT (blockno
);
4801 xops
[3] = gen_rtx_MEM (Pmode
,
4802 gen_rtx_SYMBOL_REF (VOIDmode
, "__bb_trace_func"));
4803 xops
[4] = gen_rtx_SYMBOL_REF (VOIDmode
, "__bb");
4804 xops
[5] = plus_constant (xops
[4], 4);
4805 xops
[0] = gen_rtx_MEM (SImode
, xops
[4]);
4806 xops
[6] = gen_rtx_MEM (SImode
, xops
[5]);
4808 CONSTANT_POOL_ADDRESS_P (xops
[1]) = TRUE
;
4810 output_asm_insn ("pushf", xops
);
4811 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
4814 xops
[7] = gen_rtx_REG (Pmode
, 0); /* eax */
4815 output_asm_insn ("push{l}\t%7", xops
);
4816 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops
);
4817 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops
);
4818 output_asm_insn ("pop{l}\t%7", xops
);
4821 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops
);
4822 output_asm_insn ("call\t%P3", xops
);
4823 output_asm_insn ("popf", xops
);
4828 ASM_GENERATE_INTERNAL_LABEL (counts
, "LPBX", 2);
4829 cnt_rtx
= gen_rtx_SYMBOL_REF (VOIDmode
, counts
);
4830 SYMBOL_REF_FLAG (cnt_rtx
) = TRUE
;
4833 cnt_rtx
= plus_constant (cnt_rtx
, blockno
*4);
4836 cnt_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, cnt_rtx
);
4838 xops
[0] = gen_rtx_MEM (SImode
, cnt_rtx
);
4839 output_asm_insn ("inc{l}\t%0", xops
);
4846 ix86_expand_move (mode
, operands
)
4847 enum machine_mode mode
;
4850 int strict
= (reload_in_progress
|| reload_completed
);
4853 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (operands
[1], Pmode
))
4855 /* Emit insns to move operands[1] into operands[0]. */
4857 if (GET_CODE (operands
[0]) == MEM
)
4858 operands
[1] = force_reg (Pmode
, operands
[1]);
4861 rtx temp
= operands
[0];
4862 if (GET_CODE (temp
) != REG
)
4863 temp
= gen_reg_rtx (Pmode
);
4864 temp
= legitimize_pic_address (operands
[1], temp
);
4865 if (temp
== operands
[0])
4872 if (GET_CODE (operands
[0]) == MEM
4873 && (GET_MODE (operands
[0]) == QImode
4874 || !push_operand (operands
[0], mode
))
4875 && GET_CODE (operands
[1]) == MEM
)
4876 operands
[1] = force_reg (mode
, operands
[1]);
4878 if (push_operand (operands
[0], mode
)
4879 && ! general_no_elim_operand (operands
[1], mode
))
4880 operands
[1] = copy_to_mode_reg (mode
, operands
[1]);
4882 if (FLOAT_MODE_P (mode
))
4884 /* If we are loading a floating point constant to a register,
4885 force the value to memory now, since we'll get better code
4886 out the back end. */
4890 else if (GET_CODE (operands
[1]) == CONST_DOUBLE
4891 && register_operand (operands
[0], mode
))
4892 operands
[1] = validize_mem (force_const_mem (mode
, operands
[1]));
4896 insn
= gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]);
4901 /* Attempt to expand a binary operator. Make the expansion closer to the
4902 actual machine, then just general_operand, which will allow 3 separate
4903 memory references (one output, two input) in a single insn. */
4906 ix86_expand_binary_operator (code
, mode
, operands
)
4908 enum machine_mode mode
;
4911 int matching_memory
;
4912 rtx src1
, src2
, dst
, op
, clob
;
4918 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
4919 if (GET_RTX_CLASS (code
) == 'c'
4920 && (rtx_equal_p (dst
, src2
)
4921 || immediate_operand (src1
, mode
)))
4928 /* If the destination is memory, and we do not have matching source
4929 operands, do things in registers. */
4930 matching_memory
= 0;
4931 if (GET_CODE (dst
) == MEM
)
4933 if (rtx_equal_p (dst
, src1
))
4934 matching_memory
= 1;
4935 else if (GET_RTX_CLASS (code
) == 'c'
4936 && rtx_equal_p (dst
, src2
))
4937 matching_memory
= 2;
4939 dst
= gen_reg_rtx (mode
);
4942 /* Both source operands cannot be in memory. */
4943 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
4945 if (matching_memory
!= 2)
4946 src2
= force_reg (mode
, src2
);
4948 src1
= force_reg (mode
, src1
);
4951 /* If the operation is not commutable, source 1 cannot be a constant
4952 or non-matching memory. */
4953 if ((CONSTANT_P (src1
)
4954 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
4955 && GET_RTX_CLASS (code
) != 'c')
4956 src1
= force_reg (mode
, src1
);
4958 /* If optimizing, copy to regs to improve CSE */
4959 if (optimize
&& ! no_new_pseudos
)
4961 if (GET_CODE (dst
) == MEM
)
4962 dst
= gen_reg_rtx (mode
);
4963 if (GET_CODE (src1
) == MEM
)
4964 src1
= force_reg (mode
, src1
);
4965 if (GET_CODE (src2
) == MEM
)
4966 src2
= force_reg (mode
, src2
);
4969 /* Emit the instruction. */
4971 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
4972 if (reload_in_progress
)
4974 /* Reload doesn't know about the flags register, and doesn't know that
4975 it doesn't want to clobber it. We can only do this with PLUS. */
4982 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
4983 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
4986 /* Fix up the destination if needed. */
4987 if (dst
!= operands
[0])
4988 emit_move_insn (operands
[0], dst
);
4991 /* Return TRUE or FALSE depending on whether the binary operator meets the
4992 appropriate constraints. */
4995 ix86_binary_operator_ok (code
, mode
, operands
)
4997 enum machine_mode mode ATTRIBUTE_UNUSED
;
5000 /* Both source operands cannot be in memory. */
5001 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
5003 /* If the operation is not commutable, source 1 cannot be a constant. */
5004 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != 'c')
5006 /* If the destination is memory, we must have a matching source operand. */
5007 if (GET_CODE (operands
[0]) == MEM
5008 && ! (rtx_equal_p (operands
[0], operands
[1])
5009 || (GET_RTX_CLASS (code
) == 'c'
5010 && rtx_equal_p (operands
[0], operands
[2]))))
5012 /* If the operation is not commutable and the source 1 is memory, we must
5013 have a matching destionation. */
5014 if (GET_CODE (operands
[1]) == MEM
5015 && GET_RTX_CLASS (code
) != 'c'
5016 && ! rtx_equal_p (operands
[0], operands
[1]))
5021 /* Attempt to expand a unary operator. Make the expansion closer to the
5022 actual machine, then just general_operand, which will allow 2 separate
5023 memory references (one output, one input) in a single insn. */
5026 ix86_expand_unary_operator (code
, mode
, operands
)
5028 enum machine_mode mode
;
5031 int matching_memory
;
5032 rtx src
, dst
, op
, clob
;
5037 /* If the destination is memory, and we do not have matching source
5038 operands, do things in registers. */
5039 matching_memory
= 0;
5040 if (GET_CODE (dst
) == MEM
)
5042 if (rtx_equal_p (dst
, src
))
5043 matching_memory
= 1;
5045 dst
= gen_reg_rtx (mode
);
5048 /* When source operand is memory, destination must match. */
5049 if (!matching_memory
&& GET_CODE (src
) == MEM
)
5050 src
= force_reg (mode
, src
);
5052 /* If optimizing, copy to regs to improve CSE */
5053 if (optimize
&& ! no_new_pseudos
)
5055 if (GET_CODE (dst
) == MEM
)
5056 dst
= gen_reg_rtx (mode
);
5057 if (GET_CODE (src
) == MEM
)
5058 src
= force_reg (mode
, src
);
5061 /* Emit the instruction. */
5063 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
5064 if (reload_in_progress
|| code
== NOT
)
5066 /* Reload doesn't know about the flags register, and doesn't know that
5067 it doesn't want to clobber it. */
5074 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
5075 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
5078 /* Fix up the destination if needed. */
5079 if (dst
!= operands
[0])
5080 emit_move_insn (operands
[0], dst
);
5083 /* Return TRUE or FALSE depending on whether the unary operator meets the
5084 appropriate constraints. */
5087 ix86_unary_operator_ok (code
, mode
, operands
)
5088 enum rtx_code code ATTRIBUTE_UNUSED
;
5089 enum machine_mode mode ATTRIBUTE_UNUSED
;
5090 rtx operands
[2] ATTRIBUTE_UNUSED
;
5092 /* If one of operands is memory, source and destination must match. */
5093 if ((GET_CODE (operands
[0]) == MEM
5094 || GET_CODE (operands
[1]) == MEM
)
5095 && ! rtx_equal_p (operands
[0], operands
[1]))
5100 /* Return TRUE or FALSE depending on whether the first SET in INSN
5101 has source and destination with matching CC modes, and that the
5102 CC mode is at least as constrained as REQ_MODE. */
5105 ix86_match_ccmode (insn
, req_mode
)
5107 enum machine_mode req_mode
;
5110 enum machine_mode set_mode
;
5112 set
= PATTERN (insn
);
5113 if (GET_CODE (set
) == PARALLEL
)
5114 set
= XVECEXP (set
, 0, 0);
5115 if (GET_CODE (set
) != SET
)
5117 if (GET_CODE (SET_SRC (set
)) != COMPARE
)
5120 set_mode
= GET_MODE (SET_DEST (set
));
5124 if (req_mode
!= CCNOmode
5125 && (req_mode
!= CCmode
5126 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
5130 if (req_mode
== CCGCmode
)
5134 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
5138 if (req_mode
== CCZmode
)
5148 return (GET_MODE (SET_SRC (set
)) == set_mode
);
5151 /* Generate insn patterns to do an integer compare of OPERANDS. */
5154 ix86_expand_int_compare (code
, op0
, op1
)
5158 enum machine_mode cmpmode
;
5161 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
5162 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
5164 /* This is very simple, but making the interface the same as in the
5165 FP case makes the rest of the code easier. */
5166 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
5167 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
5169 /* Return the test that should be put into the flags user, i.e.
5170 the bcc, scc, or cmov instruction. */
5171 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
5174 /* Figure out whether to use ordered or unordered fp comparisons.
5175 Return the appropriate mode to use. */
5178 ix86_fp_compare_mode (code
)
5179 enum rtx_code code ATTRIBUTE_UNUSED
;
5181 /* ??? In order to make all comparisons reversible, we do all comparisons
5182 non-trapping when compiling for IEEE. Once gcc is able to distinguish
5183 all forms trapping and nontrapping comparisons, we can make inequality
5184 comparisons trapping again, since it results in better code when using
5185 FCOM based compares. */
5186 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
5190 ix86_cc_mode (code
, op0
, op1
)
5194 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
5195 return ix86_fp_compare_mode (code
);
5198 /* Only zero flag is needed. */
5200 case NE
: /* ZF!=0 */
5202 /* Codes needing carry flag. */
5203 case GEU
: /* CF=0 */
5204 case GTU
: /* CF=0 & ZF=0 */
5205 case LTU
: /* CF=1 */
5206 case LEU
: /* CF=1 | ZF=1 */
5208 /* Codes possibly doable only with sign flag when
5209 comparing against zero. */
5210 case GE
: /* SF=OF or SF=0 */
5211 case LT
: /* SF<>OF or SF=1 */
5212 if (op1
== const0_rtx
)
5215 /* For other cases Carry flag is not required. */
5217 /* Codes doable only with sign flag when comparing
5218 against zero, but we miss jump instruction for it
5219 so we need to use relational tests agains overflow
5220 that thus needs to be zero. */
5221 case GT
: /* ZF=0 & SF=OF */
5222 case LE
: /* ZF=1 | SF<>OF */
5223 if (op1
== const0_rtx
)
5232 /* Return true if we should use an FCOMI instruction for this fp comparison. */
5235 ix86_use_fcomi_compare (code
)
5236 enum rtx_code code ATTRIBUTE_UNUSED
;
5238 enum rtx_code swapped_code
= swap_condition (code
);
5239 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
5240 || (ix86_fp_comparison_cost (swapped_code
)
5241 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
5244 /* Swap, force into registers, or otherwise massage the two operands
5245 to a fp comparison. The operands are updated in place; the new
5246 comparsion code is returned. */
5248 static enum rtx_code
5249 ix86_prepare_fp_compare_args (code
, pop0
, pop1
)
5253 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
5254 rtx op0
= *pop0
, op1
= *pop1
;
5255 enum machine_mode op_mode
= GET_MODE (op0
);
5256 int is_sse
= SSE_REG_P (op0
) | SSE_REG_P (op1
);
5258 /* All of the unordered compare instructions only work on registers.
5259 The same is true of the XFmode compare instructions. The same is
5260 true of the fcomi compare instructions. */
5263 && (fpcmp_mode
== CCFPUmode
5264 || op_mode
== XFmode
5265 || op_mode
== TFmode
5266 || ix86_use_fcomi_compare (code
)))
5268 op0
= force_reg (op_mode
, op0
);
5269 op1
= force_reg (op_mode
, op1
);
5273 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
5274 things around if they appear profitable, otherwise force op0
5277 if (standard_80387_constant_p (op0
) == 0
5278 || (GET_CODE (op0
) == MEM
5279 && ! (standard_80387_constant_p (op1
) == 0
5280 || GET_CODE (op1
) == MEM
)))
5283 tmp
= op0
, op0
= op1
, op1
= tmp
;
5284 code
= swap_condition (code
);
5287 if (GET_CODE (op0
) != REG
)
5288 op0
= force_reg (op_mode
, op0
);
5290 if (CONSTANT_P (op1
))
5292 if (standard_80387_constant_p (op1
))
5293 op1
= force_reg (op_mode
, op1
);
5295 op1
= validize_mem (force_const_mem (op_mode
, op1
));
5299 /* Try to rearrange the comparison to make it cheaper. */
5300 if (ix86_fp_comparison_cost (code
)
5301 > ix86_fp_comparison_cost (swap_condition (code
))
5302 && (GET_CODE (op0
) == REG
|| !reload_completed
))
5305 tmp
= op0
, op0
= op1
, op1
= tmp
;
5306 code
= swap_condition (code
);
5307 if (GET_CODE (op0
) != REG
)
5308 op0
= force_reg (op_mode
, op0
);
5316 /* Convert comparison codes we use to represent FP comparison to integer
5317 code that will result in proper branch. Return UNKNOWN if no such code
5319 static enum rtx_code
5320 ix86_fp_compare_code_to_integer (code
)
5350 /* Split comparison code CODE into comparisons we can do using branch
5351 instructions. BYPASS_CODE is comparison code for branch that will
5352 branch around FIRST_CODE and SECOND_CODE. If some of branches
5353 is not required, set value to NIL.
5354 We never require more than two branches. */
5356 ix86_fp_comparison_codes (code
, bypass_code
, first_code
, second_code
)
5357 enum rtx_code code
, *bypass_code
, *first_code
, *second_code
;
5363 /* The fcomi comparison sets flags as follows:
5373 case GT
: /* GTU - CF=0 & ZF=0 */
5374 case GE
: /* GEU - CF=0 */
5375 case ORDERED
: /* PF=0 */
5376 case UNORDERED
: /* PF=1 */
5377 case UNEQ
: /* EQ - ZF=1 */
5378 case UNLT
: /* LTU - CF=1 */
5379 case UNLE
: /* LEU - CF=1 | ZF=1 */
5380 case LTGT
: /* EQ - ZF=0 */
5382 case LT
: /* LTU - CF=1 - fails on unordered */
5384 *bypass_code
= UNORDERED
;
5386 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
5388 *bypass_code
= UNORDERED
;
5390 case EQ
: /* EQ - ZF=1 - fails on unordered */
5392 *bypass_code
= UNORDERED
;
5394 case NE
: /* NE - ZF=0 - fails on unordered */
5396 *second_code
= UNORDERED
;
5398 case UNGE
: /* GEU - CF=0 - fails on unordered */
5400 *second_code
= UNORDERED
;
5402 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
5404 *second_code
= UNORDERED
;
5409 if (!TARGET_IEEE_FP
)
5416 /* Return cost of comparison done fcom + arithmetics operations on AX.
5417 All following functions do use number of instructions as an cost metrics.
5418 In future this should be tweaked to compute bytes for optimize_size and
5419 take into account performance of various instructions on various CPUs. */
5421 ix86_fp_comparison_arithmetics_cost (code
)
5424 if (!TARGET_IEEE_FP
)
5426 /* The cost of code output by ix86_expand_fp_compare. */
5454 /* Return cost of comparison done using fcomi operation.
5455 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5457 ix86_fp_comparison_fcomi_cost (code
)
5460 enum rtx_code bypass_code
, first_code
, second_code
;
5461 /* Return arbitarily high cost when instruction is not supported - this
5462 prevents gcc from using it. */
5465 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
5466 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 2;
5469 /* Return cost of comparison done using sahf operation.
5470 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5472 ix86_fp_comparison_sahf_cost (code
)
5475 enum rtx_code bypass_code
, first_code
, second_code
;
5476 /* Return arbitarily high cost when instruction is not preferred - this
5477 avoids gcc from using it. */
5478 if (!TARGET_USE_SAHF
&& !optimize_size
)
5480 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
5481 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 3;
5484 /* Compute cost of the comparison done using any method.
5485 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5487 ix86_fp_comparison_cost (code
)
5490 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
5493 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
5494 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
5496 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
5497 if (min
> sahf_cost
)
5499 if (min
> fcomi_cost
)
5504 /* Generate insn patterns to do a floating point compare of OPERANDS. */
5507 ix86_expand_fp_compare (code
, op0
, op1
, scratch
, second_test
, bypass_test
)
5509 rtx op0
, op1
, scratch
;
5513 enum machine_mode fpcmp_mode
, intcmp_mode
;
5515 int cost
= ix86_fp_comparison_cost (code
);
5516 enum rtx_code bypass_code
, first_code
, second_code
;
5518 fpcmp_mode
= ix86_fp_compare_mode (code
);
5519 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
5522 *second_test
= NULL_RTX
;
5524 *bypass_test
= NULL_RTX
;
5526 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
5528 /* Do fcomi/sahf based test when profitable. */
5529 if ((bypass_code
== NIL
|| bypass_test
)
5530 && (second_code
== NIL
|| second_test
)
5531 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
5535 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
5536 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
5542 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
5543 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), 9);
5545 scratch
= gen_reg_rtx (HImode
);
5546 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
5547 emit_insn (gen_x86_sahf_1 (scratch
));
5550 /* The FP codes work out to act like unsigned. */
5551 intcmp_mode
= fpcmp_mode
;
5553 if (bypass_code
!= NIL
)
5554 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
5555 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
5557 if (second_code
!= NIL
)
5558 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
5559 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
5564 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
5565 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
5566 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), 9);
5568 scratch
= gen_reg_rtx (HImode
);
5569 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
5571 /* In the unordered case, we have to check C2 for NaN's, which
5572 doesn't happen to work out to anything nice combination-wise.
5573 So do some bit twiddling on the value we've got in AH to come
5574 up with an appropriate set of condition codes. */
5576 intcmp_mode
= CCNOmode
;
5581 if (code
== GT
|| !TARGET_IEEE_FP
)
5583 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
5588 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
5589 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
5590 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
5591 intcmp_mode
= CCmode
;
5597 if (code
== LT
&& TARGET_IEEE_FP
)
5599 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
5600 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
5601 intcmp_mode
= CCmode
;
5606 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
5612 if (code
== GE
|| !TARGET_IEEE_FP
)
5614 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
5619 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
5620 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
5627 if (code
== LE
&& TARGET_IEEE_FP
)
5629 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
5630 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
5631 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
5632 intcmp_mode
= CCmode
;
5637 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
5643 if (code
== EQ
&& TARGET_IEEE_FP
)
5645 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
5646 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
5647 intcmp_mode
= CCmode
;
5652 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
5659 if (code
== NE
&& TARGET_IEEE_FP
)
5661 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
5662 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
5668 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
5674 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
5678 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
5687 /* Return the test that should be put into the flags user, i.e.
5688 the bcc, scc, or cmov instruction. */
5689 return gen_rtx_fmt_ee (code
, VOIDmode
,
5690 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
5695 ix86_expand_compare (code
, second_test
, bypass_test
)
5697 rtx
*second_test
, *bypass_test
;
5700 op0
= ix86_compare_op0
;
5701 op1
= ix86_compare_op1
;
5704 *second_test
= NULL_RTX
;
5706 *bypass_test
= NULL_RTX
;
5708 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
5709 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
5710 second_test
, bypass_test
);
5712 ret
= ix86_expand_int_compare (code
, op0
, op1
);
5718 ix86_expand_branch (code
, label
)
5724 switch (GET_MODE (ix86_compare_op0
))
5729 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
5730 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
5731 gen_rtx_LABEL_REF (VOIDmode
, label
),
5733 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
5740 /* Don't expand the comparison early, so that we get better code
5741 when jump or whoever decides to reverse the comparison. */
5746 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
5749 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
5750 ix86_compare_op0
, ix86_compare_op1
);
5751 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
5752 gen_rtx_LABEL_REF (VOIDmode
, label
),
5754 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
5756 use_fcomi
= ix86_use_fcomi_compare (code
);
5757 vec
= rtvec_alloc (3 + !use_fcomi
);
5758 RTVEC_ELT (vec
, 0) = tmp
;
5760 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
5762 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
5765 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
5767 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
5772 /* Expand DImode branch into multiple compare+branch. */
5774 rtx lo
[2], hi
[2], label2
;
5775 enum rtx_code code1
, code2
, code3
;
5777 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
5779 tmp
= ix86_compare_op0
;
5780 ix86_compare_op0
= ix86_compare_op1
;
5781 ix86_compare_op1
= tmp
;
5782 code
= swap_condition (code
);
5784 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
5785 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
5787 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
5788 avoid two branches. This costs one extra insn, so disable when
5789 optimizing for size. */
5791 if ((code
== EQ
|| code
== NE
)
5793 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
5798 if (hi
[1] != const0_rtx
)
5799 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
5800 NULL_RTX
, 0, OPTAB_WIDEN
);
5803 if (lo
[1] != const0_rtx
)
5804 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
5805 NULL_RTX
, 0, OPTAB_WIDEN
);
5807 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
5808 NULL_RTX
, 0, OPTAB_WIDEN
);
5810 ix86_compare_op0
= tmp
;
5811 ix86_compare_op1
= const0_rtx
;
5812 ix86_expand_branch (code
, label
);
5816 /* Otherwise, if we are doing less-than or greater-or-equal-than,
5817 op1 is a constant and the low word is zero, then we can just
5818 examine the high word. */
5820 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
5823 case LT
: case LTU
: case GE
: case GEU
:
5824 ix86_compare_op0
= hi
[0];
5825 ix86_compare_op1
= hi
[1];
5826 ix86_expand_branch (code
, label
);
5832 /* Otherwise, we need two or three jumps. */
5834 label2
= gen_label_rtx ();
5837 code2
= swap_condition (code
);
5838 code3
= unsigned_condition (code
);
5842 case LT
: case GT
: case LTU
: case GTU
:
5845 case LE
: code1
= LT
; code2
= GT
; break;
5846 case GE
: code1
= GT
; code2
= LT
; break;
5847 case LEU
: code1
= LTU
; code2
= GTU
; break;
5848 case GEU
: code1
= GTU
; code2
= LTU
; break;
5850 case EQ
: code1
= NIL
; code2
= NE
; break;
5851 case NE
: code2
= NIL
; break;
5859 * if (hi(a) < hi(b)) goto true;
5860 * if (hi(a) > hi(b)) goto false;
5861 * if (lo(a) < lo(b)) goto true;
5865 ix86_compare_op0
= hi
[0];
5866 ix86_compare_op1
= hi
[1];
5869 ix86_expand_branch (code1
, label
);
5871 ix86_expand_branch (code2
, label2
);
5873 ix86_compare_op0
= lo
[0];
5874 ix86_compare_op1
= lo
[1];
5875 ix86_expand_branch (code3
, label
);
5878 emit_label (label2
);
5887 /* Split branch based on floating point condition. */
5889 ix86_split_fp_branch (condition
, op1
, op2
, target1
, target2
, tmp
)
5890 rtx condition
, op1
, op2
, target1
, target2
, tmp
;
5893 rtx label
= NULL_RTX
;
5894 enum rtx_code code
= GET_CODE (condition
);
5896 if (target2
!= pc_rtx
)
5899 code
= reverse_condition_maybe_unordered (code
);
5904 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
5905 tmp
, &second
, &bypass
);
5906 if (bypass
!= NULL_RTX
)
5908 label
= gen_label_rtx ();
5909 emit_jump_insn (gen_rtx_SET
5911 gen_rtx_IF_THEN_ELSE (VOIDmode
,
5913 gen_rtx_LABEL_REF (VOIDmode
,
5917 /* AMD Athlon and probably other CPUs too have fast bypass path between the
5918 comparison and first branch. The second branch takes longer to execute
5919 so place first branch the worse predicable one if possible. */
5920 if (second
!= NULL_RTX
5921 && (GET_CODE (second
) == UNORDERED
|| GET_CODE (second
) == ORDERED
))
5923 rtx tmp
= condition
;
5927 emit_jump_insn (gen_rtx_SET
5929 gen_rtx_IF_THEN_ELSE (VOIDmode
,
5930 condition
, target1
, target2
)));
5931 if (second
!= NULL_RTX
)
5932 emit_jump_insn (gen_rtx_SET
5934 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
, target2
)));
5935 if (label
!= NULL_RTX
)
5940 ix86_expand_setcc (code
, dest
)
5944 rtx ret
, tmp
, tmpreg
;
5945 rtx second_test
, bypass_test
;
5948 if (GET_MODE (ix86_compare_op0
) == DImode
)
5949 return 0; /* FAIL */
5951 /* Three modes of generation:
5952 0 -- destination does not overlap compare sources:
5953 clear dest first, emit strict_low_part setcc.
5954 1 -- destination does overlap compare sources:
5955 emit subreg setcc, zero extend.
5956 2 -- destination is in QImode:
5962 if (GET_MODE (dest
) == QImode
)
5964 else if (reg_overlap_mentioned_p (dest
, ix86_compare_op0
)
5965 || reg_overlap_mentioned_p (dest
, ix86_compare_op1
))
5969 emit_move_insn (dest
, const0_rtx
);
5971 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
5972 PUT_MODE (ret
, QImode
);
5978 tmp
= gen_lowpart (QImode
, dest
);
5980 tmp
= gen_rtx_STRICT_LOW_PART (VOIDmode
, tmp
);
5984 if (!cse_not_expected
)
5985 tmp
= gen_reg_rtx (QImode
);
5987 tmp
= gen_lowpart (QImode
, dest
);
5991 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
5992 if (bypass_test
|| second_test
)
5994 rtx test
= second_test
;
5996 rtx tmp2
= gen_reg_rtx (QImode
);
6003 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
6005 PUT_MODE (test
, QImode
);
6006 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
6009 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
6011 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
6018 tmp
= gen_rtx_ZERO_EXTEND (GET_MODE (dest
), tmp
);
6019 tmp
= gen_rtx_SET (VOIDmode
, dest
, tmp
);
6020 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
6021 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
6025 return 1; /* DONE */
6029 ix86_expand_int_movcc (operands
)
6032 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
6033 rtx compare_seq
, compare_op
;
6034 rtx second_test
, bypass_test
;
6036 /* When the compare code is not LTU or GEU, we can not use sbbl case.
6037 In case comparsion is done with immediate, we can convert it to LTU or
6038 GEU by altering the integer. */
6040 if ((code
== LEU
|| code
== GTU
)
6041 && GET_CODE (ix86_compare_op1
) == CONST_INT
6042 && GET_MODE (operands
[0]) != HImode
6043 && (unsigned int)INTVAL (ix86_compare_op1
) != 0xffffffff
6044 && GET_CODE (operands
[2]) == CONST_INT
6045 && GET_CODE (operands
[3]) == CONST_INT
)
6051 ix86_compare_op1
= GEN_INT (INTVAL (ix86_compare_op1
) + 1);
6055 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
6056 compare_seq
= gen_sequence ();
6059 compare_code
= GET_CODE (compare_op
);
6061 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
6062 HImode insns, we'd be swallowed in word prefix ops. */
6064 if (GET_MODE (operands
[0]) != HImode
6065 && GET_CODE (operands
[2]) == CONST_INT
6066 && GET_CODE (operands
[3]) == CONST_INT
)
6068 rtx out
= operands
[0];
6069 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
6070 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
6073 if ((compare_code
== LTU
|| compare_code
== GEU
)
6074 && !second_test
&& !bypass_test
)
6077 /* Detect overlap between destination and compare sources. */
6080 /* To simplify rest of code, restrict to the GEU case. */
6081 if (compare_code
== LTU
)
6086 compare_code
= reverse_condition (compare_code
);
6087 code
= reverse_condition (code
);
6091 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
6092 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
6093 tmp
= gen_reg_rtx (SImode
);
6095 emit_insn (compare_seq
);
6096 emit_insn (gen_x86_movsicc_0_m1 (tmp
));
6108 emit_insn (gen_addsi3 (out
, out
, GEN_INT (ct
)));
6119 emit_insn (gen_iorsi3 (out
, out
, GEN_INT (ct
)));
6121 else if (diff
== -1 && ct
)
6131 emit_insn (gen_one_cmplsi2 (tmp
, tmp
));
6133 emit_insn (gen_addsi3 (out
, out
, GEN_INT (cf
)));
6140 * andl cf - ct, dest
6145 emit_insn (gen_andsi3 (out
, out
, GEN_INT (cf
- ct
)));
6147 emit_insn (gen_addsi3 (out
, out
, GEN_INT (ct
)));
6151 emit_move_insn (out
, tmp
);
6153 return 1; /* DONE */
6160 tmp
= ct
, ct
= cf
, cf
= tmp
;
6162 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
6164 /* We may be reversing unordered compare to normal compare, that
6165 is not valid in general (we may convert non-trapping condition
6166 to trapping one), however on i386 we currently emit all
6167 comparisons unordered. */
6168 compare_code
= reverse_condition_maybe_unordered (compare_code
);
6169 code
= reverse_condition_maybe_unordered (code
);
6173 compare_code
= reverse_condition (compare_code
);
6174 code
= reverse_condition (code
);
6177 if (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
6178 || diff
== 3 || diff
== 5 || diff
== 9)
6184 * lea cf(dest*(ct-cf)),dest
6188 * This also catches the degenerate setcc-only case.
6194 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
6195 ix86_compare_op1
, VOIDmode
, 0, 1);
6202 tmp
= gen_rtx_MULT (SImode
, out
, GEN_INT (diff
& ~1));
6206 tmp
= gen_rtx_PLUS (SImode
, tmp
, out
);
6212 tmp
= gen_rtx_PLUS (SImode
, tmp
, GEN_INT (cf
));
6218 emit_move_insn (out
, tmp
);
6223 clob
= gen_rtx_REG (CCmode
, FLAGS_REG
);
6224 clob
= gen_rtx_CLOBBER (VOIDmode
, clob
);
6226 tmp
= gen_rtx_SET (VOIDmode
, out
, tmp
);
6227 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
6231 emit_insn (gen_rtx_SET (VOIDmode
, out
, tmp
));
6233 if (out
!= operands
[0])
6234 emit_move_insn (operands
[0], out
);
6236 return 1; /* DONE */
6240 * General case: Jumpful:
6241 * xorl dest,dest cmpl op1, op2
6242 * cmpl op1, op2 movl ct, dest
6244 * decl dest movl cf, dest
6245 * andl (cf-ct),dest 1:
6250 * This is reasonably steep, but branch mispredict costs are
6251 * high on modern cpus, so consider failing only if optimizing
6254 * %%% Parameterize branch_cost on the tuning architecture, then
6255 * use that. The 80386 couldn't care less about mispredicts.
6258 if (!optimize_size
&& !TARGET_CMOVE
)
6264 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
6266 /* We may be reversing unordered compare to normal compare,
6267 that is not valid in general (we may convert non-trapping
6268 condition to trapping one), however on i386 we currently
6269 emit all comparisons unordered. */
6270 compare_code
= reverse_condition_maybe_unordered (compare_code
);
6271 code
= reverse_condition_maybe_unordered (code
);
6275 compare_code
= reverse_condition (compare_code
);
6276 code
= reverse_condition (code
);
6280 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
6281 ix86_compare_op1
, VOIDmode
, 0, 1);
6283 emit_insn (gen_addsi3 (out
, out
, constm1_rtx
));
6284 emit_insn (gen_andsi3 (out
, out
, GEN_INT (cf
-ct
)));
6286 emit_insn (gen_addsi3 (out
, out
, GEN_INT (ct
)));
6287 if (out
!= operands
[0])
6288 emit_move_insn (operands
[0], out
);
6290 return 1; /* DONE */
6296 /* Try a few things more with specific constants and a variable. */
6299 rtx var
, orig_out
, out
, tmp
;
6302 return 0; /* FAIL */
6304 /* If one of the two operands is an interesting constant, load a
6305 constant with the above and mask it in with a logical operation. */
6307 if (GET_CODE (operands
[2]) == CONST_INT
)
6310 if (INTVAL (operands
[2]) == 0)
6311 operands
[3] = constm1_rtx
, op
= and_optab
;
6312 else if (INTVAL (operands
[2]) == -1)
6313 operands
[3] = const0_rtx
, op
= ior_optab
;
6315 return 0; /* FAIL */
6317 else if (GET_CODE (operands
[3]) == CONST_INT
)
6320 if (INTVAL (operands
[3]) == 0)
6321 operands
[2] = constm1_rtx
, op
= and_optab
;
6322 else if (INTVAL (operands
[3]) == -1)
6323 operands
[2] = const0_rtx
, op
= ior_optab
;
6325 return 0; /* FAIL */
6328 return 0; /* FAIL */
6330 orig_out
= operands
[0];
6331 tmp
= gen_reg_rtx (GET_MODE (orig_out
));
6334 /* Recurse to get the constant loaded. */
6335 if (ix86_expand_int_movcc (operands
) == 0)
6336 return 0; /* FAIL */
6338 /* Mask in the interesting variable. */
6339 out
= expand_binop (GET_MODE (orig_out
), op
, var
, tmp
, orig_out
, 0,
6341 if (out
!= orig_out
)
6342 emit_move_insn (orig_out
, out
);
6344 return 1; /* DONE */
6348 * For comparison with above,
6358 if (! nonimmediate_operand (operands
[2], GET_MODE (operands
[0])))
6359 operands
[2] = force_reg (GET_MODE (operands
[0]), operands
[2]);
6360 if (! nonimmediate_operand (operands
[3], GET_MODE (operands
[0])))
6361 operands
[3] = force_reg (GET_MODE (operands
[0]), operands
[3]);
6363 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
6365 rtx tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
6366 emit_move_insn (tmp
, operands
[3]);
6369 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
6371 rtx tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
6372 emit_move_insn (tmp
, operands
[2]);
6376 emit_insn (compare_seq
);
6377 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
6378 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
6379 compare_op
, operands
[2],
6382 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
6383 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
6388 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
6389 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
6394 return 1; /* DONE */
6398 ix86_expand_fp_movcc (operands
)
6403 rtx compare_op
, second_test
, bypass_test
;
6405 /* For SF/DFmode conditional moves based on comparisons
6406 in same mode, we may want to use SSE min/max instructions. */
6407 if (((TARGET_SSE
&& GET_MODE (operands
[0]) == SFmode
)
6408 || (TARGET_SSE2
&& GET_MODE (operands
[0]) == DFmode
))
6409 && GET_MODE (ix86_compare_op0
) == GET_MODE (operands
[0])
6410 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
6412 || (GET_CODE (operands
[1]) != LTGT
&& GET_CODE (operands
[1]) != UNEQ
))
6413 /* We may be called from the post-reload splitter. */
6414 && (!REG_P (operands
[0])
6415 || SSE_REG_P (operands
[0])
6416 || REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
))
6418 rtx op0
= ix86_compare_op0
, op1
= ix86_compare_op1
;
6419 code
= GET_CODE (operands
[1]);
6421 /* See if we have (cross) match between comparison operands and
6422 conditional move operands. */
6423 if (rtx_equal_p (operands
[2], op1
))
6428 code
= reverse_condition_maybe_unordered (code
);
6430 if (rtx_equal_p (operands
[2], op0
) && rtx_equal_p (operands
[3], op1
))
6432 /* Check for min operation. */
6435 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
6436 if (memory_operand (op0
, VOIDmode
))
6437 op0
= force_reg (GET_MODE (operands
[0]), op0
);
6438 if (GET_MODE (operands
[0]) == SFmode
)
6439 emit_insn (gen_minsf3 (operands
[0], op0
, op1
));
6441 emit_insn (gen_mindf3 (operands
[0], op0
, op1
));
6444 /* Check for max operation. */
6447 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
6448 if (memory_operand (op0
, VOIDmode
))
6449 op0
= force_reg (GET_MODE (operands
[0]), op0
);
6450 if (GET_MODE (operands
[0]) == SFmode
)
6451 emit_insn (gen_maxsf3 (operands
[0], op0
, op1
));
6453 emit_insn (gen_maxdf3 (operands
[0], op0
, op1
));
6457 /* Manage condition to be sse_comparison_operator. In case we are
6458 in non-ieee mode, try to canonicalize the destination operand
6459 to be first in the comparison - this helps reload to avoid extra
6461 if (!sse_comparison_operator (operands
[1], VOIDmode
)
6462 || (rtx_equal_p (operands
[0], ix86_compare_op1
) && !TARGET_IEEE_FP
))
6464 rtx tmp
= ix86_compare_op0
;
6465 ix86_compare_op0
= ix86_compare_op1
;
6466 ix86_compare_op1
= tmp
;
6467 operands
[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands
[1])),
6468 VOIDmode
, ix86_compare_op0
,
6471 /* Similary try to manage result to be first operand of conditional
6472 move. We also don't support the NE comparison on SSE, so try to
6474 if (rtx_equal_p (operands
[0], operands
[3])
6475 || GET_CODE (operands
[1]) == NE
)
6477 rtx tmp
= operands
[2];
6478 operands
[2] = operands
[3];
6480 operands
[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
6481 (GET_CODE (operands
[1])),
6482 VOIDmode
, ix86_compare_op0
,
6485 if (GET_MODE (operands
[0]) == SFmode
)
6486 emit_insn (gen_sse_movsfcc (operands
[0], operands
[1],
6487 operands
[2], operands
[3],
6488 ix86_compare_op0
, ix86_compare_op1
));
6490 emit_insn (gen_sse_movdfcc (operands
[0], operands
[1],
6491 operands
[2], operands
[3],
6492 ix86_compare_op0
, ix86_compare_op1
));
6496 /* The floating point conditional move instructions don't directly
6497 support conditions resulting from a signed integer comparison. */
6499 code
= GET_CODE (operands
[1]);
6500 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
6502 /* The floating point conditional move instructions don't directly
6503 support signed integer comparisons. */
6505 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
6507 if (second_test
!= NULL
|| bypass_test
!= NULL
)
6509 tmp
= gen_reg_rtx (QImode
);
6510 ix86_expand_setcc (code
, tmp
);
6512 ix86_compare_op0
= tmp
;
6513 ix86_compare_op1
= const0_rtx
;
6514 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
6516 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
6518 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
6519 emit_move_insn (tmp
, operands
[3]);
6522 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
6524 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
6525 emit_move_insn (tmp
, operands
[2]);
6529 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
6530 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
6535 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
6536 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
6541 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
6542 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
6550 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
6551 works for floating pointer parameters and nonoffsetable memories.
6552 For pushes, it returns just stack offsets; the values will be saved
6553 in the right order. Maximally three parts are generated. */
6556 ix86_split_to_parts (operand
, parts
, mode
)
6559 enum machine_mode mode
;
6561 int size
= mode
== TFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
6563 if (GET_CODE (operand
) == REG
&& MMX_REGNO_P (REGNO (operand
)))
6565 if (size
< 2 || size
> 3)
6568 /* Optimize constant pool reference to immediates. This is used by fp moves,
6569 that force all constants to memory to allow combining. */
6571 if (GET_CODE (operand
) == MEM
6572 && GET_CODE (XEXP (operand
, 0)) == SYMBOL_REF
6573 && CONSTANT_POOL_ADDRESS_P (XEXP (operand
, 0)))
6574 operand
= get_pool_constant (XEXP (operand
, 0));
6576 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
6578 /* The only non-offsetable memories we handle are pushes. */
6579 if (! push_operand (operand
, VOIDmode
))
6582 PUT_MODE (operand
, SImode
);
6583 parts
[0] = parts
[1] = parts
[2] = operand
;
6588 split_di (&operand
, 1, &parts
[0], &parts
[1]);
6591 if (REG_P (operand
))
6593 if (!reload_completed
)
6595 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
6596 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
6598 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
6600 else if (offsettable_memref_p (operand
))
6602 PUT_MODE (operand
, SImode
);
6604 parts
[1] = adj_offsettable_operand (operand
, 4);
6606 parts
[2] = adj_offsettable_operand (operand
, 8);
6608 else if (GET_CODE (operand
) == CONST_DOUBLE
)
6613 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
6618 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
6619 parts
[2] = GEN_INT (l
[2]);
6622 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
6627 parts
[1] = GEN_INT (l
[1]);
6628 parts
[0] = GEN_INT (l
[0]);
6638 /* Emit insns to perform a move or push of DI, DF, and XF values.
6639 Return false when normal moves are needed; true when all required
6640 insns have been emitted. Operands 2-4 contain the input values
6641 int the correct order; operands 5-7 contain the output values. */
6644 ix86_split_long_move (operands1
)
6653 /* Make our own copy to avoid clobbering the operands. */
6654 operands
[0] = copy_rtx (operands1
[0]);
6655 operands
[1] = copy_rtx (operands1
[1]);
6657 /* The only non-offsettable memory we handle is push. */
6658 if (push_operand (operands
[0], VOIDmode
))
6660 else if (GET_CODE (operands
[0]) == MEM
6661 && ! offsettable_memref_p (operands
[0]))
6664 size
= ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands1
[0]));
6665 ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands1
[0]));
6667 /* When emitting push, take care for source operands on the stack. */
6668 if (push
&& GET_CODE (operands
[1]) == MEM
6669 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
6672 part
[1][1] = part
[1][2];
6673 part
[1][0] = part
[1][1];
6676 /* We need to do copy in the right order in case an address register
6677 of the source overlaps the destination. */
6678 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
6680 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
6682 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
6685 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
6688 /* Collision in the middle part can be handled by reordering. */
6689 if (collisions
== 1 && size
== 3
6690 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
6693 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
6694 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
6697 /* If there are more collisions, we can't handle it by reordering.
6698 Do an lea to the last part and use only one colliding move. */
6699 else if (collisions
> 1)
6702 emit_insn (gen_rtx_SET (VOIDmode
, part
[0][size
- 1],
6703 XEXP (part
[1][0], 0)));
6704 part
[1][0] = change_address (part
[1][0], SImode
, part
[0][size
- 1]);
6705 part
[1][1] = adj_offsettable_operand (part
[1][0], 4);
6707 part
[1][2] = adj_offsettable_operand (part
[1][0], 8);
6715 /* We use only first 12 bytes of TFmode value, but for pushing we
6716 are required to adjust stack as if we were pushing real 16byte
6718 if (GET_MODE (operands1
[0]) == TFmode
)
6719 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
6721 emit_insn (gen_push (part
[1][2]));
6723 emit_insn (gen_push (part
[1][1]));
6724 emit_insn (gen_push (part
[1][0]));
6728 /* Choose correct order to not overwrite the source before it is copied. */
6729 if ((REG_P (part
[0][0])
6730 && REG_P (part
[1][1])
6731 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
6733 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
6735 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
6739 operands1
[2] = part
[0][2];
6740 operands1
[3] = part
[0][1];
6741 operands1
[4] = part
[0][0];
6742 operands1
[5] = part
[1][2];
6743 operands1
[6] = part
[1][1];
6744 operands1
[7] = part
[1][0];
6748 operands1
[2] = part
[0][1];
6749 operands1
[3] = part
[0][0];
6750 operands1
[5] = part
[1][1];
6751 operands1
[6] = part
[1][0];
6758 operands1
[2] = part
[0][0];
6759 operands1
[3] = part
[0][1];
6760 operands1
[4] = part
[0][2];
6761 operands1
[5] = part
[1][0];
6762 operands1
[6] = part
[1][1];
6763 operands1
[7] = part
[1][2];
6767 operands1
[2] = part
[0][0];
6768 operands1
[3] = part
[0][1];
6769 operands1
[5] = part
[1][0];
6770 operands1
[6] = part
[1][1];
6778 ix86_split_ashldi (operands
, scratch
)
6779 rtx
*operands
, scratch
;
6781 rtx low
[2], high
[2];
6784 if (GET_CODE (operands
[2]) == CONST_INT
)
6786 split_di (operands
, 2, low
, high
);
6787 count
= INTVAL (operands
[2]) & 63;
6791 emit_move_insn (high
[0], low
[1]);
6792 emit_move_insn (low
[0], const0_rtx
);
6795 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
6799 if (!rtx_equal_p (operands
[0], operands
[1]))
6800 emit_move_insn (operands
[0], operands
[1]);
6801 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
6802 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
6807 if (!rtx_equal_p (operands
[0], operands
[1]))
6808 emit_move_insn (operands
[0], operands
[1]);
6810 split_di (operands
, 1, low
, high
);
6812 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
6813 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
6815 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
6817 if (! no_new_pseudos
)
6818 scratch
= force_reg (SImode
, const0_rtx
);
6820 emit_move_insn (scratch
, const0_rtx
);
6822 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
6826 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
6831 ix86_split_ashrdi (operands
, scratch
)
6832 rtx
*operands
, scratch
;
6834 rtx low
[2], high
[2];
6837 if (GET_CODE (operands
[2]) == CONST_INT
)
6839 split_di (operands
, 2, low
, high
);
6840 count
= INTVAL (operands
[2]) & 63;
6844 emit_move_insn (low
[0], high
[1]);
6846 if (! reload_completed
)
6847 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
6850 emit_move_insn (high
[0], low
[0]);
6851 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
6855 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
6859 if (!rtx_equal_p (operands
[0], operands
[1]))
6860 emit_move_insn (operands
[0], operands
[1]);
6861 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
6862 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
6867 if (!rtx_equal_p (operands
[0], operands
[1]))
6868 emit_move_insn (operands
[0], operands
[1]);
6870 split_di (operands
, 1, low
, high
);
6872 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
6873 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
6875 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
6877 if (! no_new_pseudos
)
6878 scratch
= gen_reg_rtx (SImode
);
6879 emit_move_insn (scratch
, high
[0]);
6880 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
6881 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
6885 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
6890 ix86_split_lshrdi (operands
, scratch
)
6891 rtx
*operands
, scratch
;
6893 rtx low
[2], high
[2];
6896 if (GET_CODE (operands
[2]) == CONST_INT
)
6898 split_di (operands
, 2, low
, high
);
6899 count
= INTVAL (operands
[2]) & 63;
6903 emit_move_insn (low
[0], high
[1]);
6904 emit_move_insn (high
[0], const0_rtx
);
6907 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
6911 if (!rtx_equal_p (operands
[0], operands
[1]))
6912 emit_move_insn (operands
[0], operands
[1]);
6913 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
6914 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
6919 if (!rtx_equal_p (operands
[0], operands
[1]))
6920 emit_move_insn (operands
[0], operands
[1]);
6922 split_di (operands
, 1, low
, high
);
6924 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
6925 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
6927 /* Heh. By reversing the arguments, we can reuse this pattern. */
6928 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
6930 if (! no_new_pseudos
)
6931 scratch
= force_reg (SImode
, const0_rtx
);
6933 emit_move_insn (scratch
, const0_rtx
);
6935 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
6939 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
6943 /* Expand the appropriate insns for doing strlen if not just doing
6946 out = result, initialized with the start address
6947 align_rtx = alignment of the address.
6948 scratch = scratch register, initialized with the startaddress when
6949 not aligned, otherwise undefined
6951 This is just the body. It needs the initialisations mentioned above and
6952 some address computing at the end. These things are done in i386.md. */
6955 ix86_expand_strlensi_unroll_1 (out
, align_rtx
, scratch
)
6956 rtx out
, align_rtx
, scratch
;
6960 rtx align_2_label
= NULL_RTX
;
6961 rtx align_3_label
= NULL_RTX
;
6962 rtx align_4_label
= gen_label_rtx ();
6963 rtx end_0_label
= gen_label_rtx ();
6965 rtx tmpreg
= gen_reg_rtx (SImode
);
6968 if (GET_CODE (align_rtx
) == CONST_INT
)
6969 align
= INTVAL (align_rtx
);
6971 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
6973 /* Is there a known alignment and is it less than 4? */
6976 /* Is there a known alignment and is it not 2? */
6979 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
6980 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
6982 /* Leave just the 3 lower bits. */
6983 align_rtx
= expand_binop (SImode
, and_optab
, scratch
, GEN_INT (3),
6984 NULL_RTX
, 0, OPTAB_WIDEN
);
6986 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
6987 SImode
, 1, 0, align_4_label
);
6988 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), EQ
, NULL
,
6989 SImode
, 1, 0, align_2_label
);
6990 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), GTU
, NULL
,
6991 SImode
, 1, 0, align_3_label
);
6995 /* Since the alignment is 2, we have to check 2 or 0 bytes;
6996 check if is aligned to 4 - byte. */
6998 align_rtx
= expand_binop (SImode
, and_optab
, scratch
, GEN_INT (2),
6999 NULL_RTX
, 0, OPTAB_WIDEN
);
7001 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
7002 SImode
, 1, 0, align_4_label
);
7005 mem
= gen_rtx_MEM (QImode
, out
);
7007 /* Now compare the bytes. */
7009 /* Compare the first n unaligned byte on a byte per byte basis. */
7010 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
7011 QImode
, 1, 0, end_0_label
);
7013 /* Increment the address. */
7014 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
7016 /* Not needed with an alignment of 2 */
7019 emit_label (align_2_label
);
7021 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
7022 QImode
, 1, 0, end_0_label
);
7024 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
7026 emit_label (align_3_label
);
7029 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
7030 QImode
, 1, 0, end_0_label
);
7032 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
7035 /* Generate loop to check 4 bytes at a time. It is not a good idea to
7036 align this loop. It gives only huge programs, but does not help to
7038 emit_label (align_4_label
);
7040 mem
= gen_rtx_MEM (SImode
, out
);
7041 emit_move_insn (scratch
, mem
);
7042 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
7044 /* This formula yields a nonzero result iff one of the bytes is zero.
7045 This saves three branches inside loop and many cycles. */
7047 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
7048 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
7049 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
7050 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, GEN_INT (0x80808080)));
7051 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0,
7052 SImode
, 1, 0, align_4_label
);
7056 rtx reg
= gen_reg_rtx (SImode
);
7057 emit_move_insn (reg
, tmpreg
);
7058 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
7060 /* If zero is not in the first two bytes, move two bytes forward. */
7061 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
7062 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
7063 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
7064 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
7065 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
7068 /* Emit lea manually to avoid clobbering of flags. */
7069 emit_insn (gen_rtx_SET (SImode
, reg
,
7070 gen_rtx_PLUS (SImode
, out
, GEN_INT (2))));
7072 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
7073 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
7074 emit_insn (gen_rtx_SET (VOIDmode
, out
,
7075 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
7082 rtx end_2_label
= gen_label_rtx ();
7083 /* Is zero in the first two bytes? */
7085 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
7086 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
7087 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
7088 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
7089 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
7091 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
7092 JUMP_LABEL (tmp
) = end_2_label
;
7094 /* Not in the first two. Move two bytes forward. */
7095 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
7096 emit_insn (gen_addsi3 (out
, out
, GEN_INT (2)));
7098 emit_label (end_2_label
);
7102 /* Avoid branch in fixing the byte. */
7103 tmpreg
= gen_lowpart (QImode
, tmpreg
);
7104 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
7105 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3)));
7107 emit_label (end_0_label
);
7110 /* Clear stack slot assignments remembered from previous functions.
7111 This is called from INIT_EXPANDERS once before RTL is emitted for each
7115 ix86_init_machine_status (p
)
7118 p
->machine
= (struct machine_function
*)
7119 xcalloc (1, sizeof (struct machine_function
));
7122 /* Mark machine specific bits of P for GC. */
7124 ix86_mark_machine_status (p
)
7127 struct machine_function
*machine
= p
->machine
;
7128 enum machine_mode mode
;
7134 for (mode
= VOIDmode
; (int) mode
< (int) MAX_MACHINE_MODE
;
7135 mode
= (enum machine_mode
) ((int) mode
+ 1))
7136 for (n
= 0; n
< MAX_386_STACK_LOCALS
; n
++)
7137 ggc_mark_rtx (machine
->stack_locals
[(int) mode
][n
]);
7141 ix86_free_machine_status (p
)
7148 /* Return a MEM corresponding to a stack slot with mode MODE.
7149 Allocate a new slot if necessary.
7151 The RTL for a function can have several slots available: N is
7152 which slot to use. */
7155 assign_386_stack_local (mode
, n
)
7156 enum machine_mode mode
;
7159 if (n
< 0 || n
>= MAX_386_STACK_LOCALS
)
7162 if (ix86_stack_locals
[(int) mode
][n
] == NULL_RTX
)
7163 ix86_stack_locals
[(int) mode
][n
]
7164 = assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
7166 return ix86_stack_locals
[(int) mode
][n
];
7169 /* Calculate the length of the memory address in the instruction
7170 encoding. Does not include the one-byte modrm, opcode, or prefix. */
7173 memory_address_length (addr
)
7176 struct ix86_address parts
;
7177 rtx base
, index
, disp
;
7180 if (GET_CODE (addr
) == PRE_DEC
7181 || GET_CODE (addr
) == POST_INC
)
7184 if (! ix86_decompose_address (addr
, &parts
))
7188 index
= parts
.index
;
7192 /* Register Indirect. */
7193 if (base
&& !index
&& !disp
)
7195 /* Special cases: ebp and esp need the two-byte modrm form. */
7196 if (addr
== stack_pointer_rtx
7197 || addr
== arg_pointer_rtx
7198 || addr
== frame_pointer_rtx
7199 || addr
== hard_frame_pointer_rtx
)
7203 /* Direct Addressing. */
7204 else if (disp
&& !base
&& !index
)
7209 /* Find the length of the displacement constant. */
7212 if (GET_CODE (disp
) == CONST_INT
7213 && CONST_OK_FOR_LETTER_P (INTVAL (disp
), 'K'))
7219 /* An index requires the two-byte modrm form. */
7227 /* Compute default value for "length_immediate" attribute. When SHORTFORM is set
7228 expect that insn have 8bit immediate alternative. */
7230 ix86_attr_length_immediate_default (insn
, shortform
)
7236 extract_insn_cached (insn
);
7237 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
7238 if (CONSTANT_P (recog_data
.operand
[i
]))
7243 && GET_CODE (recog_data
.operand
[i
]) == CONST_INT
7244 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data
.operand
[i
]), 'K'))
7248 switch (get_attr_mode (insn
))
7260 fatal_insn ("Unknown insn mode", insn
);
7266 /* Compute default value for "length_address" attribute. */
7268 ix86_attr_length_address_default (insn
)
7272 extract_insn_cached (insn
);
7273 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
7274 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
7276 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
7282 /* Return the maximum number of instructions a cpu can issue. */
7289 case PROCESSOR_PENTIUM
:
7293 case PROCESSOR_PENTIUMPRO
:
7294 case PROCESSOR_PENTIUM4
:
7295 case PROCESSOR_ATHLON
:
7303 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
7304 by DEP_INSN and nothing set by DEP_INSN. */
7307 ix86_flags_dependant (insn
, dep_insn
, insn_type
)
7309 enum attr_type insn_type
;
7313 /* Simplify the test for uninteresting insns. */
7314 if (insn_type
!= TYPE_SETCC
7315 && insn_type
!= TYPE_ICMOV
7316 && insn_type
!= TYPE_FCMOV
7317 && insn_type
!= TYPE_IBR
)
7320 if ((set
= single_set (dep_insn
)) != 0)
7322 set
= SET_DEST (set
);
7325 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
7326 && XVECLEN (PATTERN (dep_insn
), 0) == 2
7327 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
7328 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
7330 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
7331 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
7336 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
7339 /* This test is true if the dependant insn reads the flags but
7340 not any other potentially set register. */
7341 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
7344 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
7350 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
7351 address with operands set by DEP_INSN. */
7354 ix86_agi_dependant (insn
, dep_insn
, insn_type
)
7356 enum attr_type insn_type
;
7360 if (insn_type
== TYPE_LEA
)
7362 addr
= PATTERN (insn
);
7363 if (GET_CODE (addr
) == SET
)
7365 else if (GET_CODE (addr
) == PARALLEL
7366 && GET_CODE (XVECEXP (addr
, 0, 0)) == SET
)
7367 addr
= XVECEXP (addr
, 0, 0);
7370 addr
= SET_SRC (addr
);
7375 extract_insn_cached (insn
);
7376 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
7377 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
7379 addr
= XEXP (recog_data
.operand
[i
], 0);
7386 return modified_in_p (addr
, dep_insn
);
7390 ix86_adjust_cost (insn
, link
, dep_insn
, cost
)
7391 rtx insn
, link
, dep_insn
;
7394 enum attr_type insn_type
, dep_insn_type
;
7395 enum attr_memory memory
;
7397 int dep_insn_code_number
;
7399 /* Anti and output depenancies have zero cost on all CPUs. */
7400 if (REG_NOTE_KIND (link
) != 0)
7403 dep_insn_code_number
= recog_memoized (dep_insn
);
7405 /* If we can't recognize the insns, we can't really do anything. */
7406 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
7409 insn_type
= get_attr_type (insn
);
7410 dep_insn_type
= get_attr_type (dep_insn
);
7412 /* Prologue and epilogue allocators can have a false dependency on ebp.
7413 This results in one cycle extra stall on Pentium prologue scheduling,
7414 so handle this important case manually. */
7415 if (dep_insn_code_number
== CODE_FOR_pro_epilogue_adjust_stack
7416 && dep_insn_type
== TYPE_ALU
7417 && !reg_mentioned_p (stack_pointer_rtx
, insn
))
7422 case PROCESSOR_PENTIUM
:
7423 /* Address Generation Interlock adds a cycle of latency. */
7424 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
7427 /* ??? Compares pair with jump/setcc. */
7428 if (ix86_flags_dependant (insn
, dep_insn
, insn_type
))
7431 /* Floating point stores require value to be ready one cycle ealier. */
7432 if (insn_type
== TYPE_FMOV
7433 && get_attr_memory (insn
) == MEMORY_STORE
7434 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
7438 case PROCESSOR_PENTIUMPRO
:
7439 /* Since we can't represent delayed latencies of load+operation,
7440 increase the cost here for non-imov insns. */
7441 if (dep_insn_type
!= TYPE_IMOV
7442 && dep_insn_type
!= TYPE_FMOV
7443 && ((memory
= get_attr_memory (dep_insn
) == MEMORY_LOAD
)
7444 || memory
== MEMORY_BOTH
))
7447 /* INT->FP conversion is expensive. */
7448 if (get_attr_fp_int_src (dep_insn
))
7451 /* There is one cycle extra latency between an FP op and a store. */
7452 if (insn_type
== TYPE_FMOV
7453 && (set
= single_set (dep_insn
)) != NULL_RTX
7454 && (set2
= single_set (insn
)) != NULL_RTX
7455 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
7456 && GET_CODE (SET_DEST (set2
)) == MEM
)
7461 /* The esp dependency is resolved before the instruction is really
7463 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
7464 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
7467 /* Since we can't represent delayed latencies of load+operation,
7468 increase the cost here for non-imov insns. */
7469 if ((memory
= get_attr_memory (dep_insn
) == MEMORY_LOAD
)
7470 || memory
== MEMORY_BOTH
)
7471 cost
+= (dep_insn_type
!= TYPE_IMOV
) ? 2 : 1;
7473 /* INT->FP conversion is expensive. */
7474 if (get_attr_fp_int_src (dep_insn
))
7478 case PROCESSOR_ATHLON
:
7479 if ((memory
= get_attr_memory (dep_insn
)) == MEMORY_LOAD
7480 || memory
== MEMORY_BOTH
)
7482 if (dep_insn_type
== TYPE_IMOV
|| dep_insn_type
== TYPE_FMOV
)
7497 struct ppro_sched_data
7500 int issued_this_cycle
;
7505 ix86_safe_length (insn
)
7508 if (recog_memoized (insn
) >= 0)
7509 return get_attr_length(insn
);
7515 ix86_safe_length_prefix (insn
)
7518 if (recog_memoized (insn
) >= 0)
7519 return get_attr_length(insn
);
7524 static enum attr_memory
7525 ix86_safe_memory (insn
)
7528 if (recog_memoized (insn
) >= 0)
7529 return get_attr_memory(insn
);
7531 return MEMORY_UNKNOWN
;
7534 static enum attr_pent_pair
7535 ix86_safe_pent_pair (insn
)
7538 if (recog_memoized (insn
) >= 0)
7539 return get_attr_pent_pair(insn
);
7541 return PENT_PAIR_NP
;
7544 static enum attr_ppro_uops
7545 ix86_safe_ppro_uops (insn
)
7548 if (recog_memoized (insn
) >= 0)
7549 return get_attr_ppro_uops (insn
);
7551 return PPRO_UOPS_MANY
;
7555 ix86_dump_ppro_packet (dump
)
7558 if (ix86_sched_data
.ppro
.decode
[0])
7560 fprintf (dump
, "PPRO packet: %d",
7561 INSN_UID (ix86_sched_data
.ppro
.decode
[0]));
7562 if (ix86_sched_data
.ppro
.decode
[1])
7563 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[1]));
7564 if (ix86_sched_data
.ppro
.decode
[2])
7565 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[2]));
7570 /* We're beginning a new block. Initialize data structures as necessary. */
7573 ix86_sched_init (dump
, sched_verbose
)
7574 FILE *dump ATTRIBUTE_UNUSED
;
7575 int sched_verbose ATTRIBUTE_UNUSED
;
7577 memset (&ix86_sched_data
, 0, sizeof (ix86_sched_data
));
7580 /* Shift INSN to SLOT, and shift everything else down. */
7583 ix86_reorder_insn (insnp
, slot
)
7590 insnp
[0] = insnp
[1];
7591 while (++insnp
!= slot
);
7596 /* Find an instruction with given pairability and minimal amount of cycles
7597 lost by the fact that the CPU waits for both pipelines to finish before
7598 reading next instructions. Also take care that both instructions together
7599 can not exceed 7 bytes. */
7602 ix86_pent_find_pair (e_ready
, ready
, type
, first
)
7605 enum attr_pent_pair type
;
7608 int mincycles
, cycles
;
7609 enum attr_pent_pair tmp
;
7610 enum attr_memory memory
;
7611 rtx
*insnp
, *bestinsnp
= NULL
;
7613 if (ix86_safe_length (first
) > 7 + ix86_safe_length_prefix (first
))
7616 memory
= ix86_safe_memory (first
);
7617 cycles
= result_ready_cost (first
);
7618 mincycles
= INT_MAX
;
7620 for (insnp
= e_ready
; insnp
>= ready
&& mincycles
; --insnp
)
7621 if ((tmp
= ix86_safe_pent_pair (*insnp
)) == type
7622 && ix86_safe_length (*insnp
) <= 7 + ix86_safe_length_prefix (*insnp
))
7624 enum attr_memory second_memory
;
7625 int secondcycles
, currentcycles
;
7627 second_memory
= ix86_safe_memory (*insnp
);
7628 secondcycles
= result_ready_cost (*insnp
);
7629 currentcycles
= abs (cycles
- secondcycles
);
7631 if (secondcycles
>= 1 && cycles
>= 1)
7633 /* Two read/modify/write instructions together takes two
7635 if (memory
== MEMORY_BOTH
&& second_memory
== MEMORY_BOTH
)
7638 /* Read modify/write instruction followed by read/modify
7639 takes one cycle longer. */
7640 if (memory
== MEMORY_BOTH
&& second_memory
== MEMORY_LOAD
7641 && tmp
!= PENT_PAIR_UV
7642 && ix86_safe_pent_pair (first
) != PENT_PAIR_UV
)
7645 if (currentcycles
< mincycles
)
7646 bestinsnp
= insnp
, mincycles
= currentcycles
;
7652 /* Subroutines of ix86_sched_reorder. */
7655 ix86_sched_reorder_pentium (ready
, e_ready
)
7659 enum attr_pent_pair pair1
, pair2
;
7662 /* This wouldn't be necessary if Haifa knew that static insn ordering
7663 is important to which pipe an insn is issued to. So we have to make
7664 some minor rearrangements. */
7666 pair1
= ix86_safe_pent_pair (*e_ready
);
7668 /* If the first insn is non-pairable, let it be. */
7669 if (pair1
== PENT_PAIR_NP
)
7672 pair2
= PENT_PAIR_NP
;
7675 /* If the first insn is UV or PV pairable, search for a PU
7677 if (pair1
== PENT_PAIR_UV
|| pair1
== PENT_PAIR_PV
)
7679 insnp
= ix86_pent_find_pair (e_ready
-1, ready
,
7680 PENT_PAIR_PU
, *e_ready
);
7682 pair2
= PENT_PAIR_PU
;
7685 /* If the first insn is PU or UV pairable, search for a PV
7687 if (pair2
== PENT_PAIR_NP
7688 && (pair1
== PENT_PAIR_PU
|| pair1
== PENT_PAIR_UV
))
7690 insnp
= ix86_pent_find_pair (e_ready
-1, ready
,
7691 PENT_PAIR_PV
, *e_ready
);
7693 pair2
= PENT_PAIR_PV
;
7696 /* If the first insn is pairable, search for a UV
7698 if (pair2
== PENT_PAIR_NP
)
7700 insnp
= ix86_pent_find_pair (e_ready
-1, ready
,
7701 PENT_PAIR_UV
, *e_ready
);
7703 pair2
= PENT_PAIR_UV
;
7706 if (pair2
== PENT_PAIR_NP
)
7709 /* Found something! Decide if we need to swap the order. */
7710 if (pair1
== PENT_PAIR_PV
|| pair2
== PENT_PAIR_PU
7711 || (pair1
== PENT_PAIR_UV
&& pair2
== PENT_PAIR_UV
7712 && ix86_safe_memory (*e_ready
) == MEMORY_BOTH
7713 && ix86_safe_memory (*insnp
) == MEMORY_LOAD
))
7714 ix86_reorder_insn (insnp
, e_ready
);
7716 ix86_reorder_insn (insnp
, e_ready
- 1);
7720 ix86_sched_reorder_ppro (ready
, e_ready
)
7725 enum attr_ppro_uops cur_uops
;
7726 int issued_this_cycle
;
7730 /* At this point .ppro.decode contains the state of the three
7731 decoders from last "cycle". That is, those insns that were
7732 actually independent. But here we're scheduling for the
7733 decoder, and we may find things that are decodable in the
7736 memcpy (decode
, ix86_sched_data
.ppro
.decode
, sizeof (decode
));
7737 issued_this_cycle
= 0;
7740 cur_uops
= ix86_safe_ppro_uops (*insnp
);
7742 /* If the decoders are empty, and we've a complex insn at the
7743 head of the priority queue, let it issue without complaint. */
7744 if (decode
[0] == NULL
)
7746 if (cur_uops
== PPRO_UOPS_MANY
)
7752 /* Otherwise, search for a 2-4 uop unsn to issue. */
7753 while (cur_uops
!= PPRO_UOPS_FEW
)
7757 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
7760 /* If so, move it to the head of the line. */
7761 if (cur_uops
== PPRO_UOPS_FEW
)
7762 ix86_reorder_insn (insnp
, e_ready
);
7764 /* Issue the head of the queue. */
7765 issued_this_cycle
= 1;
7766 decode
[0] = *e_ready
--;
7769 /* Look for simple insns to fill in the other two slots. */
7770 for (i
= 1; i
< 3; ++i
)
7771 if (decode
[i
] == NULL
)
7773 if (ready
>= e_ready
)
7777 cur_uops
= ix86_safe_ppro_uops (*insnp
);
7778 while (cur_uops
!= PPRO_UOPS_ONE
)
7782 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
7785 /* Found one. Move it to the head of the queue and issue it. */
7786 if (cur_uops
== PPRO_UOPS_ONE
)
7788 ix86_reorder_insn (insnp
, e_ready
);
7789 decode
[i
] = *e_ready
--;
7790 issued_this_cycle
++;
7794 /* ??? Didn't find one. Ideally, here we would do a lazy split
7795 of 2-uop insns, issue one and queue the other. */
7799 if (issued_this_cycle
== 0)
7800 issued_this_cycle
= 1;
7801 ix86_sched_data
.ppro
.issued_this_cycle
= issued_this_cycle
;
7804 /* We are about to being issuing insns for this clock cycle.
7805 Override the default sort algorithm to better slot instructions. */
7807 ix86_sched_reorder (dump
, sched_verbose
, ready
, n_ready
, clock_var
)
7808 FILE *dump ATTRIBUTE_UNUSED
;
7809 int sched_verbose ATTRIBUTE_UNUSED
;
7812 int clock_var ATTRIBUTE_UNUSED
;
7814 rtx
*e_ready
= ready
+ n_ready
- 1;
7824 case PROCESSOR_PENTIUM
:
7825 ix86_sched_reorder_pentium (ready
, e_ready
);
7828 case PROCESSOR_PENTIUMPRO
:
7829 ix86_sched_reorder_ppro (ready
, e_ready
);
7834 return ix86_issue_rate ();
7837 /* We are about to issue INSN. Return the number of insns left on the
7838 ready queue that can be issued this cycle. */
7841 ix86_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
)
7851 return can_issue_more
- 1;
7853 case PROCESSOR_PENTIUMPRO
:
7855 enum attr_ppro_uops uops
= ix86_safe_ppro_uops (insn
);
7857 if (uops
== PPRO_UOPS_MANY
)
7860 ix86_dump_ppro_packet (dump
);
7861 ix86_sched_data
.ppro
.decode
[0] = insn
;
7862 ix86_sched_data
.ppro
.decode
[1] = NULL
;
7863 ix86_sched_data
.ppro
.decode
[2] = NULL
;
7865 ix86_dump_ppro_packet (dump
);
7866 ix86_sched_data
.ppro
.decode
[0] = NULL
;
7868 else if (uops
== PPRO_UOPS_FEW
)
7871 ix86_dump_ppro_packet (dump
);
7872 ix86_sched_data
.ppro
.decode
[0] = insn
;
7873 ix86_sched_data
.ppro
.decode
[1] = NULL
;
7874 ix86_sched_data
.ppro
.decode
[2] = NULL
;
7878 for (i
= 0; i
< 3; ++i
)
7879 if (ix86_sched_data
.ppro
.decode
[i
] == NULL
)
7881 ix86_sched_data
.ppro
.decode
[i
] = insn
;
7889 ix86_dump_ppro_packet (dump
);
7890 ix86_sched_data
.ppro
.decode
[0] = NULL
;
7891 ix86_sched_data
.ppro
.decode
[1] = NULL
;
7892 ix86_sched_data
.ppro
.decode
[2] = NULL
;
7896 return --ix86_sched_data
.ppro
.issued_this_cycle
;
7900 /* Walk through INSNS and look for MEM references whose address is DSTREG or
7901 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
7905 ix86_set_move_mem_attrs (insns
, dstref
, srcref
, dstreg
, srcreg
)
7907 rtx dstref
, srcref
, dstreg
, srcreg
;
7911 for (insn
= insns
; insn
!= 0 ; insn
= NEXT_INSN (insn
))
7913 ix86_set_move_mem_attrs_1 (PATTERN (insn
), dstref
, srcref
,
7917 /* Subroutine of above to actually do the updating by recursively walking
7921 ix86_set_move_mem_attrs_1 (x
, dstref
, srcref
, dstreg
, srcreg
)
7923 rtx dstref
, srcref
, dstreg
, srcreg
;
7925 enum rtx_code code
= GET_CODE (x
);
7926 const char *format_ptr
= GET_RTX_FORMAT (code
);
7929 if (code
== MEM
&& XEXP (x
, 0) == dstreg
)
7930 MEM_COPY_ATTRIBUTES (x
, dstref
);
7931 else if (code
== MEM
&& XEXP (x
, 0) == srcreg
)
7932 MEM_COPY_ATTRIBUTES (x
, srcref
);
7934 for (i
= 0; i
< GET_RTX_LENGTH (code
); i
++, format_ptr
++)
7936 if (*format_ptr
== 'e')
7937 ix86_set_move_mem_attrs_1 (XEXP (x
, i
), dstref
, srcref
,
7939 else if (*format_ptr
== 'E')
7940 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
7941 ix86_set_move_mem_attrs_1 (XVECEXP (x
, i
, j
), dstref
, srcref
,
7946 /* Compute the alignment given to a constant that is being placed in memory.
7947 EXP is the constant and ALIGN is the alignment that the object would
7949 The value of this function is used instead of that alignment to align
7953 ix86_constant_alignment (exp
, align
)
7957 if (TREE_CODE (exp
) == REAL_CST
)
7959 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
7961 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
7964 else if (TREE_CODE (exp
) == STRING_CST
&& TREE_STRING_LENGTH (exp
) >= 31
7971 /* Compute the alignment for a static variable.
7972 TYPE is the data type, and ALIGN is the alignment that
7973 the object would ordinarily have. The value of this function is used
7974 instead of that alignment to align the object. */
7977 ix86_data_alignment (type
, align
)
7981 if (AGGREGATE_TYPE_P (type
)
7983 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
7984 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 256
7985 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 256)
7988 if (TREE_CODE (type
) == ARRAY_TYPE
)
7990 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
7992 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
7995 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
7998 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
8000 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
8003 else if ((TREE_CODE (type
) == RECORD_TYPE
8004 || TREE_CODE (type
) == UNION_TYPE
8005 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
8006 && TYPE_FIELDS (type
))
8008 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
8010 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
8013 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
8014 || TREE_CODE (type
) == INTEGER_TYPE
)
8016 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
8018 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
8025 /* Compute the alignment for a local variable.
8026 TYPE is the data type, and ALIGN is the alignment that
8027 the object would ordinarily have. The value of this macro is used
8028 instead of that alignment to align the object. */
8031 ix86_local_alignment (type
, align
)
8035 if (TREE_CODE (type
) == ARRAY_TYPE
)
8037 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
8039 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
8042 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
8044 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
8046 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
8049 else if ((TREE_CODE (type
) == RECORD_TYPE
8050 || TREE_CODE (type
) == UNION_TYPE
8051 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
8052 && TYPE_FIELDS (type
))
8054 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
8056 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
8059 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
8060 || TREE_CODE (type
) == INTEGER_TYPE
)
8063 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
8065 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
8071 #define def_builtin(NAME, TYPE, CODE) \
8072 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL_PTR)
8073 struct builtin_description
8075 enum insn_code icode
;
8077 enum ix86_builtins code
;
8078 enum rtx_code comparison
;
8082 static struct builtin_description bdesc_comi
[] =
8084 { CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, EQ
, 0 },
8085 { CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, LT
, 0 },
8086 { CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, LE
, 0 },
8087 { CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, LT
, 1 },
8088 { CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, LE
, 1 },
8089 { CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, NE
, 0 },
8090 { CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, EQ
, 0 },
8091 { CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, LT
, 0 },
8092 { CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, LE
, 0 },
8093 { CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, LT
, 1 },
8094 { CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, LE
, 1 },
8095 { CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, NE
, 0 }
8098 static struct builtin_description bdesc_2arg
[] =
8101 { CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
8102 { CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
8103 { CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
8104 { CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
8105 { CODE_FOR_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
8106 { CODE_FOR_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
8107 { CODE_FOR_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
8108 { CODE_FOR_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
8110 { CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
8111 { CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
8112 { CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
8113 { CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, 1 },
8114 { CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, 1 },
8115 { CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
8116 { CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, EQ
, 0 },
8117 { CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, LT
, 0 },
8118 { CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, LE
, 0 },
8119 { CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, LT
, 1 },
8120 { CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, LE
, 1 },
8121 { CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, UNORDERED
, 0 },
8122 { CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
8123 { CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
8124 { CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
8125 { CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS
, LT
, 1 },
8126 { CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS
, LE
, 1 },
8127 { CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
8128 { CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, EQ
, 0 },
8129 { CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, LT
, 0 },
8130 { CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, LE
, 0 },
8131 { CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, LT
, 1 },
8132 { CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, LE
, 1 },
8133 { CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
8135 { CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
8136 { CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
8137 { CODE_FOR_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
8138 { CODE_FOR_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
8140 { CODE_FOR_sse_andti3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
8141 { CODE_FOR_sse_nandti3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
8142 { CODE_FOR_sse_iorti3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
8143 { CODE_FOR_sse_xorti3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
8145 { CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
8146 { CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
8147 { CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
8148 { CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
8149 { CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
8152 { CODE_FOR_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
8153 { CODE_FOR_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
8154 { CODE_FOR_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
8155 { CODE_FOR_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
8156 { CODE_FOR_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
8157 { CODE_FOR_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
8159 { CODE_FOR_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
8160 { CODE_FOR_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
8161 { CODE_FOR_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
8162 { CODE_FOR_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
8163 { CODE_FOR_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
8164 { CODE_FOR_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
8165 { CODE_FOR_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
8166 { CODE_FOR_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
8168 { CODE_FOR_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
8169 { CODE_FOR_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
8170 { CODE_FOR_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
8172 { CODE_FOR_mmx_anddi3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
8173 { CODE_FOR_mmx_nanddi3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
8174 { CODE_FOR_mmx_iordi3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
8175 { CODE_FOR_mmx_xordi3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
8177 { CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
8178 { CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
8180 { CODE_FOR_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
8181 { CODE_FOR_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
8182 { CODE_FOR_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
8183 { CODE_FOR_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
8184 { CODE_FOR_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
8185 { CODE_FOR_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
8187 { CODE_FOR_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
8188 { CODE_FOR_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
8189 { CODE_FOR_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
8190 { CODE_FOR_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
8192 { CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
8193 { CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
8194 { CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
8195 { CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
8196 { CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
8197 { CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
8200 { CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
8201 { CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
8202 { CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
8204 { CODE_FOR_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
8205 { CODE_FOR_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
8207 { CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
8208 { CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
8209 { CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
8210 { CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
8211 { CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
8212 { CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
8214 { CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
8215 { CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
8216 { CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
8217 { CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
8218 { CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
8219 { CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
8221 { CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
8222 { CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
8223 { CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
8224 { CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
8226 { CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
8227 { CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 }
8231 static struct builtin_description bdesc_1arg
[] =
8233 { CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
8234 { CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
8236 { CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
8237 { CODE_FOR_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
8238 { CODE_FOR_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
8240 { CODE_FOR_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
8241 { CODE_FOR_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
8242 { CODE_FOR_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
8243 { CODE_FOR_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 }
8247 /* Expand all the target specific builtins. This is not called if TARGET_MMX
8248 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
8251 ix86_init_builtins ()
8253 struct builtin_description
* d
;
8255 tree endlink
= void_list_node
;
8257 tree pchar_type_node
= build_pointer_type (char_type_node
);
8258 tree pfloat_type_node
= build_pointer_type (float_type_node
);
8259 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
8260 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
8263 tree int_ftype_v4sf_v4sf
8264 = build_function_type (integer_type_node
,
8265 tree_cons (NULL_TREE
, V4SF_type_node
,
8266 tree_cons (NULL_TREE
,
8269 tree v4si_ftype_v4sf_v4sf
8270 = build_function_type (V4SI_type_node
,
8271 tree_cons (NULL_TREE
, V4SF_type_node
,
8272 tree_cons (NULL_TREE
,
8275 /* MMX/SSE/integer conversions. */
8276 tree int_ftype_v4sf_int
8277 = build_function_type (integer_type_node
,
8278 tree_cons (NULL_TREE
, V4SF_type_node
,
8279 tree_cons (NULL_TREE
,
8283 = build_function_type (integer_type_node
,
8284 tree_cons (NULL_TREE
, V4SF_type_node
,
8287 = build_function_type (integer_type_node
,
8288 tree_cons (NULL_TREE
, V8QI_type_node
,
8291 = build_function_type (integer_type_node
,
8292 tree_cons (NULL_TREE
, V2SI_type_node
,
8295 = build_function_type (V2SI_type_node
,
8296 tree_cons (NULL_TREE
, integer_type_node
,
8298 tree v4sf_ftype_v4sf_int
8299 = build_function_type (integer_type_node
,
8300 tree_cons (NULL_TREE
, V4SF_type_node
,
8301 tree_cons (NULL_TREE
, integer_type_node
,
8303 tree v4sf_ftype_v4sf_v2si
8304 = build_function_type (V4SF_type_node
,
8305 tree_cons (NULL_TREE
, V4SF_type_node
,
8306 tree_cons (NULL_TREE
, V2SI_type_node
,
8308 tree int_ftype_v4hi_int
8309 = build_function_type (integer_type_node
,
8310 tree_cons (NULL_TREE
, V4HI_type_node
,
8311 tree_cons (NULL_TREE
, integer_type_node
,
8313 tree v4hi_ftype_v4hi_int_int
8314 = build_function_type (V4HI_type_node
,
8315 tree_cons (NULL_TREE
, V4HI_type_node
,
8316 tree_cons (NULL_TREE
, integer_type_node
,
8317 tree_cons (NULL_TREE
,
8320 /* Miscellaneous. */
8321 tree v8qi_ftype_v4hi_v4hi
8322 = build_function_type (V8QI_type_node
,
8323 tree_cons (NULL_TREE
, V4HI_type_node
,
8324 tree_cons (NULL_TREE
, V4HI_type_node
,
8326 tree v4hi_ftype_v2si_v2si
8327 = build_function_type (V4HI_type_node
,
8328 tree_cons (NULL_TREE
, V2SI_type_node
,
8329 tree_cons (NULL_TREE
, V2SI_type_node
,
8331 tree v4sf_ftype_v4sf_v4sf_int
8332 = build_function_type (V4SF_type_node
,
8333 tree_cons (NULL_TREE
, V4SF_type_node
,
8334 tree_cons (NULL_TREE
, V4SF_type_node
,
8335 tree_cons (NULL_TREE
,
8338 tree v4hi_ftype_v8qi_v8qi
8339 = build_function_type (V4HI_type_node
,
8340 tree_cons (NULL_TREE
, V8QI_type_node
,
8341 tree_cons (NULL_TREE
, V8QI_type_node
,
8343 tree v2si_ftype_v4hi_v4hi
8344 = build_function_type (V2SI_type_node
,
8345 tree_cons (NULL_TREE
, V4HI_type_node
,
8346 tree_cons (NULL_TREE
, V4HI_type_node
,
8348 tree v4hi_ftype_v4hi_int
8349 = build_function_type (V4HI_type_node
,
8350 tree_cons (NULL_TREE
, V4HI_type_node
,
8351 tree_cons (NULL_TREE
, integer_type_node
,
8353 tree di_ftype_di_int
8354 = build_function_type (long_long_unsigned_type_node
,
8355 tree_cons (NULL_TREE
, long_long_unsigned_type_node
,
8356 tree_cons (NULL_TREE
, integer_type_node
,
8358 tree v8qi_ftype_v8qi_di
8359 = build_function_type (V8QI_type_node
,
8360 tree_cons (NULL_TREE
, V8QI_type_node
,
8361 tree_cons (NULL_TREE
,
8362 long_long_integer_type_node
,
8364 tree v4hi_ftype_v4hi_di
8365 = build_function_type (V4HI_type_node
,
8366 tree_cons (NULL_TREE
, V4HI_type_node
,
8367 tree_cons (NULL_TREE
,
8368 long_long_integer_type_node
,
8370 tree v2si_ftype_v2si_di
8371 = build_function_type (V2SI_type_node
,
8372 tree_cons (NULL_TREE
, V2SI_type_node
,
8373 tree_cons (NULL_TREE
,
8374 long_long_integer_type_node
,
8376 tree void_ftype_void
8377 = build_function_type (void_type_node
, endlink
);
8378 tree void_ftype_pchar_int
8379 = build_function_type (void_type_node
,
8380 tree_cons (NULL_TREE
, pchar_type_node
,
8381 tree_cons (NULL_TREE
, integer_type_node
,
8383 tree void_ftype_unsigned
8384 = build_function_type (void_type_node
,
8385 tree_cons (NULL_TREE
, unsigned_type_node
,
8387 tree unsigned_ftype_void
8388 = build_function_type (unsigned_type_node
, endlink
);
8390 = build_function_type (long_long_unsigned_type_node
, endlink
);
8392 = build_function_type (intTI_type_node
, endlink
);
8393 tree v2si_ftype_v4sf
8394 = build_function_type (V2SI_type_node
,
8395 tree_cons (NULL_TREE
, V4SF_type_node
,
8398 tree maskmovq_args
= tree_cons (NULL_TREE
, V8QI_type_node
,
8399 tree_cons (NULL_TREE
, V8QI_type_node
,
8400 tree_cons (NULL_TREE
,
8403 tree void_ftype_v8qi_v8qi_pchar
8404 = build_function_type (void_type_node
, maskmovq_args
);
8405 tree v4sf_ftype_pfloat
8406 = build_function_type (V4SF_type_node
,
8407 tree_cons (NULL_TREE
, pfloat_type_node
,
8409 tree v4sf_ftype_float
8410 = build_function_type (V4SF_type_node
,
8411 tree_cons (NULL_TREE
, float_type_node
,
8413 tree v4sf_ftype_float_float_float_float
8414 = build_function_type (V4SF_type_node
,
8415 tree_cons (NULL_TREE
, float_type_node
,
8416 tree_cons (NULL_TREE
, float_type_node
,
8417 tree_cons (NULL_TREE
,
8419 tree_cons (NULL_TREE
,
8422 /* @@@ the type is bogus */
8423 tree v4sf_ftype_v4sf_pv2si
8424 = build_function_type (V4SF_type_node
,
8425 tree_cons (NULL_TREE
, V4SF_type_node
,
8426 tree_cons (NULL_TREE
, pv2si_type_node
,
8428 tree v4sf_ftype_pv2si_v4sf
8429 = build_function_type (V4SF_type_node
,
8430 tree_cons (NULL_TREE
, V4SF_type_node
,
8431 tree_cons (NULL_TREE
, pv2si_type_node
,
8433 tree void_ftype_pfloat_v4sf
8434 = build_function_type (void_type_node
,
8435 tree_cons (NULL_TREE
, pfloat_type_node
,
8436 tree_cons (NULL_TREE
, V4SF_type_node
,
8438 tree void_ftype_pdi_di
8439 = build_function_type (void_type_node
,
8440 tree_cons (NULL_TREE
, pdi_type_node
,
8441 tree_cons (NULL_TREE
,
8442 long_long_unsigned_type_node
,
8444 /* Normal vector unops. */
8445 tree v4sf_ftype_v4sf
8446 = build_function_type (V4SF_type_node
,
8447 tree_cons (NULL_TREE
, V4SF_type_node
,
8450 /* Normal vector binops. */
8451 tree v4sf_ftype_v4sf_v4sf
8452 = build_function_type (V4SF_type_node
,
8453 tree_cons (NULL_TREE
, V4SF_type_node
,
8454 tree_cons (NULL_TREE
, V4SF_type_node
,
8456 tree v8qi_ftype_v8qi_v8qi
8457 = build_function_type (V8QI_type_node
,
8458 tree_cons (NULL_TREE
, V8QI_type_node
,
8459 tree_cons (NULL_TREE
, V8QI_type_node
,
8461 tree v4hi_ftype_v4hi_v4hi
8462 = build_function_type (V4HI_type_node
,
8463 tree_cons (NULL_TREE
, V4HI_type_node
,
8464 tree_cons (NULL_TREE
, V4HI_type_node
,
8466 tree v2si_ftype_v2si_v2si
8467 = build_function_type (V2SI_type_node
,
8468 tree_cons (NULL_TREE
, V2SI_type_node
,
8469 tree_cons (NULL_TREE
, V2SI_type_node
,
8472 = build_function_type (intTI_type_node
,
8473 tree_cons (NULL_TREE
, intTI_type_node
,
8474 tree_cons (NULL_TREE
, intTI_type_node
,
8477 = build_function_type (long_long_unsigned_type_node
,
8478 tree_cons (NULL_TREE
, long_long_unsigned_type_node
,
8479 tree_cons (NULL_TREE
,
8480 long_long_unsigned_type_node
,
8483 /* Add all builtins that are more or less simple operations on two
8485 for (i
= 0, d
= bdesc_2arg
; i
< sizeof (bdesc_2arg
) / sizeof *d
; i
++, d
++)
8487 /* Use one of the operands; the target can have a different mode for
8488 mask-generating compares. */
8489 enum machine_mode mode
;
8494 mode
= insn_data
[d
->icode
].operand
[1].mode
;
8496 if (! TARGET_SSE
&& ! VALID_MMX_REG_MODE (mode
))
8502 type
= v4sf_ftype_v4sf_v4sf
;
8505 type
= v8qi_ftype_v8qi_v8qi
;
8508 type
= v4hi_ftype_v4hi_v4hi
;
8511 type
= v2si_ftype_v2si_v2si
;
8514 type
= ti_ftype_ti_ti
;
8517 type
= di_ftype_di_di
;
8524 /* Override for comparisons. */
8525 if (d
->icode
== CODE_FOR_maskcmpv4sf3
8526 || d
->icode
== CODE_FOR_maskncmpv4sf3
8527 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
8528 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
)
8529 type
= v4si_ftype_v4sf_v4sf
;
8531 def_builtin (d
->name
, type
, d
->code
);
8534 /* Add the remaining MMX insns with somewhat more complicated types. */
8535 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int
, IX86_BUILTIN_M_FROM_INT
);
8536 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si
, IX86_BUILTIN_M_TO_INT
);
8537 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void
, IX86_BUILTIN_MMX_ZERO
);
8538 def_builtin ("__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
8539 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
8540 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
8541 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
8542 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
8543 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
8545 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
8546 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
8547 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
8549 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
8550 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
8552 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
8553 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
8555 /* Everything beyond this point is SSE only. */
8559 /* comi/ucomi insns. */
8560 for (i
= 0, d
= bdesc_comi
; i
< sizeof (bdesc_comi
) / sizeof *d
; i
++, d
++)
8561 def_builtin (d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
8563 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
8564 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
8565 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
8567 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
8568 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
8569 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
8570 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
8571 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
8572 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
8574 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int
, IX86_BUILTIN_PEXTRW
);
8575 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int
, IX86_BUILTIN_PINSRW
);
8577 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
8579 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADAPS
);
8580 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADUPS
);
8581 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADSS
);
8582 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREAPS
);
8583 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
8584 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORESS
);
8586 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
8587 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
8588 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
8589 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
8591 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
8592 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
8593 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
8594 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
8596 def_builtin ("__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
8597 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int
, IX86_BUILTIN_PREFETCH
);
8599 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
8601 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
8602 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
8603 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
8604 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
8605 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
8606 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
8608 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
8610 /* Composite intrinsics. */
8611 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float
, IX86_BUILTIN_SETPS1
);
8612 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float
, IX86_BUILTIN_SETPS
);
8613 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void
, IX86_BUILTIN_CLRPS
);
8614 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADPS1
);
8615 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADRPS
);
8616 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREPS1
);
8617 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORERPS
);
8620 /* Errors in the source file can cause expand_expr to return const0_rtx
8621 where we expect a vector. To avoid crashing, use one of the vector
8622 clear instructions. */
8624 safe_vector_operand (x
, mode
)
8626 enum machine_mode mode
;
8628 if (x
!= const0_rtx
)
8630 x
= gen_reg_rtx (mode
);
8632 if (VALID_MMX_REG_MODE (mode
))
8633 emit_insn (gen_mmx_clrdi (mode
== DImode
? x
8634 : gen_rtx_SUBREG (DImode
, x
, 0)));
8636 emit_insn (gen_sse_clrti (mode
== TImode
? x
8637 : gen_rtx_SUBREG (TImode
, x
, 0)));
8641 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
8644 ix86_expand_binop_builtin (icode
, arglist
, target
)
8645 enum insn_code icode
;
8650 tree arg0
= TREE_VALUE (arglist
);
8651 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
8652 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
8653 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
8654 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
8655 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
8656 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
8658 if (VECTOR_MODE_P (mode0
))
8659 op0
= safe_vector_operand (op0
, mode0
);
8660 if (VECTOR_MODE_P (mode1
))
8661 op1
= safe_vector_operand (op1
, mode1
);
8664 || GET_MODE (target
) != tmode
8665 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
8666 target
= gen_reg_rtx (tmode
);
8668 /* In case the insn wants input operands in modes different from
8669 the result, abort. */
8670 if (GET_MODE (op0
) != mode0
|| GET_MODE (op1
) != mode1
)
8673 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
8674 op0
= copy_to_mode_reg (mode0
, op0
);
8675 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
8676 op1
= copy_to_mode_reg (mode1
, op1
);
8678 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
8685 /* Subroutine of ix86_expand_builtin to take care of stores. */
8688 ix86_expand_store_builtin (icode
, arglist
, shuffle
)
8689 enum insn_code icode
;
8694 tree arg0
= TREE_VALUE (arglist
);
8695 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
8696 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
8697 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
8698 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
8699 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
8701 if (VECTOR_MODE_P (mode1
))
8702 op1
= safe_vector_operand (op1
, mode1
);
8704 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
8705 if (shuffle
>= 0 || ! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
8706 op1
= copy_to_mode_reg (mode1
, op1
);
8708 emit_insn (gen_sse_shufps (op1
, op1
, op1
, GEN_INT (shuffle
)));
8709 pat
= GEN_FCN (icode
) (op0
, op1
);
8715 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
8718 ix86_expand_unop_builtin (icode
, arglist
, target
, do_load
)
8719 enum insn_code icode
;
8725 tree arg0
= TREE_VALUE (arglist
);
8726 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
8727 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
8728 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
8731 || GET_MODE (target
) != tmode
8732 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
8733 target
= gen_reg_rtx (tmode
);
8735 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
8738 if (VECTOR_MODE_P (mode0
))
8739 op0
= safe_vector_operand (op0
, mode0
);
8741 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
8742 op0
= copy_to_mode_reg (mode0
, op0
);
8745 pat
= GEN_FCN (icode
) (target
, op0
);
8752 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
8753 sqrtss, rsqrtss, rcpss. */
8756 ix86_expand_unop1_builtin (icode
, arglist
, target
)
8757 enum insn_code icode
;
8762 tree arg0
= TREE_VALUE (arglist
);
8763 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
8764 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
8765 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
8768 || GET_MODE (target
) != tmode
8769 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
8770 target
= gen_reg_rtx (tmode
);
8772 if (VECTOR_MODE_P (mode0
))
8773 op0
= safe_vector_operand (op0
, mode0
);
8775 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
8776 op0
= copy_to_mode_reg (mode0
, op0
);
8778 pat
= GEN_FCN (icode
) (target
, op0
, op0
);
8785 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
8788 ix86_expand_sse_compare (d
, arglist
, target
)
8789 struct builtin_description
*d
;
8794 tree arg0
= TREE_VALUE (arglist
);
8795 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
8796 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
8797 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
8799 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
8800 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
8801 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
8802 enum rtx_code comparison
= d
->comparison
;
8804 if (VECTOR_MODE_P (mode0
))
8805 op0
= safe_vector_operand (op0
, mode0
);
8806 if (VECTOR_MODE_P (mode1
))
8807 op1
= safe_vector_operand (op1
, mode1
);
8809 /* Swap operands if we have a comparison that isn't available in
8813 target
= gen_reg_rtx (tmode
);
8814 emit_move_insn (target
, op1
);
8817 comparison
= swap_condition (comparison
);
8820 || GET_MODE (target
) != tmode
8821 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
8822 target
= gen_reg_rtx (tmode
);
8824 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
8825 op0
= copy_to_mode_reg (mode0
, op0
);
8826 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
8827 op1
= copy_to_mode_reg (mode1
, op1
);
8829 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
8830 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
8837 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
8840 ix86_expand_sse_comi (d
, arglist
, target
)
8841 struct builtin_description
*d
;
8846 tree arg0
= TREE_VALUE (arglist
);
8847 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
8848 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
8849 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
8851 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
8852 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
8853 enum rtx_code comparison
= d
->comparison
;
8855 if (VECTOR_MODE_P (mode0
))
8856 op0
= safe_vector_operand (op0
, mode0
);
8857 if (VECTOR_MODE_P (mode1
))
8858 op1
= safe_vector_operand (op1
, mode1
);
8860 /* Swap operands if we have a comparison that isn't available in
8867 comparison
= swap_condition (comparison
);
8870 target
= gen_reg_rtx (SImode
);
8871 emit_move_insn (target
, const0_rtx
);
8872 target
= gen_rtx_SUBREG (QImode
, target
, 0);
8874 if (! (*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
8875 op0
= copy_to_mode_reg (mode0
, op0
);
8876 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
8877 op1
= copy_to_mode_reg (mode1
, op1
);
8879 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
8880 pat
= GEN_FCN (d
->icode
) (op0
, op1
, op2
);
8884 emit_insn (gen_setcc_2 (target
, op2
));
8889 /* Expand an expression EXP that calls a built-in function,
8890 with result going to TARGET if that's convenient
8891 (and in mode MODE if that's convenient).
8892 SUBTARGET may be used as the target for computing one of EXP's operands.
8893 IGNORE is nonzero if the value is to be ignored. */
8896 ix86_expand_builtin (exp
, target
, subtarget
, mode
, ignore
)
8899 rtx subtarget ATTRIBUTE_UNUSED
;
8900 enum machine_mode mode ATTRIBUTE_UNUSED
;
8901 int ignore ATTRIBUTE_UNUSED
;
8903 struct builtin_description
*d
;
8905 enum insn_code icode
;
8906 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
8907 tree arglist
= TREE_OPERAND (exp
, 1);
8908 tree arg0
, arg1
, arg2
, arg3
;
8909 rtx op0
, op1
, op2
, pat
;
8910 enum machine_mode tmode
, mode0
, mode1
, mode2
;
8911 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
8915 case IX86_BUILTIN_EMMS
:
8916 emit_insn (gen_emms ());
8919 case IX86_BUILTIN_SFENCE
:
8920 emit_insn (gen_sfence ());
8923 case IX86_BUILTIN_M_FROM_INT
:
8924 target
= gen_reg_rtx (DImode
);
8925 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
8926 emit_move_insn (gen_rtx_SUBREG (SImode
, target
, 0), op0
);
8929 case IX86_BUILTIN_M_TO_INT
:
8930 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
8931 op0
= copy_to_mode_reg (DImode
, op0
);
8932 target
= gen_reg_rtx (SImode
);
8933 emit_move_insn (target
, gen_rtx_SUBREG (SImode
, op0
, 0));
8936 case IX86_BUILTIN_PEXTRW
:
8937 icode
= CODE_FOR_mmx_pextrw
;
8938 arg0
= TREE_VALUE (arglist
);
8939 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
8940 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
8941 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
8942 tmode
= insn_data
[icode
].operand
[0].mode
;
8943 mode0
= insn_data
[icode
].operand
[1].mode
;
8944 mode1
= insn_data
[icode
].operand
[2].mode
;
8946 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
8947 op0
= copy_to_mode_reg (mode0
, op0
);
8948 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
8950 /* @@@ better error message */
8951 error ("selector must be an immediate");
8955 || GET_MODE (target
) != tmode
8956 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
8957 target
= gen_reg_rtx (tmode
);
8958 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
8964 case IX86_BUILTIN_PINSRW
:
8965 icode
= CODE_FOR_mmx_pinsrw
;
8966 arg0
= TREE_VALUE (arglist
);
8967 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
8968 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
8969 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
8970 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
8971 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
8972 tmode
= insn_data
[icode
].operand
[0].mode
;
8973 mode0
= insn_data
[icode
].operand
[1].mode
;
8974 mode1
= insn_data
[icode
].operand
[2].mode
;
8975 mode2
= insn_data
[icode
].operand
[3].mode
;
8977 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
8978 op0
= copy_to_mode_reg (mode0
, op0
);
8979 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
8980 op1
= copy_to_mode_reg (mode1
, op1
);
8981 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
8983 /* @@@ better error message */
8984 error ("selector must be an immediate");
8988 || GET_MODE (target
) != tmode
8989 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
8990 target
= gen_reg_rtx (tmode
);
8991 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
8997 case IX86_BUILTIN_MASKMOVQ
:
8998 icode
= CODE_FOR_mmx_maskmovq
;
8999 /* Note the arg order is different from the operand order. */
9000 arg1
= TREE_VALUE (arglist
);
9001 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
9002 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
9003 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
9004 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
9005 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
9006 mode0
= insn_data
[icode
].operand
[0].mode
;
9007 mode1
= insn_data
[icode
].operand
[1].mode
;
9008 mode2
= insn_data
[icode
].operand
[2].mode
;
9010 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
9011 op0
= copy_to_mode_reg (mode0
, op0
);
9012 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
9013 op1
= copy_to_mode_reg (mode1
, op1
);
9014 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
9015 op2
= copy_to_mode_reg (mode2
, op2
);
9016 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
9022 case IX86_BUILTIN_SQRTSS
:
9023 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2
, arglist
, target
);
9024 case IX86_BUILTIN_RSQRTSS
:
9025 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2
, arglist
, target
);
9026 case IX86_BUILTIN_RCPSS
:
9027 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2
, arglist
, target
);
9029 case IX86_BUILTIN_LOADAPS
:
9030 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps
, arglist
, target
, 1);
9032 case IX86_BUILTIN_LOADUPS
:
9033 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
9035 case IX86_BUILTIN_STOREAPS
:
9036 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
, -1);
9037 case IX86_BUILTIN_STOREUPS
:
9038 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
, -1);
9040 case IX86_BUILTIN_LOADSS
:
9041 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss
, arglist
, target
, 1);
9043 case IX86_BUILTIN_STORESS
:
9044 return ix86_expand_store_builtin (CODE_FOR_sse_storess
, arglist
, -1);
9046 case IX86_BUILTIN_LOADHPS
:
9047 case IX86_BUILTIN_LOADLPS
:
9048 icode
= (fcode
== IX86_BUILTIN_LOADHPS
9049 ? CODE_FOR_sse_movhps
: CODE_FOR_sse_movlps
);
9050 arg0
= TREE_VALUE (arglist
);
9051 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
9052 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
9053 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
9054 tmode
= insn_data
[icode
].operand
[0].mode
;
9055 mode0
= insn_data
[icode
].operand
[1].mode
;
9056 mode1
= insn_data
[icode
].operand
[2].mode
;
9058 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
9059 op0
= copy_to_mode_reg (mode0
, op0
);
9060 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
9062 || GET_MODE (target
) != tmode
9063 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
9064 target
= gen_reg_rtx (tmode
);
9065 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
9071 case IX86_BUILTIN_STOREHPS
:
9072 case IX86_BUILTIN_STORELPS
:
9073 icode
= (fcode
== IX86_BUILTIN_STOREHPS
9074 ? CODE_FOR_sse_movhps
: CODE_FOR_sse_movlps
);
9075 arg0
= TREE_VALUE (arglist
);
9076 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
9077 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
9078 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
9079 mode0
= insn_data
[icode
].operand
[1].mode
;
9080 mode1
= insn_data
[icode
].operand
[2].mode
;
9082 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
9083 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
9084 op1
= copy_to_mode_reg (mode1
, op1
);
9086 pat
= GEN_FCN (icode
) (op0
, op0
, op1
);
9092 case IX86_BUILTIN_MOVNTPS
:
9093 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
, -1);
9094 case IX86_BUILTIN_MOVNTQ
:
9095 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
, -1);
9097 case IX86_BUILTIN_LDMXCSR
:
9098 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
9099 target
= assign_386_stack_local (SImode
, 0);
9100 emit_move_insn (target
, op0
);
9101 emit_insn (gen_ldmxcsr (target
));
9104 case IX86_BUILTIN_STMXCSR
:
9105 target
= assign_386_stack_local (SImode
, 0);
9106 emit_insn (gen_stmxcsr (target
));
9107 return copy_to_mode_reg (SImode
, target
);
9109 case IX86_BUILTIN_PREFETCH
:
9110 icode
= CODE_FOR_prefetch
;
9111 arg0
= TREE_VALUE (arglist
);
9112 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
9113 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
9114 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
9115 mode0
= insn_data
[icode
].operand
[0].mode
;
9116 mode1
= insn_data
[icode
].operand
[1].mode
;
9118 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
9120 /* @@@ better error message */
9121 error ("selector must be an immediate");
9125 op0
= copy_to_mode_reg (Pmode
, op0
);
9126 pat
= GEN_FCN (icode
) (op0
, op1
);
9132 case IX86_BUILTIN_SHUFPS
:
9133 icode
= CODE_FOR_sse_shufps
;
9134 arg0
= TREE_VALUE (arglist
);
9135 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
9136 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
9137 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
9138 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
9139 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
9140 tmode
= insn_data
[icode
].operand
[0].mode
;
9141 mode0
= insn_data
[icode
].operand
[1].mode
;
9142 mode1
= insn_data
[icode
].operand
[2].mode
;
9143 mode2
= insn_data
[icode
].operand
[3].mode
;
9145 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
9146 op0
= copy_to_mode_reg (mode0
, op0
);
9147 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
9148 op1
= copy_to_mode_reg (mode1
, op1
);
9149 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
9151 /* @@@ better error message */
9152 error ("mask must be an immediate");
9156 || GET_MODE (target
) != tmode
9157 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
9158 target
= gen_reg_rtx (tmode
);
9159 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
9165 case IX86_BUILTIN_PSHUFW
:
9166 icode
= CODE_FOR_mmx_pshufw
;
9167 arg0
= TREE_VALUE (arglist
);
9168 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
9169 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
9170 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
9171 tmode
= insn_data
[icode
].operand
[0].mode
;
9172 mode0
= insn_data
[icode
].operand
[2].mode
;
9173 mode1
= insn_data
[icode
].operand
[3].mode
;
9175 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
9176 op0
= copy_to_mode_reg (mode0
, op0
);
9177 if (! (*insn_data
[icode
].operand
[3].predicate
) (op1
, mode1
))
9179 /* @@@ better error message */
9180 error ("mask must be an immediate");
9184 || GET_MODE (target
) != tmode
9185 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
9186 target
= gen_reg_rtx (tmode
);
9187 pat
= GEN_FCN (icode
) (target
, target
, op0
, op1
);
9193 /* Composite intrinsics. */
9194 case IX86_BUILTIN_SETPS1
:
9195 target
= assign_386_stack_local (SFmode
, 0);
9196 arg0
= TREE_VALUE (arglist
);
9197 emit_move_insn (change_address (target
, SFmode
, XEXP (target
, 0)),
9198 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
9199 op0
= gen_reg_rtx (V4SFmode
);
9200 emit_insn (gen_sse_loadss (op0
, change_address (target
, V4SFmode
,
9201 XEXP (target
, 0))));
9202 emit_insn (gen_sse_shufps (op0
, op0
, op0
, GEN_INT (0)));
9205 case IX86_BUILTIN_SETPS
:
9206 target
= assign_386_stack_local (V4SFmode
, 0);
9207 op0
= change_address (target
, SFmode
, XEXP (target
, 0));
9208 arg0
= TREE_VALUE (arglist
);
9209 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
9210 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
9211 arg3
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
9212 emit_move_insn (op0
,
9213 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
9214 emit_move_insn (adj_offsettable_operand (op0
, 4),
9215 expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0));
9216 emit_move_insn (adj_offsettable_operand (op0
, 8),
9217 expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0));
9218 emit_move_insn (adj_offsettable_operand (op0
, 12),
9219 expand_expr (arg3
, NULL_RTX
, VOIDmode
, 0));
9220 op0
= gen_reg_rtx (V4SFmode
);
9221 emit_insn (gen_sse_movaps (op0
, target
));
9224 case IX86_BUILTIN_CLRPS
:
9225 target
= gen_reg_rtx (TImode
);
9226 emit_insn (gen_sse_clrti (target
));
9229 case IX86_BUILTIN_LOADRPS
:
9230 target
= ix86_expand_unop_builtin (CODE_FOR_sse_movaps
, arglist
,
9231 gen_reg_rtx (V4SFmode
), 1);
9232 emit_insn (gen_sse_shufps (target
, target
, target
, GEN_INT (0x1b)));
9235 case IX86_BUILTIN_LOADPS1
:
9236 target
= ix86_expand_unop_builtin (CODE_FOR_sse_loadss
, arglist
,
9237 gen_reg_rtx (V4SFmode
), 1);
9238 emit_insn (gen_sse_shufps (target
, target
, target
, const0_rtx
));
9241 case IX86_BUILTIN_STOREPS1
:
9242 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
, 0);
9243 case IX86_BUILTIN_STORERPS
:
9244 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
, 0x1B);
9246 case IX86_BUILTIN_MMX_ZERO
:
9247 target
= gen_reg_rtx (DImode
);
9248 emit_insn (gen_mmx_clrdi (target
));
9255 for (i
= 0, d
= bdesc_2arg
; i
< sizeof (bdesc_2arg
) / sizeof *d
; i
++, d
++)
9256 if (d
->code
== fcode
)
9258 /* Compares are treated specially. */
9259 if (d
->icode
== CODE_FOR_maskcmpv4sf3
9260 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
9261 || d
->icode
== CODE_FOR_maskncmpv4sf3
9262 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
)
9263 return ix86_expand_sse_compare (d
, arglist
, target
);
9265 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
9268 for (i
= 0, d
= bdesc_1arg
; i
< sizeof (bdesc_1arg
) / sizeof *d
; i
++, d
++)
9269 if (d
->code
== fcode
)
9270 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
9272 for (i
= 0, d
= bdesc_comi
; i
< sizeof (bdesc_comi
) / sizeof *d
; i
++, d
++)
9273 if (d
->code
== fcode
)
9274 return ix86_expand_sse_comi (d
, arglist
, target
);
9276 /* @@@ Should really do something sensible here. */
9280 /* Store OPERAND to the memory after reload is completed. This means
9281 that we can't easilly use assign_stack_local. */
9283 ix86_force_to_memory (mode
, operand
)
9284 enum machine_mode mode
;
9287 if (!reload_completed
)
9294 split_di (&operand
, 1, operands
, operands
+1);
9296 gen_rtx_SET (VOIDmode
,
9297 gen_rtx_MEM (SImode
,
9298 gen_rtx_PRE_DEC (Pmode
,
9299 stack_pointer_rtx
)),
9302 gen_rtx_SET (VOIDmode
,
9303 gen_rtx_MEM (SImode
,
9304 gen_rtx_PRE_DEC (Pmode
,
9305 stack_pointer_rtx
)),
9310 /* It is better to store HImodes as SImodes. */
9311 if (!TARGET_PARTIAL_REG_STALL
)
9312 operand
= gen_lowpart (SImode
, operand
);
9316 gen_rtx_SET (VOIDmode
,
9317 gen_rtx_MEM (GET_MODE (operand
),
9318 gen_rtx_PRE_DEC (SImode
,
9319 stack_pointer_rtx
)),
9325 return gen_rtx_MEM (mode
, stack_pointer_rtx
);
9328 /* Free operand from the memory. */
9330 ix86_free_from_memory (mode
)
9331 enum machine_mode mode
;
9333 /* Use LEA to deallocate stack space. In peephole2 it will be converted
9334 to pop or add instruction if registers are available. */
9335 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9336 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
9337 GEN_INT (mode
== DImode
9339 : mode
== HImode
&& TARGET_PARTIAL_REG_STALL
9344 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
9345 QImode must go into class Q_REGS.
9346 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
9347 movdf to do mem-to-mem moves through integer regs. */
9349 ix86_preferred_reload_class (x
, class)
9351 enum reg_class
class;
9353 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
9355 /* SSE can't load any constant directly yet. */
9356 if (SSE_CLASS_P (class))
9358 /* Floats can load 0 and 1. */
9359 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x
))
9361 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
9362 if (MAYBE_SSE_CLASS_P (class))
9363 return (reg_class_subset_p (class, GENERAL_REGS
)
9364 ? GENERAL_REGS
: FLOAT_REGS
);
9368 /* General regs can load everything. */
9369 if (reg_class_subset_p (class, GENERAL_REGS
))
9370 return GENERAL_REGS
;
9371 /* In case we haven't resolved FLOAT or SSE yet, give up. */
9372 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
9375 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x
))
9377 if (GET_MODE (x
) == QImode
&& ! reg_class_subset_p (class, Q_REGS
))
9382 /* If we are copying between general and FP registers, we need a memory
9383 location. The same is true for SSE and MMX registers.
9385 The macro can't work reliably when one of the CLASSES is class containing
9386 registers from multiple units (SSE, MMX, integer). We avoid this by never
9387 combining those units in single alternative in the machine description.
9388 Ensure that this constraint holds to avoid unexpected surprises.
9390 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
9391 enforce these sanity checks. */
9393 ix86_secondary_memory_needed (class1
, class2
, mode
, strict
)
9394 enum reg_class class1
, class2
;
9395 enum machine_mode mode
;
9398 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
9399 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
9400 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
9401 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
9402 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
9403 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
9410 return (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
)
9411 || (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
)
9412 && (mode
) != SImode
)
9413 || (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
9414 && (mode
) != SImode
));
9416 /* Return the cost of moving data from a register in class CLASS1 to
9417 one in class CLASS2.
9419 It is not required that the cost always equal 2 when FROM is the same as TO;
9420 on some machines it is expensive to move between registers if they are not
9421 general registers. */
9423 ix86_register_move_cost (mode
, class1
, class2
)
9424 enum machine_mode mode
;
9425 enum reg_class class1
, class2
;
9427 /* In case we require secondary memory, compute cost of the store followed
9428 by load. In case of copying from general_purpose_register we may emit
9429 multiple stores followed by single load causing memory size mismatch
9430 stall. Count this as arbitarily high cost of 20. */
9431 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
9433 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
9435 return (MEMORY_MOVE_COST (mode
, class1
, 0)
9436 + MEMORY_MOVE_COST (mode
, class2
, 1));
9438 /* Moves between SSE/MMX and integer unit are expensive.
9439 ??? We should make this cost CPU specific. */
9440 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
9441 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
9442 return ix86_cost
->mmxsse_to_integer
;
9443 if (MAYBE_FLOAT_CLASS_P (class1
))
9444 return ix86_cost
->fp_move
;
9445 if (MAYBE_SSE_CLASS_P (class1
))
9446 return ix86_cost
->sse_move
;
9447 if (MAYBE_MMX_CLASS_P (class1
))
9448 return ix86_cost
->mmx_move
;
9452 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
9454 ix86_hard_regno_mode_ok (regno
, mode
)
9456 enum machine_mode mode
;
9458 /* Flags and only flags can only hold CCmode values. */
9459 if (CC_REGNO_P (regno
))
9460 return GET_MODE_CLASS (mode
) == MODE_CC
;
9461 if (GET_MODE_CLASS (mode
) == MODE_CC
9462 || GET_MODE_CLASS (mode
) == MODE_RANDOM
9463 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
9465 if (FP_REGNO_P (regno
))
9466 return VALID_FP_MODE_P (mode
);
9467 if (SSE_REGNO_P (regno
))
9468 return VALID_SSE_REG_MODE (mode
);
9469 if (MMX_REGNO_P (regno
))
9470 return VALID_MMX_REG_MODE (mode
);
9471 /* We handle both integer and floats in the general purpose registers.
9472 In future we should be able to handle vector modes as well. */
9473 if (!VALID_INT_MODE_P (mode
) && !VALID_FP_MODE_P (mode
))
9475 /* Take care for QImode values - they can be in non-QI regs, but then
9476 they do cause partial register stalls. */
9477 if (regno
< 4 || mode
!= QImode
|| TARGET_64BIT
)
9479 return reload_in_progress
|| reload_completed
|| !TARGET_PARTIAL_REG_STALL
;
9482 /* Return the cost of moving data of mode M between a
9483 register and memory. A value of 2 is the default; this cost is
9484 relative to those in `REGISTER_MOVE_COST'.
9486 If moving between registers and memory is more expensive than
9487 between two registers, you should define this macro to express the
9490 Model also increased moving costs of QImode registers in non
9494 ix86_memory_move_cost (mode
, class, in
)
9495 enum machine_mode mode
;
9496 enum reg_class
class;
9499 if (FLOAT_CLASS_P (class))
9517 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
9519 if (SSE_CLASS_P (class))
9522 switch (GET_MODE_SIZE (mode
))
9536 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
9538 if (MMX_CLASS_P (class))
9541 switch (GET_MODE_SIZE (mode
))
9552 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
9554 switch (GET_MODE_SIZE (mode
))
9558 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
9559 : ix86_cost
->movzbl_load
);
9561 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
9562 : ix86_cost
->int_store
[0] + 4);
9565 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
9567 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
9570 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
9571 * (int) GET_MODE_SIZE (mode
) / 4);