/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000
   Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "insn-attr.h"
#include "basic-block.h"
#ifdef EXTRA_CONSTRAINT
/* If EXTRA_CONSTRAINT is defined, then the 'S'
   constraint in REG_CLASS_FROM_LETTER will no longer work, and various
   asm statements that need 'S' for class SIREG will break.  */
 error EXTRA_CONSTRAINT conflicts with S constraint letter
/* The previous line used to be #error, but some compilers barf
   even if the conditional was untrue.  */
#endif
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  6,			/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  23,			/* cost of a divide/mod */
  15,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  12,			/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  40,			/* cost of a divide/mod */
  15,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs pentium_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  4,			/* variable shift costs */
  1,			/* constant shift costs */
  11,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  25,			/* cost of a divide/mod */
  8,			/* "large" insn */
  6,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs pentiumpro_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  4,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  17,			/* cost of a divide/mod */
  8,			/* "large" insn */
  2,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs k6_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  3,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  18,			/* cost of a divide/mod */
  8,			/* "large" insn */
  3,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs athlon_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  5,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  42,			/* cost of a divide/mod */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 20},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 16}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs *ix86_cost = &pentium_cost;
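/* Illustrative sketch (added; not part of the original file): backend
   code prices operations against the active table through ix86_cost.
   The field names used below are assumptions made for the example --
   the authoritative struct layout lives in i386.h.  */
#if 0
static int
example_shift_cost (rtx count)
{
  /* A shift by a constant is often cheaper than a shift by %cl.  */
  return (GET_CODE (count) == CONST_INT
	  ? ix86_cost->shift_const : ix86_cost->shift_var);
}
#endif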
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
const int x86_use_any_reg = m_486;
const int x86_cmove = m_PPRO | m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
const int x86_add_esp_4 = m_ATHLON | m_K6;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
const int x86_integer_DFmode_moves = ~m_ATHLON;
const int x86_partial_reg_dependency = m_ATHLON;
const int x86_memory_mismatch_stall = m_ATHLON;
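/* Illustrative sketch (added): i386.h turns each mask above into a
   per-CPU predicate by testing the bit for the processor being tuned
   for, in the spirit of

     #define TARGET_USE_LEAVE (x86_use_leave & (1 << (int) ix86_cpu))

   The exact macro names live in i386.h, not here.  */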
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
const char * const hi_reg_name[] = HI_REGISTER_NAMES;
const char * const qi_reg_name[] = QI_REGISTER_NAMES;
const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
};
/* The "default" register map.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1,			/* arg, flags, fpsr, dir */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1,			/* arg, flags, fpsr, dir */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
};
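/* Illustrative sketch (added): debug-info emitters map a hard register
   to its DWARF number by indexing one of the tables above, e.g.

     int dwarf_regno = svr4_dbx_register_map[REGNO (reg)];

   An entry of -1 means the register has no DWARF encoding.  */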
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;
#define MAX_386_STACK_LOCALS 2

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
};

#define ix86_stack_locals (cfun->machine->stack_locals)
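/* Illustrative sketch (added): a helper in this backend hands out
   per-function scratch stack slots from the table above, roughly as
   below.  assign_stack_local is the real allocator; the caching shape
   shown here is an assumption for the example.  */
#if 0
static rtx
example_stack_local (enum machine_mode mode, int n)
{
  /* Reuse a previously assigned slot for this (mode, n) pair.  */
  if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
    ix86_stack_locals[(int) mode][n]
      = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
  return ix86_stack_locals[(int) mode][n];
}
#endif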
/* which cpu are we scheduling for */
enum processor_type ix86_cpu;

/* which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */

/* Register allocation order */
const char *ix86_reg_alloc_order;
static char regs_allocated[FIRST_PSEUDO_REGISTER];

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
int ix86_align_funcs;
const char *ix86_align_funcs_string;

/* Power of two alignment for loops.  */
int ix86_align_loops;

/* Power of two alignment for non-loop jumps.  */
int ix86_align_jumps;
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static enum rtx_code unsigned_comparison PARAMS ((enum rtx_code code));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum machine_mode ix86_fp_compare_mode PARAMS ((enum rtx_code));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx ix86_expand_compare PARAMS ((enum rtx_code));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					  rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static HOST_WIDE_INT ix86_compute_frame_size PARAMS ((HOST_WIDE_INT,
						      int *, int *, int *));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
static void ix86_emit_epilogue_esp_adjustment PARAMS ((int));
static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been processed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16
  static struct ptt
    {
      struct processor_costs *cost;	/* Processor costs */
      int target_enable;		/* Target flags to enable.  */
      int target_disable;		/* Target flags to disable.  */
      int align_loop;			/* Default alignments.  */
      int align_jump;
      int align_func;
      int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 2, 2, 2, 1},
      {&i486_cost, 0, 0, 4, 4, 4, 1},
      {&pentium_cost, 0, 0, -4, -4, -4, 1},
      {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
      {&k6_cost, 0, 0, -5, -5, 4, 1},
      {&athlon_cost, 0, 0, 4, -4, 4, 1}
    };
  static struct pta
    {
      const char *name;			/* processor name or nickname.  */
      enum processor_type processor;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386},
      {"i486", PROCESSOR_I486},
      {"i586", PROCESSOR_PENTIUM},
      {"pentium", PROCESSOR_PENTIUM},
      {"i686", PROCESSOR_PENTIUMPRO},
      {"pentiumpro", PROCESSOR_PENTIUMPRO},
      {"k6", PROCESSOR_K6},
      {"athlon", PROCESSOR_ATHLON},
    };

  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  ix86_arch = PROCESSOR_I386;
  ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;

  if (ix86_arch_string != 0)
    {
      int i;

      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
	  {
	    ix86_arch = processor_alias_table[i].processor;
	    /* Default cpu tuning to the architecture.  */
	    ix86_cpu = ix86_arch;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -march= switch", ix86_arch_string);
    }

  if (ix86_cpu_string != 0)
    {
      int i;

      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
	  {
	    ix86_cpu = processor_alias_table[i].processor;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
    }

  ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;
  /* Validate registers in register allocation order.  */
  if (ix86_reg_alloc_order)
    {
      int i, ch;

      for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a':	regno = 0;	break;
	    case 'd':	regno = 1;	break;
	    case 'c':	regno = 2;	break;
	    case 'b':	regno = 3;	break;
	    case 'S':	regno = 4;	break;
	    case 'D':	regno = 5;	break;
	    case 'B':	regno = 6;	break;

	    default:	fatal ("Register '%c' is unknown", ch);
	    }

	  if (regs_allocated[regno])
	    fatal ("Register '%c' already specified in allocation order", ch);

	  regs_allocated[regno] = 1;
	}
    }
  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      ix86_regparm = atoi (ix86_regparm_string);
      if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
	fatal ("-mregparm=%d is not between 0 and %d",
	       ix86_regparm, REGPARM_MAX);
    }

  /* Validate -malign-loops= value, or provide default.  */
  ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
  if (ix86_align_loops_string)
    {
      ix86_align_loops = atoi (ix86_align_loops_string);
      if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
	fatal ("-malign-loops=%d is not between 0 and %d",
	       ix86_align_loops, MAX_CODE_ALIGN);
    }

  /* Validate -malign-jumps= value, or provide default.  */
  ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
  if (ix86_align_jumps_string)
    {
      ix86_align_jumps = atoi (ix86_align_jumps_string);
      if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
	fatal ("-malign-jumps=%d is not between 0 and %d",
	       ix86_align_jumps, MAX_CODE_ALIGN);
    }

  /* Validate -malign-functions= value, or provide default.  */
  ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
  if (ix86_align_funcs_string)
    {
      ix86_align_funcs = atoi (ix86_align_funcs_string);
      if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
	fatal ("-malign-functions=%d is not between 0 and %d",
	       ix86_align_funcs, MAX_CODE_ALIGN);
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128.  */
  ix86_preferred_stack_boundary = 128;
  if (ix86_preferred_stack_boundary_string)
    {
      int i = atoi (ix86_preferred_stack_boundary_string);
      if (i < 2 || i > 31)
	fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
      ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
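  /* Worked example (added): -mpreferred-stack-boundary=4 gives
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16-byte
     alignment that SSE's __m128 needs; the default of 128 above encodes
     exactly that.  */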
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      ix86_branch_cost = atoi (ix86_branch_cost_string);
      if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
	fatal ("-mbranch-cost=%d is not between 0 and 5",
	       ix86_branch_cost);
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_fast_math)
    target_flags &= ~MASK_IEEE_FP;

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    target_flags |= MASK_MMX;
}
/* A C statement (sans semicolon) to choose the order in which to
   allocate hard registers for pseudo-registers local to a basic
   block.

   Store the desired register order in the array `reg_alloc_order'.
   Element 0 should be the register to allocate first; element 1, the
   next register; and so on.

   The macro body should not assume anything about the contents of
   `reg_alloc_order' before execution of the macro.

   On most machines, it is not necessary to define this macro.  */

void
order_regs_for_local_alloc ()
{
  int i, ch, order;

  /* User specified the register allocation order.  */

  if (ix86_reg_alloc_order)
    {
      for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a':	regno = 0;	break;
	    case 'd':	regno = 1;	break;
	    case 'c':	regno = 2;	break;
	    case 'b':	regno = 3;	break;
	    case 'S':	regno = 4;	break;
	    case 'D':	regno = 5;	break;
	    case 'B':	regno = 6;	break;
	    }

	  reg_alloc_order[order++] = regno;
	}

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	{
	  if (! regs_allocated[i])
	    reg_alloc_order[order++] = i;
	}
    }

  /* If user did not specify a register allocation order, use natural
     order.  */
  else
    {
      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	reg_alloc_order[i] = i;
    }
}
void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
}
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for DECL.  The attributes in ATTRIBUTES have previously been
   assigned to DECL.  */

int
ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
     tree decl ATTRIBUTE_UNUSED;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier ATTRIBUTE_UNUSED;
     tree args ATTRIBUTE_UNUSED;
{
  return 0;
}
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for TYPE.  The attributes in ATTRIBUTES have previously been
   assigned to TYPE.  */

int
ix86_valid_type_attribute_p (type, attributes, identifier, args)
     tree type;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier;
     tree args;
{
  if (TREE_CODE (type) != FUNCTION_TYPE
      && TREE_CODE (type) != METHOD_TYPE
      && TREE_CODE (type) != FIELD_DECL
      && TREE_CODE (type) != TYPE_DECL)
    return 0;

  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  if (is_attribute_p ("stdcall", identifier))
    return (args == NULL_TREE);

  /* Cdecl attribute says the callee is a normal C declaration.  */
  if (is_attribute_p ("cdecl", identifier))
    return (args == NULL_TREE);

  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  if (is_attribute_p ("regparm", identifier))
    {
      tree cst;

      if (! args || TREE_CODE (args) != TREE_LIST
	  || TREE_CHAIN (args) != NULL_TREE
	  || TREE_VALUE (args) == NULL_TREE)
	return 0;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	return 0;

      if (compare_tree_int (cst, REGPARM_MAX) > 0)
	return 0;

      return 1;
    }

  return 0;
}
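/* Usage example (added): the attributes validated above appear in user
   code like

     int __attribute__ ((regparm (3))) f (int a, int b, int c);

   which passes the first three integer arguments in registers instead
   of on the stack.  */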
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall functions will pop the stack if not variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    if (rtd
	&& (TYPE_ARG_TYPES (funtype) == NULL_TREE
	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		== void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument.  */
  if (aggregate_value_p (TREE_TYPE (funtype)))
    return GET_MODE_SIZE (Pmode);

  return 0;
}
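/* Illustrative note (added): for

     int __attribute__ ((stdcall)) g (int a, int b);

   this function reports 8, so the callee returns with `ret $8' and the
   caller emits no stack adjustment of its own.  */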
/* Argument support functions.  */

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (fntype)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_mode' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    cum->nregs = 0;
	}
    }

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* whether or not the argument was named */
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

  cum->words += words;
  cum->nregs -= words;
  cum->regno += words;

  if (cum->nregs <= 0)
    {
      cum->nregs = 0;
      cum->regno = 0;
    }

  return;
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */

rtx
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  switch (mode)
    {
      /* For now, pass fp/complex values on the stack.  */
    default:
      break;

    case BLKmode:
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	ret = gen_rtx_REG (mode, cum->regno);
      break;
    }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
/* Return nonzero if OP is (const_int 1), else return zero.  */

int
const_int_1_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
}
/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && XINT (op, 1) >= 6
	      && XINT (op, 1) <= 7))
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != 7)
	return 0;

      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}
/* Return true if the operand contains a @GOT or @GOTOFF reference.  */

int
pic_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == CONST)
    {
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
    }
  return 0;
}
/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);

  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Half-pic doesn't allow anything but registers and constants.
     We've just taken care of the latter.  */
  if (HALF_PIC_P ())
    return register_operand (op, Pmode);

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}
int
constant_call_address_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == MEM
	  && CONSTANT_ADDRESS_P (XEXP (op, 0))
	  && GET_CODE (XEXP (op, 0)) != CONST_INT);
}
/* Match exactly zero and one.  */

int
const0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return op == CONST0_RTX (mode);
}

int
const1_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return op == const1_rtx;
}
/* Match 2, 4, or 8.  Used for leal multiplicands.  */

int
const248_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
}
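/* Usage note (added): 2, 4 and 8 are the scales the x86 SIB byte can
   encode, e.g. `leal (%eax,%ebx,4), %ecx' computes %eax + %ebx*4 in a
   single instruction.  */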
/* True if this is a constant appropriate for an increment or decrement.  */

int
incdec_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (op == const1_rtx || op == constm1_rtx)
    return 1;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
    return 1;
  if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
    return 1;
  if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
    return 1;
  return 0;
}
/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is
   a register operand.

   This is used to prevent esp from being used as an index reg.
   Which would only happen in pathological cases.  */

int
reg_no_sp_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
    return 0;

  return register_operand (op, mode);
}
/* Return false if this is any eliminable register.  Otherwise
   general_operand.  */

int
general_no_elim_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return general_operand (op, mode);
}
/* Return false if this is any eliminable register.  Otherwise
   register_operand or const_int.  */

int
nonmemory_no_elim_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
}
/* Return true if op is a Q_REGS class register.  */

int
q_regs_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return QI_REG_P (op);
}

/* Return true if op is a NON_Q_REGS class register.  */

int
non_q_regs_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return NON_QI_REG_P (op);
}
/* Return 1 if OP is a comparison operator that can use the condition code
   generated by a logical operation, which characteristically does not set
   overflow or carry.  To be used with CCNOmode.  */

int
no_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;

  switch (GET_CODE (op))
    {
    case EQ: case NE:
    case LT: case GE:
    case LEU: case LTU: case GEU: case GTU:
      return 1;

    default:
      return 0;
    }
}
/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;

  switch (GET_CODE (op))
    {
    case EQ: case NE:
    case LEU: case LTU: case GEU: case GTU:
    case UNORDERED: case ORDERED:
      return 1;

    default:
      return 0;
    }
}
/* Return 1 if OP is any normal comparison operator plus {UN}ORDERED.  */

int
uno_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;

  switch (GET_CODE (op))
    {
    case EQ: case NE:
    case LE: case LT: case GE: case GT:
    case LEU: case LTU: case GEU: case GTU:
    case UNORDERED: case ORDERED:
      return 1;

    default:
      return 0;
    }
}
/* Return 1 if OP is a binary operator that can be promoted to wider mode.  */

int
promotable_binary_operator (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case MULT:
      /* Modern CPUs have same latency for HImode and SImode multiply,
	 but 386 and 486 do HImode multiply faster.  */
      return ix86_cpu > PROCESSOR_I486;
    case PLUS:
    case AND:
    case IOR:
    case XOR:
    case ASHIFT:
      return 1;
    default:
      return 0;
    }
}
/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */

int
cmp_fp_expander_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (GET_CODE (op) == CONST_DOUBLE)
    return 1;
  return general_operand (op, mode);
}
/* Match an SI or HImode register for a zero_extract.  */

int
ext_register_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;
  return register_operand (op, VOIDmode);
}
/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}
int
mult_operator (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == MULT;
}

int
div_operator (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == DIV;
}
int
arith_or_logical_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
	      || GET_RTX_CLASS (GET_CODE (op)) == '2'));
}
/* Returns 1 if OP is memory operand with a displacement.  */

int
memory_displacement_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}
/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (general_operand (op, mode))
    return 1;

  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}
/* Returns 1 if OP is memory operand that can not be represented by the
   modRM array.  */

int
long_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}
/* Return nonzero if the rtx is known aligned.  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE)
    return -1;

#if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
  {
    REAL_VALUE_TYPE d;
    jmp_buf handler;
    int is0, is1;

    if (setjmp (handler))
      return 0;

    set_float_handler (handler);
    REAL_VALUE_FROM_CONST_DOUBLE (d, x);
    is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
    is1 = REAL_VALUES_EQUAL (d, dconst1);
    set_float_handler (NULL_PTR);

    if (is0)
      return 1;
    if (is1)
      return 2;

    /* Note that on the 80387, other constants, such as pi,
       are much slower to load as standard constants
       than to load from doubles in memory!  */
    /* ??? Not true on K6: all constants are equal cost.  */
  }
#endif

  return 0;
}
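/* Illustrative note (added): the "special instructions" meant here are
   fldz (push +0.0) and fld1 (push +1.0); any other constant has to be
   loaded from memory.  */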
/* Returns 1 if OP contains a symbol reference */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  HOST_WIDE_INT tsize;
  int nregs;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif
#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32 pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  tsize = ix86_compute_frame_size (get_frame_size (), &nregs, NULL, NULL);
  return tsize == 0 && nregs == 0;
}
static char *pic_label_name;
static int pic_label_output;
static char *global_offset_table_name;
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

void
asm_output_function_prefix (file, name)
     FILE *file;
     const char *name ATTRIBUTE_UNUSED;
{
  rtx xops[2];
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  xops[0] = pic_offset_table_rtx;
  xops[1] = stack_pointer_rtx;

  /* Deep branch prediction favors having a return for every call.  */
  if (pic_reg_used && TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (!pic_label_output)
	{
	  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
	     internal (non-global) label that's being emitted, it didn't make
	     sense to have .type information for local labels.  This caused
	     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
	     me debug info for a label that you're declaring non-global?) this
	     was changed to call ASM_OUTPUT_LABEL() instead.  */

	  ASM_OUTPUT_LABEL (file, pic_label_name);

	  xops[1] = gen_rtx_MEM (SImode, xops[1]);
	  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
	  output_asm_insn ("ret", xops);

	  pic_label_output = 1;
	}
    }
}
void
load_pic_register ()
{
  rtx gotsym, pclab;

  if (global_offset_table_name == NULL)
    {
      global_offset_table_name =
	ggc_alloc_string ("_GLOBAL_OFFSET_TABLE_", 21);
      ggc_add_string_root (&global_offset_table_name, 1);
    }
  gotsym = gen_rtx_SYMBOL_REF (Pmode, global_offset_table_name);

  if (TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (pic_label_name == NULL)
	{
	  pic_label_name = ggc_alloc_string (NULL, 32);
	  ggc_add_string_root (&pic_label_name, 1);
	  ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
	}
      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
    }
  else
    {
      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
    }

  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));

  if (! TARGET_DEEP_BRANCH_PREDICTION)
    emit_insn (gen_popsi1 (pic_offset_table_rtx));

  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
}
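/* Illustrative note (added): the emitted sequence is roughly, with
   deep branch prediction,

	call	.LPR0		# .LPR0 copies (%esp) into %ebx, then rets
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   and otherwise the classic call/pop pair

	call	1f
   1:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   (exact label names and GOT arithmetic are schematic here).  */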
/* Generate an SImode "push" pattern for input ARG.  */

static rtx
gen_push (arg)
     rtx arg;
{
  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (SImode,
				   gen_rtx_PRE_DEC (SImode,
						    stack_pointer_rtx)),
		      arg);
}
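/* Illustrative note (added): the pattern built above prints as

     (set (mem:SI (pre_dec:SI (reg:SI 7 esp))) (...))

   and is emitted as a single `pushl' instruction.  */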
/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs ()
{
  int nregs = 0;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int limit = (frame_pointer_needed
	       ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
  int regno;

  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && ! call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      nregs++;
  return nregs;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;
     int to;
{
  int padding1;
  int nregs;

  /* Stack grows downward:

     [arguments]
						<- ARG_POINTER
     saved pc

     saved frame pointer if frame_pointer_needed
						<- HARD_FRAME_POINTER
     [saved regs]

     [padding1]

     [frame]

     [padding2]
						<- STACK_POINTER
   */

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    /* Skip saved PC and previous frame pointer.
       Executed only when frame_pointer_needed.  */
    return 8;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    {
      ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *) 0);
      padding1 += nregs * UNITS_PER_WORD;
      return -padding1 - 8;
    }
  else
    {
      /* ARG_POINTER or FRAME_POINTER to STACK_POINTER elimination.  */
      int frame_size = frame_pointer_needed ? 8 : 4;
      HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (),
						     &nregs, &padding1,
						     (int *) 0);

      if (to != STACK_POINTER_REGNUM)
	abort ();
      else if (from == ARG_POINTER_REGNUM)
	return tsize + nregs * UNITS_PER_WORD + frame_size;
      else if (from != FRAME_POINTER_REGNUM)
	abort ();
      else
	return tsize - padding1;
    }
}
/* Compute the size of local storage taking into consideration the
   desired stack alignment which is to be maintained.  Also determine
   the number of registers saved below the local storage.

   PADDING1 returns padding before stack frame and PADDING2 returns
   padding after stack frame.  */

static HOST_WIDE_INT
ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2)
     HOST_WIDE_INT size;
     int *nregs_on_stack;
     int *rpadding1;
     int *rpadding2;
{
  int nregs;
  int padding1;
  int padding2;
  HOST_WIDE_INT total_size;
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  int offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;

  nregs = ix86_nsaved_regs ();
  total_size = size;

  offset = frame_pointer_needed ? 8 : 4;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using
     those features that may break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (!size && stack_alignment_needed != STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < 4)
    stack_alignment_needed = 4;

  offset += nregs * UNITS_PER_WORD;

  if (ACCUMULATE_OUTGOING_ARGS)
    total_size += current_function_outgoing_args_size;

  total_size += offset;

  /* Align start of frame for local function.  */
  padding1 = ((offset + stack_alignment_needed - 1)
	      & -stack_alignment_needed) - offset;
  total_size += padding1;

  /* Align stack boundary.  */
  padding2 = ((total_size + preferred_alignment - 1)
	      & -preferred_alignment) - total_size;

  if (ACCUMULATE_OUTGOING_ARGS)
    padding2 += current_function_outgoing_args_size;

  if (nregs_on_stack)
    *nregs_on_stack = nregs;
  if (rpadding1)
    *rpadding1 = padding1;
  if (rpadding2)
    *rpadding2 = padding2;

  return size + padding1 + padding2;
}
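/* Worked example (added): with size = 20 bytes of locals, no saved
   registers and no frame pointer (offset = 4), a 16-byte
   stack_alignment_needed gives

     padding1 = ((4 + 15) & -16) - 4 = 12

   so the locals start 16-byte aligned; padding2 then rounds the grand
   total up to preferred_alignment the same way.  */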
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs ()
{
  int regno;
  int limit;
  rtx insn;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);

  limit = (frame_pointer_needed
	   ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);

  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && !call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue ()
{
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *) 0,
						 (int *) 0, (int *) 0);
  rtx insn;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  ix86_emit_save_regs ();

  if (tsize == 0)
    ;
  else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
    {
      if (frame_pointer_needed)
	insn = emit_insn (gen_pro_epilogue_adjust_stack
			  (stack_pointer_rtx, stack_pointer_rtx,
			   GEN_INT (-tsize), hard_frame_pointer_rtx));
      else
	insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				      GEN_INT (-tsize)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* ??? Is this only valid for Win32?  */
      rtx arg0, sym;

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (tsize));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx));

      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    load_pic_register ();

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  However, if -fpic, the above call will have
     done that.  */
  if ((profile_flag || profile_block_flag) && ! pic_reg_used)
    emit_insn (gen_blockage ());
}
/* Emit code to add TSIZE to esp value.  Use POP instruction when
   profitable.  */

static void
ix86_emit_epilogue_esp_adjustment (tsize)
     int tsize;
{
  /* If a frame pointer is present, we must be sure to tie the sp
     to the fp so that we don't mis-schedule.  */
  if (frame_pointer_needed)
    emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
					      stack_pointer_rtx,
					      GEN_INT (tsize),
					      hard_frame_pointer_rtx));
  else
    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			   GEN_INT (tsize)));
}
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */

static void
ix86_emit_restore_regs_using_mov (pointer, offset)
     rtx pointer;
     int offset;
{
  int regno;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int limit = (frame_pointer_needed
	       ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);

  for (regno = 0; regno < limit; regno++)
    if ((regs_ever_live[regno] && !call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      {
	emit_move_insn (gen_rtx_REG (SImode, regno),
			adj_offsettable_operand (gen_rtx_MEM (SImode,
							      pointer),
						 offset));
	offset += 4;
      }
}
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (emit_return)
     int emit_return;
{
  int nregs;
  int regno;

  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
						 (int *) 0, (int *) 0);

  /* Calculate start of saved registers relative to ebp.  */
  offset = -nregs * UNITS_PER_WORD;

#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    {
      FUNCTION_BLOCK_PROFILER_EXIT;
    }
#endif

  /* If we're only restoring one register and sp is not valid, then
     use a move instruction to restore the register, since it's less
     work than reloading sp and popping the register.

     The default code results in stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well.  Especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in future.  */
  if ((!sp_valid && nregs <= 1)
      || (frame_pointer_needed && !nregs && tsize)
      || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
	  && nregs == 1))
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !tsize))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx, tsize);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);

      if (!frame_pointer_needed)
	ix86_emit_epilogue_esp_adjustment (tsize + nregs * UNITS_PER_WORD);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size)
	emit_insn (gen_leave ());
      else
	{
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx,
						    hard_frame_pointer_rtx));
	  emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset),
						    hard_frame_pointer_rtx));
	}
      else if (tsize)
	ix86_emit_epilogue_esp_adjustment (tsize);

      for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
	if ((regs_ever_live[regno] && !call_used_regs[regno])
	    || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
	  emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (! emit_return)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return false if the structure of the address is
   grossly off.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;		/* index*scale + base */
	  else
	    disp = op1;		/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return FALSE;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return FALSE;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return FALSE;
      scale = 1 << scale;
    }
  else
    disp = addr;			/* displacement */
  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return FALSE;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
	  || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }
  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return TRUE;
}
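
/* Illustrative sketch (not part of the original sources): given the
   canonical RTL for the address "disp(base,index,4)", e.g.

       (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx))
	     (const_int 16))

   a hypothetical caller would see parts.index = %eax, parts.scale = 4,
   parts.base = %ebx and parts.disp = (const_int 16) after a successful
   ix86_decompose_address.  */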
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */

int
ix86_address_cost (x)
     rtx x;
{
  struct ix86_address parts;
  int cost = 1;

  if (!ix86_decompose_address (x, &parts))
    abort ();

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base)
	   || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;
  /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoding.  Increase the cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be
     worthwhile to split such addresses or even refuse them at all.

     The following addressing modes are affected:
	[base+scale*index]
	[scale*index+disp]
	[base+index]

     The first and last case may be avoidable by explicitly coding the
     zero in the memory address, but I don't have a K6 machine handy to
     check this.  */

  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (disp)
     register rtx disp;
{
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
    }

  if (GET_CODE (disp) != UNSPEC
      || XVECLEN (disp, 0) != 1)
    return 0;

  /* Must be @GOT or @GOTOFF.  */
  if (XINT (disp, 1) != 6
      && XINT (disp, 1) != 7)
    return 0;

  if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
      && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
    return 0;

  return 1;
}
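
/* Illustrative sketch (not from the original sources): a displacement
   accepted above for a static symbol looks like

       (const (unspec [(symbol_ref "sym")] 7))		   sym@GOTOFF

   optionally wrapped as (const (plus (unspec ...) (const_int 4))) for
   "sym@GOTOFF+4"; unspec number 6 marks the @GOT form instead.  */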
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (! ix86_decompose_address (addr, &parts))
    {
      reason = "decomposition failed";
      goto error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;
  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
	{
	  reason = "base is not a register";
	  goto error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
	{
	  reason = "base is not valid";
	  goto error;
	}
    }
  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
	{
	  reason = "index is not a register";
	  goto error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
	{
	  reason = "index is not valid";
	  goto error;
	}
    }
  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto error;
	}
    }
  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (!CONSTANT_ADDRESS_P (disp))
	{
	  reason = "displacement is not constant";
	  goto error;
	}

      if (GET_CODE (disp) == CONST_DOUBLE)
	{
	  reason = "displacement is a const_double";
	  goto error;
	}

      if (flag_pic && SYMBOLIC_CONST (disp))
	{
	  if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is a good idea, unfortunately these constructs may
	     be created by the "adds using lea" optimization for incorrect
	     code.  Such code is nonsensical, but results in addressing
	     the GOT table with a pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by the
	     "addsi3" pattern, which later gets split to lea in case
	     the output register differs from the input.  While this
	     could be handled by a separate addsi pattern for this case
	     that never results in lea, disabling the test seems to be
	     the easier and correct fix for the crash.  */
	}
      else if (HALF_PIC_P ())
	{
	  if (! HALF_PIC_ADDRESS_P (disp)
	      || (base != NULL_RTX || index != NULL_RTX))
	    {
	      reason = "displacement is an invalid half-pic reference";
	      goto error;
	    }
	}
    }
  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */
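
/* Illustrative sketch (not from the original sources): for a global
   symbol the code below builds roughly

       (mem (plus (reg pic_offset_table_rtx)
		  (const (unspec [(symbol_ref "sym")] 6))))	sym@GOT

   and loads it into REG, while a static symbol yields the address

       (plus (reg pic_offset_table_rtx)
	     (const (unspec [(symbol_ref "sym")] 7)))		sym@GOTOFF  */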
rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

  if (GET_CODE (addr) == LABEL_REF
      || (GET_CODE (addr) == SYMBOL_REF
	  && (CONSTANT_POOL_ADDRESS_P (addr)
	      || SYMBOL_REF_FLAG (addr))))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      /* This symbol must be referenced via a load from the
	 Global Offset Table (@GOT).  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
      new = gen_rtx_MEM (Pmode, new);
      RTX_UNCHANGING_P (new) = 1;

      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      emit_move_insn (reg, new);
      new = reg;
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == UNSPEC)
	    {
	      /* Check that the unspec is one of the ones we generate?  */
	    }
	  else if (GET_CODE (addr) != PLUS)
	    abort ();
	}

      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if ((GET_CODE (op0) == LABEL_REF
	       || (GET_CODE (op0) == SYMBOL_REF
		   && (CONSTANT_POOL_ADDRESS_P (op0)
		       || SYMBOL_REF_FLAG (op0))))
	      && GET_CODE (op1) == CONST_INT)
	    {
	      current_function_uses_pic_offset_table = 1;
	      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
	      new = gen_rtx_PLUS (Pmode, new, op1);
	      new = gen_rtx_CONST (Pmode, new);
	      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

	      if (reg != 0)
		{
		  emit_move_insn (reg, new);
		  new = reg;
		}
	    }
	  else
	    {
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new  = legitimize_pic_address (XEXP (addr, 1),
					     base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }

  return new;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */
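
/* Illustrative sketch (not from the original sources): an address such
   as (plus (mult (reg) (const_int 12)) (reg)) carries a scale that the
   hardware cannot encode, so the code below forces the multiplication
   into a fresh pseudo R with force_operand and retries with the
   legitimate (plus R (reg)) form.  */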
rtx
legitimize_address (x, oldx, mode)
     register rtx x;
     register rtx oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);
  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }
  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}
      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}
      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can
	 be created by virtual register instantiation, register elimination,
	 and similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}
      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant = NULL_RTX;
	  rtx other = NULL_RTX;

	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}
      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;
      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      if (code == 'P' && ! SYMBOL_REF_FLAG (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;
    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;
    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;
    case MINUS:
      putc (ASSEMBLER_DIALECT ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT ? ')' : ']', file);
      break;
    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case 6:
	  fputs ("@GOT", file);
	  break;
	case 7:
	  fputs ("@GOTOFF", file);
	  break;
	case 8:
	  fputs ("@PLT", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
   We need to handle our special PIC relocations.  */

void
i386_dwarf_output_addr_const (file, x)
     FILE *file;
     rtx x;
{
  fprintf (file, "\t%s\t", INT_ASM_OP);
  if (flag_pic)
    output_pic_addr_const (file, x, '\0');
  else
    output_addr_const (file, x);
  fputc ('\n', file);
}
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.  */

rtx
i386_simplify_dwarf_addr (orig_x)
     rtx orig_x;
{
  rtx x = orig_x;

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 0)) != REG
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == UNSPEC
      && XINT (x, 1) == 7)
    return XVECEXP (x, 0, 0);

  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && XINT (XEXP (x, 0), 1) == 7)
    return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));

  return orig_x;
}
static void
put_condition_code (code, mode, reverse, fp, file)
     enum rtx_code code;
     enum machine_mode mode;
     int reverse;
     int fp;
     FILE *file;
{
  const char *suffix;

  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      if (mode == CCNOmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode)
	suffix = "s";
      else
	suffix = "l";
      break;
    case LTU:
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode)
	suffix = "ns";
      else
	suffix = "ge";
      break;
    case GEU:
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode == CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      suffix = "be";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
void
print_reg (x, code, file)
     rtx x;
     int code;
     FILE *file;
{
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  if (code == 'w')
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else if (code == 'm' || MMX_REG_P (x))
    code = 5;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  switch (code)
    {
    case 5:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 4:
    case 8:
    case 12:
      if (! FP_REG_P (x))
	putc ('e', file);
      /* FALLTHRU */
    case 2:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   m -- print "st(n)" as an mmx register.  */
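
/* Illustrative sketch (not from the original sources): with
   operands[0] = (reg:SI 0), a template such as "mov%z0\t{%1, %k0|%k0, %1}"
   would expand to "movl ..., %eax" in AT&T syntax -- %z0 picks the "l"
   suffix from the SImode operand and %k0 prints the SImode register
   name.  */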
void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  if (code)
    {
      switch (code)
	{
	case '*':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('*', file);
	  return;

	case 'L':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('t', file);
	  return;
	case 'z':
	  /* 387 opcodes don't get size suffixes if the operands are
	     registers.  */

	  if (STACK_REG_P (x))
	    return;

	  /* Intel syntax has no truck with instruction suffixes.  */
	  if (ASSEMBLER_DIALECT != 0)
	    return;

	  /* This is the size of op from size of operand.  */
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 2:
#ifdef HAVE_GAS_FILDS_FISTS
	      putc ('s', file);
#endif
	      return;

	    case 4:
	      if (GET_MODE (x) == SFmode)
		{
		  putc ('s', file);
		  return;
		}
	      else
		putc ('l', file);
	      return;

	    case 12:
	      putc ('t', file);
	      return;

	    case 8:
	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
		{
#ifdef GAS_MNEMONICS
		  putc ('q', file);
#else
		  putc ('l', file);
		  putc ('l', file);
#endif
		}
	      else
		putc ('l', file);
	      return;

	    default:
	      abort ();
	    }
	case 's':
	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      PRINT_OPERAND (file, x, 0);
	      putc (',', file);
	    }
	  return;

	case 'C':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
	  return;

	case 'F':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
	  return;

	  /* Like above, but reverse condition.  */
	case 'c':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
	  return;

	case 'f':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
	  return;

	default:
	  {
	    char str[50];

	    sprintf (str, "invalid operand code `%c'", code);
	    output_operand_lossage (str);
	  }
	}
    }
  if (GET_CODE (x) == REG)
    {
      PRINT_REG (x, code, file);
      return;
    }

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
	{
	  const char *size;

	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "XWORD"; break;
	    case 16: size = "XMMWORD"; break;
	    default:
	      abort ();
	    }
	  fputs (size, file);
	  fputs (" PTR ", file);
	}
      x = XEXP (x, 0);
      if (flag_pic && CONSTANT_ADDRESS_P (x))
	output_pic_addr_const (file, x, code);
      else
	output_address (x);
      return;
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == 0)
	putc ('$', file);
      fprintf (file, "0x%lx", l);
    }
  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }
  else
    {
      if (code != 'P')
	{
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
/* Print a memory operand whose address is ADDR.  */

void
print_operand_address (file, addr)
     FILE *file;
     register rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
	{
	  if (ASSEMBLER_DIALECT != 0)
	    fputs ("ds:", file);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
	}
      else if (flag_pic)
	output_pic_addr_const (file, addr, 0);
      else
	output_addr_const (file, addr);
    }
  else
    {
      if (ASSEMBLER_DIALECT == 0)
	{
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    PRINT_REG (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      PRINT_REG (base, 0, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));

	  if (index)
	    {
	      putc ('+', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */
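
/* Illustrative usage (not from the original sources): a DImode move
   splitter might do

       rtx lo[2], hi[2];
       split_di (operands, 2, lo, hi);
       emit_move_insn (lo[0], lo[1]);
       emit_move_insn (hi[0], hi[1]);

   splitting both destination and source, then moving the two SImode
   halves separately (ordering the moves carefully when they overlap).  */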
void
split_di (operands, num, lo_half, hi_half)
     rtx operands[];
     int num;
     rtx lo_half[], hi_half[];
{
  while (num--)
    {
      rtx op = operands[num];
      if (CONSTANT_P (op))
	split_double (op, &lo_half[num], &hi_half[num]);
      else if (! reload_completed)
	{
	  lo_half[num] = gen_lowpart (SImode, op);
	  hi_half[num] = gen_highpart (SImode, op);
	}
      else if (GET_CODE (op) == REG)
	{
	  lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
	  hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
	}
      else if (offsettable_memref_p (op))
	{
	  rtx lo_addr = XEXP (op, 0);
	  rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
	  lo_half[num] = change_address (op, SImode, lo_addr);
	  hi_half[num] = change_address (op, SImode, hi_addr);
	}
      else
	abort ();
    }
}
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif
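
/* Illustrative sketch (not from the original sources): the wart being
   papered over is that for the "fsub %st, %st(1)"-style forms, AT&T
   derived assemblers and the Intel manuals disagree about which operand
   is subtracted from which, so the "r" (reversed) suffix has to be
   added or dropped depending on which assembler convention is in
   effect; SYSV386_COMPAT picks the traditional AT&T spelling.  */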
const char *
output_387_binary_op (insn, operands)
     rtx insn;
     rtx *operands;
{
  static char buf[30];
  const char *p;

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else
    abort ();
#endif
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      break;

    default:
      abort ();
    }

  strcpy (buf, p);
  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;
    case MINUS:
    case DIV:
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	  break;
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	  break;
	}

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (insn, operands)
     rtx insn;
     rtx *operands;
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  rtx xops[4];

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (! STACK_TOP_P (operands[1]))
    abort ();

  xops[0] = GEN_INT (12);
  xops[1] = adj_offsettable_operand (operands[2], 1);
  xops[1] = change_address (xops[1], QImode, NULL_RTX);

  xops[2] = operands[0];
  if (GET_CODE (operands[0]) != MEM)
    xops[2] = operands[3];

  output_asm_insn ("fnstcw\t%2", operands);
  output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
  output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
  output_asm_insn ("fldcw\t%2", operands);
  output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);

  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z2\t%2", xops);
  else
    output_asm_insn ("fist%z2\t%2", xops);

  output_asm_insn ("fldcw\t%2", operands);

  if (GET_CODE (operands[0]) != MEM)
    {
      if (dimode_p)
	{
	  split_di (operands+0, 1, xops+0, xops+1);
	  split_di (operands+3, 1, xops+2, xops+3);
	  output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
	  output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
	}
      else if (GET_MODE (operands[0]) == SImode)
	output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
      else
	output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
    }

  return "";
}
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.  */

const char *
output_fp_compare (insn, operands, eflags_p, unordered_p)
     rtx insn;
     rtx *operands;
     int eflags_p, unordered_p;
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];

  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }

  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare.  */

      if (eflags_p == 1)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return "fstp\t%y0";
	}
      else
	{
	  if (unordered_p)
	    return "fucompp\n\tfnstsw\t%0";
	  else
	    return "fcompp\n\tfnstsw\t%0";
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[24] =
      {
	"fcom%z1\t%y1",
	"fcomp%z1\t%y1",
	"fucom%z1\t%y1",
	"fucomp%z1\t%y1",

	"ficom%z1\t%y1",
	"ficomp%z1\t%y1",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL,

	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 24)
	abort ();
      ret = alt[mask];
      if (ret == NULL)
	abort ();

      return ret;
    }
}
/* Output assembler code to FILE to initialize basic-block profiling.

   If profile_block_flag == 2

	Output code to call the subroutine `__bb_init_trace_func'
	and pass two parameters to it.  The first parameter is
	the address of a block allocated in the object module.
	The second parameter is the number of the first basic block
	of the function.

	The name of the block is a local symbol made with this statement:

	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);

	Of course, since you are writing the definition of
	`ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
	can take a short cut in the definition of this macro and use the
	name that you know will result.

	The number of the first basic block of the function is
	passed to the macro in BLOCK_OR_LABEL.

	If described in a virtual assembler language the code to be
	output looks like:

	    parameter1 <- LPBX0
	    parameter2 <- BLOCK_OR_LABEL
	    call __bb_init_trace_func

   else if profile_block_flag != 0

	Output code to call the subroutine `__bb_init_func'
	and pass one single parameter to it, which is the same
	as the first parameter to `__bb_init_trace_func'.

	The first word of this parameter is a flag which will be nonzero if
	the object module has already been initialized.  So test this word
	first, and do not call `__bb_init_func' if the flag is nonzero.
	Note: When profile_block_flag == 2 the test need not be done
	but `__bb_init_trace_func' *must* be called.

	BLOCK_OR_LABEL may be used to generate a label number as a
	branch destination in case `__bb_init_func' will not be called.  */

void
ix86_output_function_block_profiler (file, block_or_label)
     FILE *file;
     int block_or_label;
{
  static int num_func = 0;
  rtx xops[8];
  char block_table[80], false_label[80];
  ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);

  xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
  xops[5] = stack_pointer_rtx;
  xops[7] = gen_rtx_REG (Pmode, 0);	/* eax */

  CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;

  switch (profile_block_flag)
    {
    case 2:
      xops[2] = GEN_INT (block_or_label);
      xops[3] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (VOIDmode,
						 "__bb_init_trace_func"));
      xops[6] = GEN_INT (8);

      output_asm_insn ("push{l}\t%2", xops);
      if (!flag_pic)
	output_asm_insn ("push{l}\t%1", xops);
      else
	{
	  output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
	  output_asm_insn ("push{l}\t%7", xops);
	}
      output_asm_insn ("call\t%P3", xops);
      output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
      break;

    default:
      ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);

      xops[0] = const0_rtx;
      xops[2] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (VOIDmode, false_label));
      xops[3] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
      xops[4] = gen_rtx_MEM (Pmode, xops[1]);
      xops[6] = GEN_INT (4);

      CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;

      output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
      output_asm_insn ("jne\t%2", xops);

      if (!flag_pic)
	output_asm_insn ("push{l}\t%1", xops);
      else
	{
	  output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
	  output_asm_insn ("push{l}\t%7", xops);
	}
      output_asm_insn ("call\t%P3", xops);
      output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
      ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
      num_func++;
      break;
    }
}
/* Output assembler code to FILE to increment a counter associated
   with basic block number BLOCKNO.

   If profile_block_flag == 2

	Output code to initialize the global structure `__bb' and
	call the function `__bb_trace_func' which will increment the
	counter.

	`__bb' consists of two words.  In the first word the number
	of the basic block has to be stored.  In the second word
	the address of a block allocated in the object module
	has to be stored.

	The basic block number is given by BLOCKNO.

	The address of the block is given by the label created with

	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);

	by FUNCTION_BLOCK_PROFILER.

	Of course, since you are writing the definition of
	`ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
	can take a short cut in the definition of this macro and use the
	name that you know will result.

	If described in a virtual assembler language the code to be
	output looks like:

	    move BLOCKNO -> (__bb)
	    move LPBX0 -> (__bb+4)
	    call __bb_trace_func

	Note that function `__bb_trace_func' must not change the
	machine state, especially the flag register.  To grant
	this, you must output code to save and restore registers
	either in this macro or in the macros MACHINE_STATE_SAVE
	and MACHINE_STATE_RESTORE.  The last two macros will be
	used in the function `__bb_trace_func', so you must make
	sure that the function prologue does not change any
	register prior to saving it with MACHINE_STATE_SAVE.

   else if profile_block_flag != 0

	Output code to increment the counter directly.
	Basic blocks are numbered separately from zero within each
	compiled object module.  The count associated with block number
	BLOCKNO is at index BLOCKNO in an array of words; the name of
	this array is a local symbol made with this statement:

	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);

	Of course, since you are writing the definition of
	`ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
	can take a short cut in the definition of this macro and use the
	name that you know will result.

	If described in a virtual assembler language the code to be
	output looks like:

	    inc (LPBX2+4*BLOCKNO)  */

void
ix86_output_block_profiler (file, blockno)
     FILE *file ATTRIBUTE_UNUSED;
     int blockno;
{
  rtx xops[8], cnt_rtx;
  char counts[80];
  char *block_table = counts;

  switch (profile_block_flag)
    {
    case 2:
      ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);

      xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
      xops[2] = GEN_INT (blockno);
      xops[3] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
      xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
      xops[5] = plus_constant (xops[4], 4);
      xops[0] = gen_rtx_MEM (SImode, xops[4]);
      xops[6] = gen_rtx_MEM (SImode, xops[5]);

      CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;

      output_asm_insn ("pushf", xops);
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      if (flag_pic)
	{
	  xops[7] = gen_rtx_REG (Pmode, 0);	/* eax */
	  output_asm_insn ("push{l}\t%7", xops);
	  output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
	  output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
	  output_asm_insn ("pop{l}\t%7", xops);
	}
      else
	output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
      output_asm_insn ("call\t%P3", xops);
      output_asm_insn ("popf", xops);
      break;

    default:
      ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
      cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
      SYMBOL_REF_FLAG (cnt_rtx) = TRUE;

      if (blockno)
	cnt_rtx = plus_constant (cnt_rtx, blockno * 4);

      if (flag_pic)
	cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);

      xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
      output_asm_insn ("inc{l}\t%0", xops);
      break;
    }
}
void
ix86_expand_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  int strict = (reload_in_progress || reload_completed);
  rtx insn;

  if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
    {
      /* Emit insns to move operands[1] into operands[0].  */

      if (GET_CODE (operands[0]) == MEM)
	operands[1] = force_reg (Pmode, operands[1]);
      else
	{
	  rtx temp = operands[0];
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (operands[1], temp);
	  if (temp == operands[0])
	    return;
	  operands[1] = temp;
	}
    }
  else
    {
      if (GET_CODE (operands[0]) == MEM
	  && (GET_MODE (operands[0]) == QImode
	      || !push_operand (operands[0], mode))
	  && GET_CODE (operands[1]) == MEM)
	operands[1] = force_reg (mode, operands[1]);

      if (push_operand (operands[0], mode)
	  && ! general_no_elim_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (operands[1]) == CONST_DOUBLE
		   && register_operand (operands[0], mode))
	    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
	}
    }

  insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);

  emit_insn (insn);
}
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */
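
/* Illustrative sketch (not from the original sources): for C code like
   "*p = *p + x" the expander below leaves (set (mem) (plus (mem) (reg)))
   alone, since destination and first source match, but for
   "*p = *q + x" it first copies *q into a pseudo so that at most one of
   the three references remains a memory operand the insn patterns can
   actually match.  */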
void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators.  */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }
  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }
  /* If the operation is not commutative, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE.  */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }
  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[3];
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutative, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
	    || (GET_RTX_CLASS (code) == 'c'
		&& rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutative and source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != 'c'
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;

  return TRUE;
}
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];
  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);
  /* If optimizing, copy to regs to improve CSE.  */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (code, mode, operands)
     enum rtx_code code ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[2] ATTRIBUTE_UNUSED;
{
  /* If one of operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (insn, req_mode)
     rtx insn;
     enum machine_mode req_mode;
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCmode:
      if (req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCNOmode:
      if (req_mode == CCZmode)
	return 0;
      break;

    default:
      abort ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
/* Produce an unsigned comparison for a given signed comparison.  */

static enum rtx_code
unsigned_comparison (code)
     enum rtx_code code;
{
  switch (code)
    {
    case GT:
      code = GTU;
      break;
    case LT:
      code = LTU;
      break;
    case GE:
      code = GEU;
      break;
    case LE:
      code = LEU;
      break;
    default:
      break;
    }
  return code;
}
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

static enum machine_mode
ix86_fp_compare_mode (code)
     enum rtx_code code;
{
  int unordered;

  switch (code)
    {
    case NE: case EQ:
      /* When not doing IEEE compliant compares, fault on NaNs.  */
      unordered = (TARGET_IEEE_FP != 0);
      break;

    case LT: case LE: case GT: case GE:
      unordered = 0;
      break;

    case UNORDERED: case ORDERED:
    case UNEQ: case UNGE: case UNGT: case UNLE: case UNLT: case LTGT:
      unordered = 1;
      break;

    default:
      abort ();
    }

  /* ??? If we knew whether invalid-operand exceptions were masked,
     we could rely on fcom to raise an exception and take care of
     NaNs.  But we don't.  We could know this from c99 math pragmas.  */

  return unordered ? CCFPUmode : CCFPmode;
}
/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (code)
     enum rtx_code code;
{
  return (TARGET_CMOVE
	  && (code == ORDERED || code == UNORDERED
	      /* All other unordered compares require checking
		 multiple sets of bits.  */
	      || ix86_fp_compare_mode (code) == CCFPmode));
}
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (code, pop0, pop1)
     enum rtx_code code;
     rtx *pop0, *pop1;
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (fpcmp_mode == CCFPUmode
      || op_mode == XFmode
      || ix86_use_fcomi_compare (code))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (code, op0, op1, scratch)
     enum rtx_code code;
     rtx op0, op1, scratch;
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  /* %%% fcomi is probably always faster, even when dealing with memory,
     since compare-and-branch would be three insns instead of four.  */
  if (ix86_use_fcomi_compare (code))
    {
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
      emit_insn (tmp);

      /* The FP codes work out to act like unsigned.  */
      code = unsigned_comparison (code);
      intcmp_mode = CCmode;
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */

      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
      if (fpcmp_mode == CCFPmode
	  || code == ORDERED
	  || code == UNORDERED)
	{
	  /* We have two options here -- use sahf, or testing bits of ah
	     directly.  On PPRO, they are equivalent, sahf being one byte
	     smaller.  On Pentium, sahf is non-pairable while test is UV
	     pairable.  */

	  if (TARGET_USE_SAHF || optimize_size)
	    {
	    do_sahf:
	      emit_insn (gen_x86_sahf_1 (scratch));

	      /* The FP codes work out to act like unsigned.  */
	      code = unsigned_comparison (code);
	      intcmp_mode = CCmode;
	    }
	  else
	    {
	      /*
	       * The numbers below correspond to the bits of the FPSW in AH.
	       * C3, C2, and C0 are in bits 0x40, 0x04, and 0x01 respectively.
	       *
	       *    cmp    C3 C2 C0
	       *    >      0  0  0
	       *    <      0  0  1
	       *    =      1  0  0
	       *    un     1  1  1
	       */

	      int mask = 0;

	      switch (code)
		{
		case GT:
		  mask = 0x41;
		  code = EQ;
		  break;
		case LT:
		  mask = 0x01;
		  code = NE;
		  break;
		case GE:
		  /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
		     faster in all cases to just fall back on sahf.  */
		  goto do_sahf;
		case LE:
		  mask = 0x41;
		  code = NE;
		  break;
		case EQ:
		  mask = 0x40;
		  code = NE;
		  break;
		case NE:
		  mask = 0x40;
		  code = EQ;
		  break;
		case UNORDERED:
		  mask = 0x04;
		  code = NE;
		  break;
		case ORDERED:
		  mask = 0x04;
		  code = EQ;
		  break;
		default:
		  abort ();
		}

	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (mask)));
	      intcmp_mode = CCNOmode;
	    }
	}
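      /* Illustrative sketch (not from the original sources): after
	 "fnstsw" the FPSW lands in AX, so e.g. an ordered "x > y"
	 becomes roughly

	     fnstsw %ax
	     testb  $0x41, %ah	   C3|C0 -- clear only for ">"
	     je     ...

	 with the CCNOmode EQ test standing in for the fp condition.  */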
      else
	{
	  /* In the unordered case, we have to check C2 for NaN's, which
	     doesn't happen to work out to anything nice combination-wise.
	     So do some bit twiddling on the value we've got in AH to come
	     up with an appropriate set of condition codes.  */

	  intcmp_mode = CCNOmode;
	  switch (code)
	    {
	    case GT:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	      break;
	    case LT:
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	      break;
	    case GE:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	      break;
	    case LE:
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	      break;
	    case EQ:
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	      break;
	    case NE:
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x40)));
	      code = NE;
	      break;

	    case UNORDERED:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	      code = NE;
	      break;
	    case ORDERED:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	      code = EQ;
	      break;
	    case UNEQ:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	      break;
	    case UNGE:
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x01)));
	      code = NE;
	      break;
	    case UNGT:
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	      break;
	    case UNLE:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	      break;
	    case UNLT:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	      break;
	    case LTGT:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	      break;

	    default:
	      abort ();
	    }
	}
    }
  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}

ix86_expand_compare (code)
{
  rtx op0, op1, ret;

  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    ret = ix86_expand_fp_compare (code, op0, op1, gen_reg_rtx (HImode));
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}
ix86_expand_branch (code, label)
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {

      tmp = ix86_expand_compare (code);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

      /* Don't expand the comparison early, so that we get better code
	 when jump or whoever decides to reverse the comparison.  */

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	tmp = gen_rtx_fmt_ee (code, VOIDmode,
			      ix86_compare_op0, ix86_compare_op1);
	tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				    gen_rtx_LABEL_REF (VOIDmode, label),
				    pc_rtx);
	tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	use_fcomi = ix86_use_fcomi_compare (code);
	vec = rtvec_alloc (3 + !use_fcomi);
	RTVEC_ELT (vec, 0) = tmp;
	RTVEC_ELT (vec, 1)
	  = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	RTVEC_ELT (vec, 2)
	  = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	if (! use_fcomi)
	  RTVEC_ELT (vec, 3)
	    = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
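
	    /* Illustrative expansion (not from the original comments): a
	       64-bit "a == b" on a 32-bit target becomes, roughly,

		    xorl  hi(b), hi(a)
		    xorl  lo(b), lo(a)
		    orl   hi(a), lo(a)
		    jz    label

	       one flag-setting or instead of two compare+branch pairs.  */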
	    return;
	  }

	/* Otherwise, if we are doing less-than and op1 is a constant with
	   a zero low word, we can just examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx
	    && (code == LT || code == LTU))
	  {
	    ix86_compare_op0 = hi[0];
	    ix86_compare_op1 = hi[1];
	    ix86_expand_branch (code, label);
	    return;
	  }
	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = NIL; code2 = NE;  break;
	  case NE:   code2 = NIL; break;
	  }

	/*
	 * if (hi(a) < hi(b)) goto true;
	 * if (hi(a) > hi(b)) goto false;
	 * if (lo(a) < lo(b)) goto true;
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != NIL)
	  ix86_expand_branch (code1, label);
	if (code2 != NIL)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != NIL)
	  emit_label (label2);
      }
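
      /* Worked example (illustrative, not from the original sources): a
	 signed 64-bit "a < b" gets code1 = LT, code2 = GT and code3 = LTU,
	 so the emitted sequence is roughly

	      cmpl  hi(b), hi(a)
	      jl    label	; high words decide: less
	      jg    label2	; high words decide: not less
	      cmpl  lo(b), lo(a)
	      jb    label	; high words equal: unsigned low compare
	   label2:
       */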
ix86_expand_setcc (code, dest)
{
  rtx ret, tmp;
  int type;

  if (GET_MODE (ix86_compare_op0) == DImode)
    return 0; /* FAIL */

  /* Three modes of generation:
     0 -- destination does not overlap compare sources:
	  clear dest first, emit strict_low_part setcc.
     1 -- destination does overlap compare sources:
	  emit subreg setcc, zero extend.
     2 -- destination is in QImode:
	  emit setcc only.  */

  type = 0;
  if (GET_MODE (dest) == QImode)
    type = 2;
  else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
	   || reg_overlap_mentioned_p (dest, ix86_compare_op1))
    type = 1;

  if (type == 0)
    emit_move_insn (dest, const0_rtx);

  ret = ix86_expand_compare (code);
  PUT_MODE (ret, QImode);

  if (type == 0)
    {
      tmp = gen_lowpart (QImode, dest);
      tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
    }
  else if (type == 1)
    {
      if (!cse_not_expected)
	tmp = gen_reg_rtx (QImode);
      else
	tmp = gen_lowpart (QImode, dest);
    }
  else
    tmp = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));

  if (type == 1)
    {
      rtx clob;

      tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
      tmp = gen_rtx_SET (VOIDmode, dest, tmp);
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
      emit_insn (tmp);
    }

  return 1; /* DONE */
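
  /* Mode 0 above corresponds to a sequence like the following
     (illustrative only, not from the original comments):

	xorl  %eax, %eax	; clear dest before the compare sets flags
	cmpl  %edx, %ecx
	setg  %al		; strict_low_part write of the QImode result

     clearing first avoids a partial-register stall on the later
     full-width read of dest.  */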
ix86_expand_int_movcc (operands)
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;

  /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
     If the comparison is done with an immediate, we can convert it to
     LTU or GEU by altering the integer.  */

  if ((code == LEU || code == GTU)
      && GET_CODE (ix86_compare_op1) == CONST_INT
      && GET_MODE (operands[0]) != HImode
      && (unsigned int) INTVAL (ix86_compare_op1) != 0xffffffff
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      if (code == LEU)
	code = LTU;
      else
	code = GTU;
      ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
    }

  start_sequence ();
  compare_op = ix86_expand_compare (code);
  compare_seq = gen_sequence ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if (GET_MODE (operands[0]) != HImode
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      if (compare_code == LTU || compare_code == GEU)
	{
	  /* Detect overlap between destination and compare sources.  */
	  rtx tmp = out;

	  /* To simplify rest of code, restrict to the GEU case.  */
	  if (compare_code == LTU)
	    {
	      HOST_WIDE_INT tmp = ct;
	      ct = cf;
	      cf = tmp;
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	  diff = ct - cf;

	  if (reg_overlap_mentioned_p (out, ix86_compare_op0)
	      || reg_overlap_mentioned_p (out, ix86_compare_op1))
	    tmp = gen_reg_rtx (SImode);

	  emit_insn (compare_seq);
	  emit_insn (gen_x86_movsicc_0_m1 (tmp));

	  if (diff == 1)
	    emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
	  else if (cf == -1)
	    emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
	  else if (diff == -1 && ct)
	    {
	      emit_insn (gen_one_cmplsi2 (tmp, tmp));
	      emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
	    }
	  else
	    {
	      /*
	       * andl cf - ct, dest
	       */
	      emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
	      emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
	    }

	  if (tmp != out)
	    emit_move_insn (out, tmp);

	  return 1; /* DONE */
	}
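
	  /* Illustrative expansion of the general sbb case (not from the
	     original comments), for "out = (a >= b unsigned) ? 5 : 9":

		cmpl  b, a		; CF = 1 iff a < b unsigned
		sbbl  %eax, %eax	; %eax = CF ? -1 : 0
		andl  $4, %eax		; cf - ct = 9 - 5 = 4
		addl  $5, %eax		; 9 when a < b, 5 otherwise

	     entirely branch-free.  */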
      if (diff < 0)
	{
	  HOST_WIDE_INT tmp;
	  tmp = ct, ct = cf, cf = tmp;
	  diff = ct - cf;
	  compare_code = reverse_condition (compare_code);
	  code = reverse_condition (code);
	}
      if (diff == 1 || diff == 2 || diff == 4 || diff == 8
	  || diff == 3 || diff == 5 || diff == 9)
	{
	  /*
	   * xorl dest,dest
	   * cmpl op1,op2
	   * setcc dest
	   * lea cf(dest*(ct-cf)),dest
	   *
	   * This also catches the degenerate setcc-only case.
	   */

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
	  if (diff & 1)
	    tmp = gen_rtx_PLUS (SImode, tmp, out);
	  if (cf != 0)
	    tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));

	  emit_move_insn (out, tmp);

	  clob = gen_rtx_REG (CCmode, FLAGS_REG);
	  clob = gen_rtx_CLOBBER (VOIDmode, clob);

	  tmp = gen_rtx_SET (VOIDmode, out, tmp);
	  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));

	  emit_insn (gen_rtx_SET (VOIDmode, out, tmp));

	  if (out != operands[0])
	    emit_move_insn (operands[0], out);

	  return 1; /* DONE */
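
	  /* Illustrative (not from the original comments): with ct = 7,
	     cf = 3, diff = 4, the sequence is

		xorl  %eax, %eax
		cmpl  op2, op1
		setcc %al			; 0 or 1
		leal  3(,%eax,4), %eax		; cf + dest*diff = 3 or 7

	     a single lea standing in for the and/add pair.  */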
      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for size.
       *
       * %%% Parameterize branch_cost on the tuning architecture, then
       * use that.  The 80386 couldn't care less about mispredicts.
       */

      if (!optimize_size && !TARGET_CMOVE)
	{
	  if (ct == 0)
	    {
	      ct = cf;
	      cf = 0;
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  emit_insn (gen_addsi3 (out, out, constm1_rtx));
	  emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
	  if (ct != 0)
	    emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
	  if (out != operands[0])
	    emit_move_insn (operands[0], out);

	  return 1; /* DONE */
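
	  /* Illustrative trace (not from the original comments), with
	     ct = 5, cf = 9:

		setcc dest	; 1 if true, 0 if false
		decl  dest	; 0 if true, -1 if false
		andl  $4, dest	; 0 if true, cf - ct = 4 if false
		addl  $5, dest	; ct = 5 if true, cf = 9 if false
	   */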
  /* Try a few things more with specific constants and a variable.  */

  {
    rtx var, orig_out, out, tmp;

      return 0; /* FAIL */

    /* If one of the two operands is an interesting constant, load a
       constant with the above and mask it in with a logical operation.  */

    if (GET_CODE (operands[2]) == CONST_INT)
      {
	var = operands[3];
	if (INTVAL (operands[2]) == 0)
	  operands[3] = constm1_rtx, op = and_optab;
	else if (INTVAL (operands[2]) == -1)
	  operands[3] = const0_rtx, op = ior_optab;
	else
	  return 0; /* FAIL */
      }
    else if (GET_CODE (operands[3]) == CONST_INT)
      {
	var = operands[2];
	if (INTVAL (operands[3]) == 0)
	  operands[2] = constm1_rtx, op = and_optab;
	else if (INTVAL (operands[3]) == -1)
	  operands[2] = const0_rtx, op = ior_optab;
	else
	  return 0; /* FAIL */
      }
    else
      return 0; /* FAIL */

    orig_out = operands[0];
    tmp = gen_reg_rtx (GET_MODE (orig_out));
    operands[0] = tmp;

    /* Recurse to get the constant loaded.  */
    if (ix86_expand_int_movcc (operands) == 0)
      return 0; /* FAIL */

    /* Mask in the interesting variable.  */
    out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
			OPTAB_WIDEN);
    if (out != orig_out)
      emit_move_insn (orig_out, out);

    return 1; /* DONE */
  }
  /*
   * For comparison with above, the cmov case.
   */

  if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
    operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
  if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
    operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						compare_op, operands[2],
						operands[3])));

  return 1; /* DONE */
}
ix86_expand_fp_movcc (operands)
{
  enum rtx_code code;
  enum machine_mode mode;
  rtx tmp;

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);

      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);

      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;

  mode = SELECT_CC_MODE (code, ix86_compare_op0, ix86_compare_op1);
  emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (mode, FLAGS_REG),
			  gen_rtx_COMPARE (mode,
					   ix86_compare_op0,
					   ix86_compare_op1)));
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						gen_rtx_fmt_ee (code, VOIDmode,
								gen_rtx_REG (mode, FLAGS_REG),
								const0_rtx),
						operands[2],
						operands[3])));
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

ix86_split_to_parts (operand, parts, mode)
     enum machine_mode mode;
{
  int size = GET_MODE_SIZE (mode) / 4;

  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool references to immediates.  This is used by
     fp moves, which force all constants to memory to allow combining.  */

  if (GET_CODE (operand) == MEM
      && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
    operand = get_pool_constant (XEXP (operand, 0));

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      PUT_MODE (operand, SImode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      PUT_MODE (operand, SImode);
	      parts[0] = operand;
	      parts[1] = adj_offsettable_operand (operand, 4);
	      if (size == 3)
		parts[2] = adj_offsettable_operand (operand, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[3];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);

		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = GEN_INT (l[2]);

		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);

	      parts[1] = GEN_INT (l[1]);
	      parts[0] = GEN_INT (l[0]);
	    }
	}
    }
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

ix86_split_long_move (operands1)
{
  rtx part[2][3];
  rtx operands[2];
  int size = GET_MODE_SIZE (GET_MODE (operands1[0])) / 4;
  int push = 0;
  int collisions = 0;

  /* Make our own copy to avoid clobbering the operands.  */
  operands[0] = copy_rtx (operands1[0]);
  operands[1] = copy_rtx (operands1[1]);

  if (size < 2 || size > 3)
    abort ();

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
  ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (size == 3)
	part[1][1] = part[1][2];
      part[1][0] = part[1][1];
    }

  /* We need to do the copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (size == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && size == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
				  XEXP (part[1][0], 0)));
	  part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
	  part[1][1] = adj_offsettable_operand (part[1][0], 4);
	  if (size == 3)
	    part[1][2] = adj_offsettable_operand (part[1][0], 8);
	}
    }

  if (push)
    {
      if (size == 3)
	emit_insn (gen_push (part[1][2]));
      emit_insn (gen_push (part[1][1]));
      emit_insn (gen_push (part[1][0]));
      return 1;
    }

  /* Choose the correct order so as not to overwrite the source before
     it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (size == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (GET_CODE (part[1][0]) == MEM
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      if (size == 3)
	{
	  operands1[2] = part[0][2];
	  operands1[3] = part[0][1];
	  operands1[4] = part[0][0];
	  operands1[5] = part[1][2];
	  operands1[6] = part[1][1];
	  operands1[7] = part[1][0];
	}
      else
	{
	  operands1[2] = part[0][1];
	  operands1[3] = part[0][0];
	  operands1[5] = part[1][1];
	  operands1[6] = part[1][0];
	}
    }
  else
    {
      if (size == 3)
	{
	  operands1[2] = part[0][0];
	  operands1[3] = part[0][1];
	  operands1[4] = part[0][2];
	  operands1[5] = part[1][0];
	  operands1[6] = part[1][1];
	  operands1[7] = part[1][2];
	}
      else
	{
	  operands1[2] = part[0][0];
	  operands1[3] = part[0][1];
	  operands1[5] = part[1][0];
	  operands1[6] = part[1][1];
	}
    }

  return 0;
}
ix86_split_ashldi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
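
/* Illustrative only (not from the original sources): for a constant count
   of 40 the CONST_INT path above reduces to

	movl  low, high		; the whole-word part of the shift
	xorl  low, low
	shll  $8, high		; the remaining 40 - 32 = 8 bits

   while a count of 10 uses the shld/shll pair:

	shldl $10, low, high	; high = high << 10 | low >> 22
	shll  $10, low
 */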
ix86_split_ashrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);

	  if (! reload_completed)
	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
	  else
	    {
	      emit_move_insn (high[0], low[0]);
	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
	    }

	  if (count > 32)
	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = gen_reg_rtx (SImode);
	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}
ix86_split_lshrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}
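
/* A sketch of what the variable-count adjustment amounts to (illustrative;
   the real x86_shift_adj_* patterns live in i386.md): shrd/shr only honor
   the low five bits of %cl, so counts of 32..63 still need the halves
   swapped afterwards.  With cmov that is, for this logical right shift,

	xorl  scratch, scratch
	shrdl %cl, high, low
	shrl  %cl, high
	testb $32, %cl
	cmovne high, low	; count >= 32: low gets high >> (count-32)
	cmovne scratch, high	;              high becomes zero

   and without cmov a test-and-branch performs the same fixup.  */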
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
	     not aligned, otherwise undefined

   This is just the body.  It needs the initialisations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
     rtx out, align_rtx, scratch;
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx no_flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx z_flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx tmpreg = gen_reg_rtx (SImode);

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      /* Is there a known alignment and is it not 2?  */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_insn (gen_cmpsi_ccz_1 (align_rtx, const0_rtx));

	  tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 align_4_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

	  emit_insn (gen_cmpsi_ccno_1 (align_rtx, GEN_INT (2)));

	  tmp = gen_rtx_EQ (VOIDmode, no_flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 align_2_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

	  tmp = gen_rtx_GTU (VOIDmode, no_flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 align_3_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check whether it is aligned to 4 bytes.  */

	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_insn (gen_cmpsi_ccz_1 (align_rtx, const0_rtx));

	  tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 align_4_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
	}

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte-per-byte basis.  */
      emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));

      tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_0_label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

      /* Increment the address.  */
      emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2.  */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));

	  tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 end_0_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

	  emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));

      tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_0_label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

      emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate the loop to check 4 bytes at a time.  It is not a good idea
     to align this loop; it only bloats the code and does not speed
     anything up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 0,
			   align_4_label);
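
  /* Worked example of the formula (illustrative, not from the original
     comments): for x = 0x64636200, i.e. the bytes 00 62 63 64,

	x - 0x01010101   = 0x636260ff
	~x		 = 0x9b9c9dff
	the and of both	 = 0x030000ff
	& 0x80808080	 = 0x00000080	-> nonzero: the low byte was zero

     while a word with no zero byte always yields 0, so one test and one
     conditional branch cover all four bytes.  */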
  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg, tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg,
			      gen_rtx_PLUS (SImode, out, GEN_INT (2))));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg, out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();

      /* Is zero in the first two bytes?  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
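
  /* Illustrative accounting (not from the original comments): at this
     point OUT is 4 past the word that contained the zero (6 past if we
     moved two bytes forward above), and bit 7 of TMPREG says whether the
     zero is in the even byte of the remaining pair.  addqi3_cc copies
     that bit into the carry flag, so subtracting 3 plus carry leaves OUT
     pointing exactly at the terminating byte.  */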
  emit_label (end_0_label);
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

ix86_init_machine_status (p)
{
  enum machine_mode mode;
  int n;

  p->machine
    = (struct machine_function *) xmalloc (sizeof (struct machine_function));

  for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
       mode = (enum machine_mode) ((int) mode + 1))
    for (n = 0; n < MAX_386_STACK_LOCALS; n++)
      ix86_stack_locals[(int) mode][n] = NULL_RTX;
}

/* Mark machine specific bits of P for GC.  */

ix86_mark_machine_status (p)
{
  enum machine_mode mode;
  int n;

  for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
       mode = (enum machine_mode) ((int) mode + 1))
    for (n = 0; n < MAX_386_STACK_LOCALS; n++)
      ggc_mark_rtx (p->machine->stack_locals[(int) mode][n]);
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

assign_386_stack_local (mode, n)
     enum machine_mode mode;
{
  if (n < 0 || n >= MAX_386_STACK_LOCALS)
    abort ();

  if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
    ix86_stack_locals[(int) mode][n]
      = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  return ix86_stack_locals[(int) mode][n];
}
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

memory_address_length (addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* Special cases: ebp and esp need the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
	    len = 1;
	  else
	    len = 4;
	}

      /* An index requires the two-byte modrm form.  */
      if (index)
	len += 1;
    }

  return len;
}
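
/* For instance (illustrative, not from the original comments): the
   address in "movl 8(%ebp,%esi,4), %eax" needs SIB + disp8, so the
   function above counts 1 (disp8, since 8 satisfies the 'K' constraint)
   + 1 (the index forces the SIB byte) = 2 bytes beyond the opcode and
   modrm.  */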
/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate
   alternative.  */

ix86_attr_length_immediate_default (insn, shortform)
{
  int i;

  extract_insn (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	if (shortform
	    && GET_CODE (recog_data.operand[i]) == CONST_INT
	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
	  len = 1;
	else
	  switch (get_attr_mode (insn))
	    {

	    default:
	      fatal_insn ("Unknown insn mode", insn);
	    }
      }
/* Compute the default value for the "length_address" attribute.  */

ix86_attr_length_address_default (insn)
{
  int i;

  extract_insn (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      return memory_address_length (XEXP (recog_data.operand[i], 0));

  return 0;
}

/* Return the maximum number of instructions a cpu can issue.  */

ix86_issue_rate ()
{
  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
      return 2;

    case PROCESSOR_PENTIUMPRO:
      return 3;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

ix86_flags_dependant (insn, dep_insn, insn_type)
     enum attr_type insn_type;
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return 0;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
	   && XVECLEN (PATTERN (dep_insn), 0) == 2
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }

  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
    return 0;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return 0;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return 0;

  return 1;
}
/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

ix86_agi_dependant (insn, dep_insn, insn_type)
     enum attr_type insn_type;
{
  rtx addr;

  if (insn_type == TYPE_LEA)
    {
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
	;
      else if (GET_CODE (addr) == PARALLEL
	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      else
	abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      int i;

      extract_insn (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    break;
	  }
    }

  return modified_in_p (addr, dep_insn);
}
ix86_adjust_cost (insn, link, dep_insn, cost)
     rtx insn, link, dep_insn;
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  /* Prologue and epilogue allocators can have a false dependency on ebp.
     This results in one cycle extra stall on Pentium prologue scheduling,
     so handle this important case manually.  */
  if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
      && dep_insn_type == TYPE_ALU
      && !reg_mentioned_p (stack_pointer_rtx, insn))
    return 0;

  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require the value to be ready one cycle
	 earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
	  && dep_insn_type != TYPE_FMOV
	  && ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
	      || memory == MEMORY_BOTH))
	cost += 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && GET_CODE (SET_DEST (set2)) == MEM)
	cost += 1;
      break;

    case PROCESSOR_K6:
      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
	  || memory == MEMORY_BOTH)
	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;
      break;

    case PROCESSOR_ATHLON:
      if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
	  || memory == MEMORY_BOTH)
	{
	  if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
static union
{
  struct ppro_sched_data
  {
    rtx decode[3];
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;

ix86_safe_length (insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_length (insn);
}

ix86_safe_length_prefix (insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_length (insn);
}

static enum attr_memory
ix86_safe_memory (insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_memory (insn);
  else
    return MEMORY_UNKNOWN;
}

static enum attr_pent_pair
ix86_safe_pent_pair (insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_pent_pair (insn);
  else
    return PENT_PAIR_NP;
}

static enum attr_ppro_uops
ix86_safe_ppro_uops (insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_ppro_uops (insn);
  else
    return PPRO_UOPS_MANY;
}
ix86_dump_ppro_packet (dump)
{
  if (ix86_sched_data.ppro.decode[0])
    {
      fprintf (dump, "PPRO packet: %d",
	       INSN_UID (ix86_sched_data.ppro.decode[0]));
      if (ix86_sched_data.ppro.decode[1])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
      if (ix86_sched_data.ppro.decode[2])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
    }
}

/* We're beginning a new block.  Initialize data structures as necessary.  */

ix86_sched_init (dump, sched_verbose)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
{
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
}
/* Shift INSN to SLOT, and shift everything else down.  */

ix86_reorder_insn (insnp, slot)
{
  rtx insn = *insnp;
  do
    insnp[0] = insnp[1];
  while (++insnp != slot);
  *insnp = insn;
}
/* Find an instruction with given pairability and minimal amount of cycles
   lost by the fact that the CPU waits for both pipelines to finish before
   reading next instructions.  Also take care that both instructions
   together cannot exceed 7 bytes.  */

ix86_pent_find_pair (e_ready, ready, type, first)
     enum attr_pent_pair type;
{
  int mincycles, cycles;
  enum attr_pent_pair tmp;
  enum attr_memory memory;
  rtx *insnp, *bestinsnp = NULL;

  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
    return NULL;

  memory = ix86_safe_memory (first);
  cycles = result_ready_cost (first);
  mincycles = INT_MAX;

  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
      {
	enum attr_memory second_memory;
	int secondcycles, currentcycles;

	second_memory = ix86_safe_memory (*insnp);
	secondcycles = result_ready_cost (*insnp);
	currentcycles = abs (cycles - secondcycles);

	if (secondcycles >= 1 && cycles >= 1)
	  {
	    /* Two read/modify/write instructions together take two
	       cycles longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
	      currentcycles += 2;

	    /* A read/modify/write instruction followed by a read/modify
	       instruction takes one cycle longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
		&& tmp != PENT_PAIR_UV
		&& ix86_safe_pent_pair (first) != PENT_PAIR_UV)
	      currentcycles += 1;
	  }
	if (currentcycles < mincycles)
	  bestinsnp = insnp, mincycles = currentcycles;
      }

  return bestinsnp;
}
/* Subroutines of ix86_sched_reorder.  */

ix86_sched_reorder_pentium (ready, e_ready)
{
  enum attr_pent_pair pair1, pair2;
  rtx *insnp;

  /* This wouldn't be necessary if Haifa knew that static insn ordering
     is important to which pipe an insn is issued to.  So we have to make
     some minor rearrangements.  */

  pair1 = ix86_safe_pent_pair (*e_ready);

  /* If the first insn is non-pairable, let it be.  */
  if (pair1 == PENT_PAIR_NP)
    return;

  pair2 = PENT_PAIR_NP;
  insnp = 0;

  /* If the first insn is UV or PV pairable, search for a PU
     insn to go with.  */
  if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
    {
      insnp = ix86_pent_find_pair (e_ready - 1, ready,
				   PENT_PAIR_PU, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PU;
    }

  /* If the first insn is PU or UV pairable, search for a PV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP
      && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
    {
      insnp = ix86_pent_find_pair (e_ready - 1, ready,
				   PENT_PAIR_PV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PV;
    }

  /* If the first insn is pairable, search for a UV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP)
    {
      insnp = ix86_pent_find_pair (e_ready - 1, ready,
				   PENT_PAIR_UV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_UV;
    }

  if (pair2 == PENT_PAIR_NP)
    return;

  /* Found something!  Decide if we need to swap the order.  */
  if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
      || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
	  && ix86_safe_memory (*e_ready) == MEMORY_BOTH
	  && ix86_safe_memory (*insnp) == MEMORY_LOAD))
    ix86_reorder_insn (insnp, e_ready);
  else
    ix86_reorder_insn (insnp, e_ready - 1);
}
ix86_sched_reorder_ppro (ready, e_ready)
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	decode[0] = *e_ready--;
      else
	{
	  /* Otherwise, search for a 2-4 uop insn to issue.  */
	  while (cur_uops != PPRO_UOPS_FEW)
	    {
	      if (insnp == ready)
		break;
	      cur_uops = ix86_safe_ppro_uops (*--insnp);
	    }

	  /* If so, move it to the head of the line.  */
	  if (cur_uops == PPRO_UOPS_FEW)
	    ix86_reorder_insn (insnp, e_ready);

	  /* Issue the head of the queue.  */
	  issued_this_cycle = 1;
	  decode[0] = *e_ready--;
	}
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready >= e_ready)
	  break;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int n_ready;
     int clock_var ATTRIBUTE_UNUSED;
{
  rtx *e_ready = ready + n_ready - 1;

  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
      ix86_sched_reorder_pentium (ready, e_ready);
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

  return ix86_issue_rate ();
}
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
{
  int i;

  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;

	    ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}

	    if (i == 3)
	      {
		ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

ix86_constant_alignment (exp, align)
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
	   && align < 256)
    return 256;

  return align;
}
6871 ix86_data_alignment (type
, align
)
6875 if (AGGREGATE_TYPE_P (type
)
6877 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
6878 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 256
6879 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 256)
6882 if (TREE_CODE (type
) == ARRAY_TYPE
)
6884 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
6886 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
6889 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
6892 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
6894 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
6897 else if ((TREE_CODE (type
) == RECORD_TYPE
6898 || TREE_CODE (type
) == UNION_TYPE
6899 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
6900 && TYPE_FIELDS (type
))
6902 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
6904 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
6907 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
6908 || TREE_CODE (type
) == INTEGER_TYPE
)
6910 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
6912 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

ix86_local_alignment (type, align)
{
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}