]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
* real.c (toe64): Remove stale #endif from the last change.
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
e075ae69 1/* Subroutines used for code generation on IA-32.
4592bdcb
JL
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000
3 Free Software Foundation, Inc.
2a2ab3f9
JVA
4
5This file is part of GNU CC.
6
7GNU CC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU CC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU CC; see the file COPYING. If not, write to
97aadbb9 19the Free Software Foundation, 59 Temple Place - Suite 330,
32b5b1aa 20Boston, MA 02111-1307, USA. */
2a2ab3f9 21
0b6b2900 22#include <setjmp.h>
2a2ab3f9 23#include "config.h"
bb5177ac 24#include "system.h"
2a2ab3f9 25#include "rtl.h"
6baf1cc8
BS
26#include "tree.h"
27#include "tm_p.h"
2a2ab3f9
JVA
28#include "regs.h"
29#include "hard-reg-set.h"
30#include "real.h"
31#include "insn-config.h"
32#include "conditions.h"
33#include "insn-flags.h"
34#include "output.h"
35#include "insn-attr.h"
2a2ab3f9 36#include "flags.h"
a8ffcc81 37#include "except.h"
ecbc4695 38#include "function.h"
00c79232 39#include "recog.h"
ced8dd8c 40#include "expr.h"
f103890b 41#include "toplev.h"
e075ae69 42#include "basic-block.h"
1526a060 43#include "ggc.h"
2a2ab3f9 44
#ifdef EXTRA_CONSTRAINT
/* If EXTRA_CONSTRAINT is defined, then the 'S'
   constraint in REG_CLASS_FROM_LETTER will no longer work, and various
   asm statements that need 'S' for class SIREG will break.  */
 error EXTRA_CONSTRAINT conflicts with S constraint letter
/* The previous line used to be #error, but some compilers barf
   even if the conditional was untrue.  */
#endif

#ifndef CHECK_STACK_LIMIT
/* Fallback when the target headers do not define a stack-probe limit.  */
#define CHECK_STACK_LIMIT -1
#endif
32b5b1aa
SC
58/* Processor costs (relative to an add) */
59struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 60 1, /* cost of an add instruction */
32b5b1aa
SC
61 1, /* cost of a lea instruction */
62 3, /* variable shift costs */
63 2, /* constant shift costs */
64 6, /* cost of starting a multiply */
65 1, /* cost of multiply per each bit set */
e075ae69 66 23, /* cost of a divide/mod */
96e7ae40 67 15, /* "large" insn */
e2e52e1b 68 3, /* MOVE_RATIO */
7c6b971d 69 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
70 {2, 4, 2}, /* cost of loading integer registers
71 in QImode, HImode and SImode.
72 Relative to reg-reg move (2). */
73 {2, 4, 2}, /* cost of storing integer registers */
74 2, /* cost of reg,reg fld/fst */
75 {8, 8, 8}, /* cost of loading fp registers
76 in SFmode, DFmode and XFmode */
77 {8, 8, 8} /* cost of loading integer registers */
32b5b1aa
SC
78};
79
80struct processor_costs i486_cost = { /* 486 specific costs */
81 1, /* cost of an add instruction */
82 1, /* cost of a lea instruction */
83 3, /* variable shift costs */
84 2, /* constant shift costs */
85 12, /* cost of starting a multiply */
86 1, /* cost of multiply per each bit set */
e075ae69 87 40, /* cost of a divide/mod */
96e7ae40 88 15, /* "large" insn */
e2e52e1b 89 3, /* MOVE_RATIO */
7c6b971d 90 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
91 {2, 4, 2}, /* cost of loading integer registers
92 in QImode, HImode and SImode.
93 Relative to reg-reg move (2). */
94 {2, 4, 2}, /* cost of storing integer registers */
95 2, /* cost of reg,reg fld/fst */
96 {8, 8, 8}, /* cost of loading fp registers
97 in SFmode, DFmode and XFmode */
98 {8, 8, 8} /* cost of loading integer registers */
32b5b1aa
SC
99};
100
e5cb57e8 101struct processor_costs pentium_cost = {
32b5b1aa
SC
102 1, /* cost of an add instruction */
103 1, /* cost of a lea instruction */
856b07a1 104 4, /* variable shift costs */
e5cb57e8 105 1, /* constant shift costs */
856b07a1
SC
106 11, /* cost of starting a multiply */
107 0, /* cost of multiply per each bit set */
e075ae69 108 25, /* cost of a divide/mod */
96e7ae40 109 8, /* "large" insn */
e2e52e1b 110 6, /* MOVE_RATIO */
7c6b971d 111 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
112 {2, 4, 2}, /* cost of loading integer registers
113 in QImode, HImode and SImode.
114 Relative to reg-reg move (2). */
115 {2, 4, 2}, /* cost of storing integer registers */
116 2, /* cost of reg,reg fld/fst */
117 {2, 2, 6}, /* cost of loading fp registers
118 in SFmode, DFmode and XFmode */
119 {4, 4, 6} /* cost of loading integer registers */
32b5b1aa
SC
120};
121
856b07a1
SC
122struct processor_costs pentiumpro_cost = {
123 1, /* cost of an add instruction */
124 1, /* cost of a lea instruction */
e075ae69 125 1, /* variable shift costs */
856b07a1 126 1, /* constant shift costs */
369e59b1 127 4, /* cost of starting a multiply */
856b07a1 128 0, /* cost of multiply per each bit set */
e075ae69 129 17, /* cost of a divide/mod */
96e7ae40 130 8, /* "large" insn */
e2e52e1b 131 6, /* MOVE_RATIO */
7c6b971d 132 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
133 {4, 4, 4}, /* cost of loading integer registers
134 in QImode, HImode and SImode.
135 Relative to reg-reg move (2). */
136 {2, 2, 2}, /* cost of storing integer registers */
137 2, /* cost of reg,reg fld/fst */
138 {2, 2, 6}, /* cost of loading fp registers
139 in SFmode, DFmode and XFmode */
140 {4, 4, 6} /* cost of loading integer registers */
856b07a1
SC
141};
142
a269a03c
JC
143struct processor_costs k6_cost = {
144 1, /* cost of an add instruction */
e075ae69 145 2, /* cost of a lea instruction */
a269a03c
JC
146 1, /* variable shift costs */
147 1, /* constant shift costs */
73fe76e4 148 3, /* cost of starting a multiply */
a269a03c 149 0, /* cost of multiply per each bit set */
e075ae69 150 18, /* cost of a divide/mod */
96e7ae40 151 8, /* "large" insn */
e2e52e1b 152 4, /* MOVE_RATIO */
7c6b971d 153 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
154 {4, 5, 4}, /* cost of loading integer registers
155 in QImode, HImode and SImode.
156 Relative to reg-reg move (2). */
157 {2, 3, 2}, /* cost of storing integer registers */
158 4, /* cost of reg,reg fld/fst */
159 {6, 6, 6}, /* cost of loading fp registers
160 in SFmode, DFmode and XFmode */
161 {4, 4, 4} /* cost of loading integer registers */
a269a03c
JC
162};
163
309ada50
JH
164struct processor_costs athlon_cost = {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 1, /* variable shift costs */
168 1, /* constant shift costs */
169 5, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 19, /* cost of a divide/mod */
172 8, /* "large" insn */
e2e52e1b 173 9, /* MOVE_RATIO */
309ada50
JH
174 4, /* cost for loading QImode using movzbl */
175 {4, 5, 4}, /* cost of loading integer registers
176 in QImode, HImode and SImode.
177 Relative to reg-reg move (2). */
178 {2, 3, 2}, /* cost of storing integer registers */
179 4, /* cost of reg,reg fld/fst */
180 {6, 6, 6}, /* cost of loading fp registers
181 in SFmode, DFmode and XFmode */
182 {4, 4, 4} /* cost of loading integer registers */
183};
184
32b5b1aa
SC
185struct processor_costs *ix86_cost = &pentium_cost;
186
a269a03c
JC
187/* Processor feature/optimization bitmasks. */
188#define m_386 (1<<PROCESSOR_I386)
189#define m_486 (1<<PROCESSOR_I486)
190#define m_PENT (1<<PROCESSOR_PENTIUM)
191#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
192#define m_K6 (1<<PROCESSOR_K6)
309ada50 193#define m_ATHLON (1<<PROCESSOR_ATHLON)
a269a03c 194
309ada50
JH
195const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
196const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
a269a03c 197const int x86_zero_extend_with_and = m_486 | m_PENT;
369e59b1 198const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */;
e075ae69 199const int x86_double_with_add = ~m_386;
a269a03c 200const int x86_use_bit_test = m_386;
e2e52e1b 201const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
a269a03c
JC
202const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
203const int x86_use_any_reg = m_486;
309ada50
JH
204const int x86_cmove = m_PPRO | m_ATHLON;
205const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
206const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON;
e075ae69
RH
207const int x86_partial_reg_stall = m_PPRO;
208const int x86_use_loop = m_K6;
309ada50 209const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
e075ae69
RH
210const int x86_use_mov0 = m_K6;
211const int x86_use_cltd = ~(m_PENT | m_K6);
212const int x86_read_modify_write = ~m_PENT;
213const int x86_read_modify = ~(m_PENT | m_PPRO);
214const int x86_split_long_moves = m_PPRO;
e9e80858 215const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
f90800f8 216const int x86_single_stringop = m_386;
d9f32422
JH
217const int x86_qimode_math = ~(0);
218const int x86_promote_qi_regs = 0;
219const int x86_himode_math = ~(m_PPRO);
220const int x86_promote_hi_regs = m_PPRO;
a269a03c 221
564d80f4 222#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
2a2ab3f9 223
e075ae69
RH
224const char * const hi_reg_name[] = HI_REGISTER_NAMES;
225const char * const qi_reg_name[] = QI_REGISTER_NAMES;
226const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
4c0d89b5
RS
227
228/* Array of the smallest class containing reg number REGNO, indexed by
229 REGNO. Used by REGNO_REG_CLASS in i386.h. */
230
e075ae69 231enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
232{
233 /* ax, dx, cx, bx */
ab408a86 234 AREG, DREG, CREG, BREG,
4c0d89b5 235 /* si, di, bp, sp */
e075ae69 236 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
237 /* FP registers */
238 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 239 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 240 /* arg pointer */
83774849 241 NON_Q_REGS,
564d80f4
JH
242 /* flags, fpsr, dirflag, frame */
243 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS
4c0d89b5 244};
c572e5ba 245
83774849
RH
246/* The "default" register map. */
247
248int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
249{
250 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
251 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
252 -1, -1, -1, -1, /* arg, flags, fpsr, dir */
253};
254
255/* Define the register numbers to be used in Dwarf debugging information.
256 The SVR4 reference port C compiler uses the following register numbers
257 in its Dwarf output code:
258 0 for %eax (gcc regno = 0)
259 1 for %ecx (gcc regno = 2)
260 2 for %edx (gcc regno = 1)
261 3 for %ebx (gcc regno = 3)
262 4 for %esp (gcc regno = 7)
263 5 for %ebp (gcc regno = 6)
264 6 for %esi (gcc regno = 4)
265 7 for %edi (gcc regno = 5)
266 The following three DWARF register numbers are never generated by
267 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
268 believes these numbers have these meanings.
269 8 for %eip (no gcc equivalent)
270 9 for %eflags (gcc regno = 17)
271 10 for %trapno (no gcc equivalent)
272 It is not at all clear how we should number the FP stack registers
273 for the x86 architecture. If the version of SDB on x86/svr4 were
274 a bit less brain dead with respect to floating-point then we would
275 have a precedent to follow with respect to DWARF register numbers
276 for x86 FP registers, but the SDB on x86/svr4 is so completely
277 broken with respect to FP registers that it is hardly worth thinking
278 of it as something to strive for compatibility with.
279 The version of x86/svr4 SDB I have at the moment does (partially)
280 seem to believe that DWARF register number 11 is associated with
281 the x86 register %st(0), but that's about all. Higher DWARF
282 register numbers don't seem to be associated with anything in
283 particular, and even for DWARF regno 11, SDB only seems to under-
284 stand that it should say that a variable lives in %st(0) (when
285 asked via an `=' command) if we said it was in DWARF regno 11,
286 but SDB still prints garbage when asked for the value of the
287 variable in question (via a `/' command).
288 (Also note that the labels SDB prints for various FP stack regs
289 when doing an `x' command are all wrong.)
290 Note that these problems generally don't affect the native SVR4
291 C compiler because it doesn't allow the use of -O with -g and
292 because when it is *not* optimizing, it allocates a memory
293 location for each floating-point variable, and the memory
294 location is what gets described in the DWARF AT_location
295 attribute for the variable in question.
296 Regardless of the severe mental illness of the x86/svr4 SDB, we
297 do something sensible here and we use the following DWARF
298 register numbers. Note that these are all stack-top-relative
299 numbers.
300 11 for %st(0) (gcc regno = 8)
301 12 for %st(1) (gcc regno = 9)
302 13 for %st(2) (gcc regno = 10)
303 14 for %st(3) (gcc regno = 11)
304 15 for %st(4) (gcc regno = 12)
305 16 for %st(5) (gcc regno = 13)
306 17 for %st(6) (gcc regno = 14)
307 18 for %st(7) (gcc regno = 15)
308*/
309int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
310{
311 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
312 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
313 -1, 9, -1, -1, /* arg, flags, fpsr, dir */
314};
315
316
317
c572e5ba
JVA
318/* Test and compare insns in i386.md store the information needed to
319 generate branch and scc insns here. */
320
e075ae69
RH
321struct rtx_def *ix86_compare_op0 = NULL_RTX;
322struct rtx_def *ix86_compare_op1 = NULL_RTX;
f5316dfe 323
36edd3cc
BS
324#define MAX_386_STACK_LOCALS 2
325
326/* Define the structure for the machine field in struct function. */
327struct machine_function
328{
329 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
330};
331
01d939e8 332#define ix86_stack_locals (cfun->machine->stack_locals)
36edd3cc 333
c8c5cb99 334/* which cpu are we scheduling for */
e42ea7f9 335enum processor_type ix86_cpu;
c8c5cb99
SC
336
337/* which instruction set architecture to use. */
c942177e 338int ix86_arch;
c8c5cb99
SC
339
340/* Strings to hold which cpu and instruction set architecture to use. */
9c23aa47
ZW
341const char *ix86_cpu_string; /* for -mcpu=<xxx> */
342const char *ix86_arch_string; /* for -march=<xxx> */
c8c5cb99 343
f5316dfe 344/* Register allocation order */
e075ae69 345const char *ix86_reg_alloc_order;
f5316dfe
MM
346static char regs_allocated[FIRST_PSEUDO_REGISTER];
347
b08de47e 348/* # of registers to use to pass arguments. */
e075ae69 349const char *ix86_regparm_string;
e9a25f70 350
e075ae69
RH
351/* ix86_regparm_string as a number */
352int ix86_regparm;
e9a25f70
JL
353
354/* Alignment to use for loops and jumps: */
355
356/* Power of two alignment for loops. */
e075ae69 357const char *ix86_align_loops_string;
e9a25f70
JL
358
359/* Power of two alignment for non-loop jumps. */
e075ae69 360const char *ix86_align_jumps_string;
e9a25f70 361
3af4bd89 362/* Power of two alignment for stack boundary in bytes. */
e075ae69 363const char *ix86_preferred_stack_boundary_string;
3af4bd89
JH
364
365/* Preferred alignment for stack boundary in bits. */
e075ae69 366int ix86_preferred_stack_boundary;
3af4bd89 367
e9a25f70 368/* Values 1-5: see jump.c */
e075ae69
RH
369int ix86_branch_cost;
370const char *ix86_branch_cost_string;
e9a25f70
JL
371
372/* Power of two alignment for functions. */
e075ae69
RH
373int ix86_align_funcs;
374const char *ix86_align_funcs_string;
b08de47e 375
e9a25f70 376/* Power of two alignment for loops. */
e075ae69 377int ix86_align_loops;
b08de47e 378
e9a25f70 379/* Power of two alignment for non-loop jumps. */
e075ae69
RH
380int ix86_align_jumps;
381\f
f6da8bc3
KG
382static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
383static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
e075ae69 384 int, int, FILE *));
f6da8bc3
KG
385static enum rtx_code unsigned_comparison PARAMS ((enum rtx_code code));
386static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
3a3677ff
RH
387static enum machine_mode ix86_fp_compare_mode PARAMS ((enum rtx_code));
388static int ix86_use_fcomi_compare PARAMS ((enum rtx_code));
389static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
390 rtx *, rtx *));
391static rtx ix86_expand_compare PARAMS ((enum rtx_code));
f6da8bc3
KG
392static rtx gen_push PARAMS ((rtx));
393static int memory_address_length PARAMS ((rtx addr));
394static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
395static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
396static int ix86_safe_length PARAMS ((rtx));
397static enum attr_memory ix86_safe_memory PARAMS ((rtx));
398static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
399static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
400static void ix86_dump_ppro_packet PARAMS ((FILE *));
401static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
402static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
e075ae69 403 rtx));
f6da8bc3
KG
404static void ix86_init_machine_status PARAMS ((struct function *));
405static void ix86_mark_machine_status PARAMS ((struct function *));
406static void ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
407static int ix86_safe_length_prefix PARAMS ((rtx));
564d80f4
JH
408static HOST_WIDE_INT ix86_compute_frame_size PARAMS((HOST_WIDE_INT,
409 int *, int *, int *));
0903fcab
JH
410static int ix86_nsaved_regs PARAMS((void));
411static void ix86_emit_save_regs PARAMS((void));
da2d1d3a 412static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
0903fcab 413static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
c6991660
KG
414static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
415static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
e075ae69
RH
416
417struct ix86_address
418{
419 rtx base, index, disp;
420 HOST_WIDE_INT scale;
421};
b08de47e 422
e075ae69
RH
423static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
424\f
f5316dfe
MM
425/* Sometimes certain combinations of command options do not make
426 sense on a particular target machine. You can define a macro
427 `OVERRIDE_OPTIONS' to take account of this. This macro, if
428 defined, is executed once just after all the command options have
429 been parsed.
430
431 Don't use this macro to turn on various extra optimizations for
432 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
433
434void
435override_options ()
436{
e075ae69
RH
437 /* Comes from final.c -- no real reason to change it. */
438#define MAX_CODE_ALIGN 16
f5316dfe 439
c8c5cb99
SC
440 static struct ptt
441 {
e075ae69
RH
442 struct processor_costs *cost; /* Processor costs */
443 int target_enable; /* Target flags to enable. */
444 int target_disable; /* Target flags to disable. */
445 int align_loop; /* Default alignments. */
446 int align_jump;
447 int align_func;
448 int branch_cost;
449 }
450 const processor_target_table[PROCESSOR_max] =
451 {
452 {&i386_cost, 0, 0, 2, 2, 2, 1},
453 {&i486_cost, 0, 0, 4, 4, 4, 1},
454 {&pentium_cost, 0, 0, -4, -4, -4, 1},
455 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
309ada50
JH
456 {&k6_cost, 0, 0, -5, -5, 4, 1},
457 {&athlon_cost, 0, 0, 4, -4, 4, 1}
e075ae69
RH
458 };
459
460 static struct pta
461 {
69ddee61 462 const char *name; /* processor name or nickname. */
e075ae69
RH
463 enum processor_type processor;
464 }
465 const processor_alias_table[] =
466 {
467 {"i386", PROCESSOR_I386},
468 {"i486", PROCESSOR_I486},
469 {"i586", PROCESSOR_PENTIUM},
470 {"pentium", PROCESSOR_PENTIUM},
471 {"i686", PROCESSOR_PENTIUMPRO},
472 {"pentiumpro", PROCESSOR_PENTIUMPRO},
e075ae69 473 {"k6", PROCESSOR_K6},
309ada50 474 {"athlon", PROCESSOR_ATHLON},
3af4bd89 475 };
c8c5cb99 476
e075ae69 477 int const pta_size = sizeof(processor_alias_table)/sizeof(struct pta);
c8c5cb99 478
f5316dfe
MM
479#ifdef SUBTARGET_OVERRIDE_OPTIONS
480 SUBTARGET_OVERRIDE_OPTIONS;
481#endif
482
5a6ee819 483 ix86_arch = PROCESSOR_I386;
e075ae69
RH
484 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
485
486 if (ix86_arch_string != 0)
487 {
488 int i;
489 for (i = 0; i < pta_size; i++)
490 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
491 {
492 ix86_arch = processor_alias_table[i].processor;
493 /* Default cpu tuning to the architecture. */
494 ix86_cpu = ix86_arch;
495 break;
496 }
497 if (i == pta_size)
498 error ("bad value (%s) for -march= switch", ix86_arch_string);
499 }
500
501 if (ix86_cpu_string != 0)
502 {
503 int i;
504 for (i = 0; i < pta_size; i++)
505 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
506 {
507 ix86_cpu = processor_alias_table[i].processor;
508 break;
509 }
510 if (i == pta_size)
511 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
512 }
513
514 ix86_cost = processor_target_table[ix86_cpu].cost;
515 target_flags |= processor_target_table[ix86_cpu].target_enable;
516 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
517
36edd3cc
BS
518 /* Arrange to set up i386_stack_locals for all functions. */
519 init_machine_status = ix86_init_machine_status;
1526a060 520 mark_machine_status = ix86_mark_machine_status;
36edd3cc 521
e9a25f70 522 /* Validate registers in register allocation order. */
e075ae69 523 if (ix86_reg_alloc_order)
f5316dfe 524 {
e075ae69
RH
525 int i, ch;
526 for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
f5316dfe 527 {
00c79232 528 int regno = 0;
79325812 529
f5316dfe
MM
530 switch (ch)
531 {
532 case 'a': regno = 0; break;
533 case 'd': regno = 1; break;
534 case 'c': regno = 2; break;
535 case 'b': regno = 3; break;
536 case 'S': regno = 4; break;
537 case 'D': regno = 5; break;
538 case 'B': regno = 6; break;
539
540 default: fatal ("Register '%c' is unknown", ch);
541 }
542
543 if (regs_allocated[regno])
e9a25f70 544 fatal ("Register '%c' already specified in allocation order", ch);
f5316dfe
MM
545
546 regs_allocated[regno] = 1;
547 }
548 }
b08de47e 549
e9a25f70 550 /* Validate -mregparm= value. */
e075ae69 551 if (ix86_regparm_string)
b08de47e 552 {
e075ae69
RH
553 ix86_regparm = atoi (ix86_regparm_string);
554 if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
e9a25f70 555 fatal ("-mregparm=%d is not between 0 and %d",
e075ae69 556 ix86_regparm, REGPARM_MAX);
b08de47e
MM
557 }
558
e9a25f70 559 /* Validate -malign-loops= value, or provide default. */
e075ae69
RH
560 ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
561 if (ix86_align_loops_string)
b08de47e 562 {
e075ae69
RH
563 ix86_align_loops = atoi (ix86_align_loops_string);
564 if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
b08de47e 565 fatal ("-malign-loops=%d is not between 0 and %d",
e075ae69 566 ix86_align_loops, MAX_CODE_ALIGN);
b08de47e 567 }
3af4bd89
JH
568
569 /* Validate -malign-jumps= value, or provide default. */
e075ae69
RH
570 ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
571 if (ix86_align_jumps_string)
b08de47e 572 {
e075ae69
RH
573 ix86_align_jumps = atoi (ix86_align_jumps_string);
574 if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
b08de47e 575 fatal ("-malign-jumps=%d is not between 0 and %d",
e075ae69 576 ix86_align_jumps, MAX_CODE_ALIGN);
b08de47e 577 }
b08de47e 578
e9a25f70 579 /* Validate -malign-functions= value, or provide default. */
e075ae69
RH
580 ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
581 if (ix86_align_funcs_string)
b08de47e 582 {
e075ae69
RH
583 ix86_align_funcs = atoi (ix86_align_funcs_string);
584 if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
b08de47e 585 fatal ("-malign-functions=%d is not between 0 and %d",
e075ae69 586 ix86_align_funcs, MAX_CODE_ALIGN);
b08de47e 587 }
3af4bd89 588
e4c0478d 589 /* Validate -mpreferred-stack-boundary= value, or provide default.
3af4bd89 590 The default of 128 bits is for Pentium III's SSE __m128. */
e075ae69
RH
591 ix86_preferred_stack_boundary = 128;
592 if (ix86_preferred_stack_boundary_string)
3af4bd89 593 {
e075ae69 594 int i = atoi (ix86_preferred_stack_boundary_string);
3af4bd89 595 if (i < 2 || i > 31)
e4c0478d 596 fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
e075ae69 597 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 598 }
77a989d1 599
e9a25f70 600 /* Validate -mbranch-cost= value, or provide default. */
e075ae69
RH
601 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
602 if (ix86_branch_cost_string)
804a8ee0 603 {
e075ae69
RH
604 ix86_branch_cost = atoi (ix86_branch_cost_string);
605 if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
606 fatal ("-mbranch-cost=%d is not between 0 and 5",
607 ix86_branch_cost);
804a8ee0 608 }
804a8ee0 609
e9a25f70
JL
610 /* Keep nonleaf frame pointers. */
611 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 612 flag_omit_frame_pointer = 1;
e075ae69
RH
613
614 /* If we're doing fast math, we don't care about comparison order
615 wrt NaNs. This lets us use a shorter comparison sequence. */
616 if (flag_fast_math)
617 target_flags &= ~MASK_IEEE_FP;
618
619 /* If we're planning on using `loop', use it. */
620 if (TARGET_USE_LOOP && optimize)
621 flag_branch_on_count_reg = 1;
f5316dfe
MM
622}
623\f
624/* A C statement (sans semicolon) to choose the order in which to
625 allocate hard registers for pseudo-registers local to a basic
626 block.
627
628 Store the desired register order in the array `reg_alloc_order'.
629 Element 0 should be the register to allocate first; element 1, the
630 next register; and so on.
631
632 The macro body should not assume anything about the contents of
633 `reg_alloc_order' before execution of the macro.
634
635 On most machines, it is not necessary to define this macro. */
636
637void
638order_regs_for_local_alloc ()
639{
00c79232 640 int i, ch, order;
f5316dfe 641
e9a25f70
JL
642 /* User specified the register allocation order. */
643
e075ae69 644 if (ix86_reg_alloc_order)
f5316dfe 645 {
e075ae69 646 for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
f5316dfe 647 {
00c79232 648 int regno = 0;
79325812 649
f5316dfe
MM
650 switch (ch)
651 {
652 case 'a': regno = 0; break;
653 case 'd': regno = 1; break;
654 case 'c': regno = 2; break;
655 case 'b': regno = 3; break;
656 case 'S': regno = 4; break;
657 case 'D': regno = 5; break;
658 case 'B': regno = 6; break;
659 }
660
661 reg_alloc_order[order++] = regno;
662 }
663
664 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
665 {
e9a25f70 666 if (! regs_allocated[i])
f5316dfe
MM
667 reg_alloc_order[order++] = i;
668 }
669 }
670
e9a25f70 671 /* If user did not specify a register allocation order, use natural order. */
f5316dfe
MM
672 else
673 {
674 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
675 reg_alloc_order[i] = i;
f5316dfe
MM
676 }
677}
32b5b1aa
SC
678\f
679void
c6aded7c 680optimization_options (level, size)
32b5b1aa 681 int level;
bb5177ac 682 int size ATTRIBUTE_UNUSED;
32b5b1aa 683{
e9a25f70
JL
684 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
685 make the problem with not enough registers even worse. */
32b5b1aa
SC
686#ifdef INSN_SCHEDULING
687 if (level > 1)
688 flag_schedule_insns = 0;
689#endif
690}
b08de47e
MM
691\f
692/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
693 attribute for DECL. The attributes in ATTRIBUTES have previously been
694 assigned to DECL. */
695
696int
e075ae69 697ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
bb5177ac
RL
698 tree decl ATTRIBUTE_UNUSED;
699 tree attributes ATTRIBUTE_UNUSED;
700 tree identifier ATTRIBUTE_UNUSED;
701 tree args ATTRIBUTE_UNUSED;
b08de47e
MM
702{
703 return 0;
704}
705
706/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
707 attribute for TYPE. The attributes in ATTRIBUTES have previously been
708 assigned to TYPE. */
709
710int
e075ae69 711ix86_valid_type_attribute_p (type, attributes, identifier, args)
b08de47e 712 tree type;
bb5177ac 713 tree attributes ATTRIBUTE_UNUSED;
b08de47e
MM
714 tree identifier;
715 tree args;
716{
717 if (TREE_CODE (type) != FUNCTION_TYPE
ac478ac0 718 && TREE_CODE (type) != METHOD_TYPE
b08de47e
MM
719 && TREE_CODE (type) != FIELD_DECL
720 && TREE_CODE (type) != TYPE_DECL)
721 return 0;
722
723 /* Stdcall attribute says callee is responsible for popping arguments
724 if they are not variable. */
725 if (is_attribute_p ("stdcall", identifier))
726 return (args == NULL_TREE);
727
e9a25f70 728 /* Cdecl attribute says the callee is a normal C declaration. */
b08de47e
MM
729 if (is_attribute_p ("cdecl", identifier))
730 return (args == NULL_TREE);
731
732 /* Regparm attribute specifies how many integer arguments are to be
e9a25f70 733 passed in registers. */
b08de47e
MM
734 if (is_attribute_p ("regparm", identifier))
735 {
736 tree cst;
737
e9a25f70 738 if (! args || TREE_CODE (args) != TREE_LIST
b08de47e
MM
739 || TREE_CHAIN (args) != NULL_TREE
740 || TREE_VALUE (args) == NULL_TREE)
741 return 0;
742
743 cst = TREE_VALUE (args);
744 if (TREE_CODE (cst) != INTEGER_CST)
745 return 0;
746
cce097f1 747 if (compare_tree_int (cst, REGPARM_MAX) > 0)
b08de47e
MM
748 return 0;
749
750 return 1;
751 }
752
753 return 0;
754}
755
756/* Return 0 if the attributes for two types are incompatible, 1 if they
757 are compatible, and 2 if they are nearly compatible (which causes a
758 warning to be generated). */
759
760int
e075ae69 761ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
762 tree type1;
763 tree type2;
b08de47e 764{
afcfe58c 765 /* Check for mismatch of non-default calling convention. */
69ddee61 766 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
767
768 if (TREE_CODE (type1) != FUNCTION_TYPE)
769 return 1;
770
771 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
772 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
773 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 774 return 0;
b08de47e
MM
775 return 1;
776}
b08de47e
MM
777\f
778/* Value is the number of bytes of arguments automatically
779 popped when returning from a subroutine call.
780 FUNDECL is the declaration node of the function (as a tree),
781 FUNTYPE is the data type of the function (as a tree),
782 or for a library call it is an identifier node for the subroutine name.
783 SIZE is the number of bytes of arguments passed on the stack.
784
785 On the 80386, the RTD insn may be used to pop them if the number
786 of args is fixed, but if the number is variable then the caller
787 must pop them all. RTD can't be used for library calls now
788 because the library is compiled with the Unix compiler.
789 Use of RTD is a selectable option, since it is incompatible with
790 standard Unix calling sequences. If the option is not selected,
791 the caller must always pop the args.
792
793 The attribute stdcall is equivalent to RTD on a per module basis. */
794
795int
e075ae69 796ix86_return_pops_args (fundecl, funtype, size)
b08de47e
MM
797 tree fundecl;
798 tree funtype;
799 int size;
79325812 800{
3345ee7d 801 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 802
e9a25f70
JL
803 /* Cdecl functions override -mrtd, and never pop the stack. */
804 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 805
e9a25f70 806 /* Stdcall functions will pop the stack if not variable args. */
698cdd84
SC
807 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
808 rtd = 1;
79325812 809
698cdd84
SC
810 if (rtd
811 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
812 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
813 == void_type_node)))
698cdd84
SC
814 return size;
815 }
79325812 816
e9a25f70 817 /* Lose any fake structure return argument. */
698cdd84
SC
818 if (aggregate_value_p (TREE_TYPE (funtype)))
819 return GET_MODE_SIZE (Pmode);
79325812 820
2614aac6 821 return 0;
b08de47e 822}
b08de47e
MM
823\f
824/* Argument support functions. */
825
826/* Initialize a variable CUM of type CUMULATIVE_ARGS
827 for a call to a function whose data type is FNTYPE.
828 For a library call, FNTYPE is 0. */
829
830void
831init_cumulative_args (cum, fntype, libname)
e9a25f70 832 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
b08de47e
MM
833 tree fntype; /* tree ptr for function decl */
834 rtx libname; /* SYMBOL_REF of library name or 0 */
835{
836 static CUMULATIVE_ARGS zero_cum;
837 tree param, next_param;
838
839 if (TARGET_DEBUG_ARG)
840 {
841 fprintf (stderr, "\ninit_cumulative_args (");
842 if (fntype)
e9a25f70
JL
843 fprintf (stderr, "fntype code = %s, ret code = %s",
844 tree_code_name[(int) TREE_CODE (fntype)],
845 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
846 else
847 fprintf (stderr, "no fntype");
848
849 if (libname)
850 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
851 }
852
853 *cum = zero_cum;
854
855 /* Set up the number of registers to use for passing arguments. */
e075ae69 856 cum->nregs = ix86_regparm;
b08de47e
MM
857 if (fntype)
858 {
859 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 860
b08de47e
MM
861 if (attr)
862 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
863 }
864
865 /* Determine if this function has variable arguments. This is
866 indicated by the last argument being 'void_type_mode' if there
867 are no variable arguments. If there are variable arguments, then
868 we won't pass anything in registers */
869
870 if (cum->nregs)
871 {
872 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 873 param != 0; param = next_param)
b08de47e
MM
874 {
875 next_param = TREE_CHAIN (param);
e9a25f70 876 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
b08de47e
MM
877 cum->nregs = 0;
878 }
879 }
880
881 if (TARGET_DEBUG_ARG)
882 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
883
884 return;
885}
886
887/* Update the data in CUM to advance over an argument
888 of mode MODE and data type TYPE.
889 (TYPE is null for libcalls where that information may not be available.) */
890
891void
892function_arg_advance (cum, mode, type, named)
893 CUMULATIVE_ARGS *cum; /* current arg information */
894 enum machine_mode mode; /* current arg mode */
895 tree type; /* type of the argument or 0 if lib support */
896 int named; /* whether or not the argument was named */
897{
e9a25f70
JL
898 int bytes
899 = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
b08de47e
MM
900 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
901
902 if (TARGET_DEBUG_ARG)
903 fprintf (stderr,
e9a25f70 904 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e
MM
905 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
906
907 cum->words += words;
908 cum->nregs -= words;
909 cum->regno += words;
910
911 if (cum->nregs <= 0)
912 {
913 cum->nregs = 0;
914 cum->regno = 0;
915 }
916
917 return;
918}
919
920/* Define where to put the arguments to a function.
921 Value is zero to push the argument on the stack,
922 or a hard register in which to store the argument.
923
924 MODE is the argument's machine mode.
925 TYPE is the data type of the argument (as a tree).
926 This is null for libcalls where that information may
927 not be available.
928 CUM is a variable of type CUMULATIVE_ARGS which gives info about
929 the preceding args and about the function being called.
930 NAMED is nonzero if this argument is a named parameter
931 (otherwise it is an extra parameter matching an ellipsis). */
932
933struct rtx_def *
934function_arg (cum, mode, type, named)
935 CUMULATIVE_ARGS *cum; /* current arg information */
936 enum machine_mode mode; /* current arg mode */
937 tree type; /* type of the argument or 0 if lib support */
938 int named; /* != 0 for normal args, == 0 for ... args */
939{
940 rtx ret = NULL_RTX;
e9a25f70
JL
941 int bytes
942 = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
b08de47e
MM
943 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
944
945 switch (mode)
946 {
e9a25f70
JL
947 /* For now, pass fp/complex values on the stack. */
948 default:
b08de47e
MM
949 break;
950
951 case BLKmode:
952 case DImode:
953 case SImode:
954 case HImode:
955 case QImode:
956 if (words <= cum->nregs)
f64cecad 957 ret = gen_rtx_REG (mode, cum->regno);
b08de47e
MM
958 break;
959 }
960
961 if (TARGET_DEBUG_ARG)
962 {
963 fprintf (stderr,
e9a25f70 964 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
b08de47e
MM
965 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
966
967 if (ret)
968 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
969 else
970 fprintf (stderr, ", stack");
971
972 fprintf (stderr, " )\n");
973 }
974
975 return ret;
976}
e075ae69
RH
977\f
978/* Returns 1 if OP is either a symbol reference or a sum of a symbol
979 reference and a constant. */
b08de47e
MM
980
981int
e075ae69
RH
982symbolic_operand (op, mode)
983 register rtx op;
984 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 985{
e075ae69 986 switch (GET_CODE (op))
2a2ab3f9 987 {
e075ae69
RH
988 case SYMBOL_REF:
989 case LABEL_REF:
990 return 1;
991
992 case CONST:
993 op = XEXP (op, 0);
994 if (GET_CODE (op) == SYMBOL_REF
995 || GET_CODE (op) == LABEL_REF
996 || (GET_CODE (op) == UNSPEC
997 && XINT (op, 1) >= 6
998 && XINT (op, 1) <= 7))
999 return 1;
1000 if (GET_CODE (op) != PLUS
1001 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1002 return 0;
1003
1004 op = XEXP (op, 0);
1005 if (GET_CODE (op) == SYMBOL_REF
1006 || GET_CODE (op) == LABEL_REF)
1007 return 1;
1008 /* Only @GOTOFF gets offsets. */
1009 if (GET_CODE (op) != UNSPEC
1010 || XINT (op, 1) != 7)
1011 return 0;
1012
1013 op = XVECEXP (op, 0, 0);
1014 if (GET_CODE (op) == SYMBOL_REF
1015 || GET_CODE (op) == LABEL_REF)
1016 return 1;
1017 return 0;
1018
1019 default:
1020 return 0;
2a2ab3f9
JVA
1021 }
1022}
2a2ab3f9 1023
e075ae69 1024/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 1025
e075ae69
RH
1026int
1027pic_symbolic_operand (op, mode)
1028 register rtx op;
1029 enum machine_mode mode ATTRIBUTE_UNUSED;
1030{
1031 if (GET_CODE (op) == CONST)
2a2ab3f9 1032 {
e075ae69
RH
1033 op = XEXP (op, 0);
1034 if (GET_CODE (op) == UNSPEC)
1035 return 1;
1036 if (GET_CODE (op) != PLUS
1037 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1038 return 0;
1039 op = XEXP (op, 0);
1040 if (GET_CODE (op) == UNSPEC)
1041 return 1;
2a2ab3f9 1042 }
e075ae69 1043 return 0;
2a2ab3f9 1044}
2a2ab3f9 1045
28d52ffb
RH
1046/* Test for a valid operand for a call instruction. Don't allow the
1047 arg pointer register or virtual regs since they may decay into
1048 reg + const, which the patterns can't handle. */
2a2ab3f9 1049
e075ae69
RH
1050int
1051call_insn_operand (op, mode)
1052 rtx op;
1053 enum machine_mode mode ATTRIBUTE_UNUSED;
1054{
1055 if (GET_CODE (op) != MEM)
1056 return 0;
1057 op = XEXP (op, 0);
2a2ab3f9 1058
e075ae69
RH
1059 /* Disallow indirect through a virtual register. This leads to
1060 compiler aborts when trying to eliminate them. */
1061 if (GET_CODE (op) == REG
1062 && (op == arg_pointer_rtx
564d80f4 1063 || op == frame_pointer_rtx
e075ae69
RH
1064 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
1065 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
1066 return 0;
2a2ab3f9 1067
28d52ffb
RH
1068 /* Disallow `call 1234'. Due to varying assembler lameness this
1069 gets either rejected or translated to `call .+1234'. */
1070 if (GET_CODE (op) == CONST_INT)
1071 return 0;
1072
cbbf65e0
RH
1073 /* Explicitly allow SYMBOL_REF even if pic. */
1074 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 1075 return 1;
2a2ab3f9 1076
cbbf65e0
RH
1077 /* Half-pic doesn't allow anything but registers and constants.
1078 We've just taken care of the later. */
1079 if (HALF_PIC_P ())
1080 return register_operand (op, Pmode);
1081
1082 /* Otherwise we can allow any general_operand in the address. */
1083 return general_operand (op, Pmode);
e075ae69 1084}
79325812 1085
e075ae69
RH
1086int
1087constant_call_address_operand (op, mode)
1088 rtx op;
1089 enum machine_mode mode ATTRIBUTE_UNUSED;
1090{
cbbf65e0
RH
1091 return (GET_CODE (op) == MEM
1092 && CONSTANT_ADDRESS_P (XEXP (op, 0))
1093 && GET_CODE (XEXP (op, 0)) != CONST_INT);
e075ae69 1094}
2a2ab3f9 1095
e075ae69 1096/* Match exactly zero and one. */
e9a25f70 1097
e075ae69
RH
1098int
1099const0_operand (op, mode)
1100 register rtx op;
1101 enum machine_mode mode;
1102{
1103 return op == CONST0_RTX (mode);
1104}
e9a25f70 1105
e075ae69
RH
1106int
1107const1_operand (op, mode)
1108 register rtx op;
1109 enum machine_mode mode ATTRIBUTE_UNUSED;
1110{
1111 return op == const1_rtx;
1112}
2a2ab3f9 1113
e075ae69 1114/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 1115
e075ae69
RH
1116int
1117const248_operand (op, mode)
1118 register rtx op;
1119 enum machine_mode mode ATTRIBUTE_UNUSED;
1120{
1121 return (GET_CODE (op) == CONST_INT
1122 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1123}
e9a25f70 1124
e075ae69 1125/* True if this is a constant appropriate for an increment or decremenmt. */
81fd0956 1126
e075ae69
RH
1127int
1128incdec_operand (op, mode)
1129 register rtx op;
1130 enum machine_mode mode;
1131{
1132 if (op == const1_rtx || op == constm1_rtx)
1133 return 1;
1134 if (GET_CODE (op) != CONST_INT)
1135 return 0;
1136 if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
1137 return 1;
1138 if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
1139 return 1;
1140 if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
1141 return 1;
1142 return 0;
1143}
2a2ab3f9 1144
e075ae69
RH
1145/* Return false if this is the stack pointer, or any other fake
1146 register eliminable to the stack pointer. Otherwise, this is
1147 a register operand.
2a2ab3f9 1148
e075ae69
RH
1149 This is used to prevent esp from being used as an index reg.
1150 Which would only happen in pathological cases. */
5f1ec3e6 1151
e075ae69
RH
1152int
1153reg_no_sp_operand (op, mode)
1154 register rtx op;
1155 enum machine_mode mode;
1156{
1157 rtx t = op;
1158 if (GET_CODE (t) == SUBREG)
1159 t = SUBREG_REG (t);
564d80f4 1160 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 1161 return 0;
2a2ab3f9 1162
e075ae69 1163 return register_operand (op, mode);
2a2ab3f9 1164}
b840bfb0 1165
2c5a510c
RH
1166/* Return false if this is any eliminable register. Otherwise
1167 general_operand. */
1168
1169int
1170general_no_elim_operand (op, mode)
1171 register rtx op;
1172 enum machine_mode mode;
1173{
1174 rtx t = op;
1175 if (GET_CODE (t) == SUBREG)
1176 t = SUBREG_REG (t);
1177 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1178 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1179 || t == virtual_stack_dynamic_rtx)
1180 return 0;
1181
1182 return general_operand (op, mode);
1183}
1184
1185/* Return false if this is any eliminable register. Otherwise
1186 register_operand or const_int. */
1187
1188int
1189nonmemory_no_elim_operand (op, mode)
1190 register rtx op;
1191 enum machine_mode mode;
1192{
1193 rtx t = op;
1194 if (GET_CODE (t) == SUBREG)
1195 t = SUBREG_REG (t);
1196 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1197 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1198 || t == virtual_stack_dynamic_rtx)
1199 return 0;
1200
1201 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1202}
1203
e075ae69 1204/* Return true if op is a Q_REGS class register. */
b840bfb0 1205
e075ae69
RH
1206int
1207q_regs_operand (op, mode)
1208 register rtx op;
1209 enum machine_mode mode;
b840bfb0 1210{
e075ae69
RH
1211 if (mode != VOIDmode && GET_MODE (op) != mode)
1212 return 0;
1213 if (GET_CODE (op) == SUBREG)
1214 op = SUBREG_REG (op);
1215 return QI_REG_P (op);
1216}
b840bfb0 1217
e075ae69 1218/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 1219
e075ae69
RH
1220int
1221non_q_regs_operand (op, mode)
1222 register rtx op;
1223 enum machine_mode mode;
1224{
1225 if (mode != VOIDmode && GET_MODE (op) != mode)
1226 return 0;
1227 if (GET_CODE (op) == SUBREG)
1228 op = SUBREG_REG (op);
1229 return NON_QI_REG_P (op);
1230}
b840bfb0 1231
e075ae69
RH
1232/* Return 1 if OP is a comparison operator that can use the condition code
1233 generated by a logical operation, which characteristicly does not set
1234 overflow or carry. To be used with CCNOmode. */
b840bfb0 1235
e075ae69
RH
1236int
1237no_comparison_operator (op, mode)
1238 register rtx op;
1239 enum machine_mode mode;
1240{
3a3677ff
RH
1241 if (mode != VOIDmode && GET_MODE (op) != mode)
1242 return 0;
1243
1244 switch (GET_CODE (op))
1245 {
1246 case EQ: case NE:
1247 case LT: case GE:
1248 case LEU: case LTU: case GEU: case GTU:
1249 return 1;
1250
1251 default:
1252 return 0;
1253 }
e075ae69 1254}
b840bfb0 1255
e075ae69 1256/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
b840bfb0 1257
e075ae69
RH
1258int
1259fcmov_comparison_operator (op, mode)
1260 register rtx op;
1261 enum machine_mode mode;
1262{
3a3677ff
RH
1263 if (mode != VOIDmode && GET_MODE (op) != mode)
1264 return 0;
1265
1266 switch (GET_CODE (op))
1267 {
1268 case EQ: case NE:
1269 case LEU: case LTU: case GEU: case GTU:
1270 case UNORDERED: case ORDERED:
1271 return 1;
1272
1273 default:
1274 return 0;
1275 }
1276}
1277
1278/* Return 1 if OP is any normal comparison operator plus {UN}ORDERED. */
1279
1280int
1281uno_comparison_operator (op, mode)
1282 register rtx op;
1283 enum machine_mode mode;
1284{
1285 if (mode != VOIDmode && GET_MODE (op) != mode)
1286 return 0;
1287
1288 switch (GET_CODE (op))
1289 {
1290 case EQ: case NE:
1291 case LE: case LT: case GE: case GT:
1292 case LEU: case LTU: case GEU: case GTU:
1293 case UNORDERED: case ORDERED:
1294 return 1;
1295
1296 default:
1297 return 0;
1298 }
e075ae69 1299}
b840bfb0 1300
e9e80858
JH
1301/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1302
1303int
1304promotable_binary_operator (op, mode)
1305 register rtx op;
1306 enum machine_mode mode ATTRIBUTE_UNUSED;
1307{
1308 switch (GET_CODE (op))
1309 {
1310 case MULT:
1311 /* Modern CPUs have same latency for HImode and SImode multiply,
1312 but 386 and 486 do HImode multiply faster. */
1313 return ix86_cpu > PROCESSOR_I486;
1314 case PLUS:
1315 case AND:
1316 case IOR:
1317 case XOR:
1318 case ASHIFT:
1319 return 1;
1320 default:
1321 return 0;
1322 }
1323}
1324
e075ae69
RH
1325/* Nearly general operand, but accept any const_double, since we wish
1326 to be able to drop them into memory rather than have them get pulled
1327 into registers. */
b840bfb0 1328
2a2ab3f9 1329int
e075ae69
RH
1330cmp_fp_expander_operand (op, mode)
1331 register rtx op;
1332 enum machine_mode mode;
2a2ab3f9 1333{
e075ae69 1334 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 1335 return 0;
e075ae69 1336 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 1337 return 1;
e075ae69 1338 return general_operand (op, mode);
2a2ab3f9
JVA
1339}
1340
e075ae69 1341/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
1342
1343int
e075ae69 1344ext_register_operand (op, mode)
2a2ab3f9 1345 register rtx op;
bb5177ac 1346 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1347{
e075ae69
RH
1348 if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
1349 return 0;
1350 return register_operand (op, VOIDmode);
1351}
1352
1353/* Return 1 if this is a valid binary floating-point operation.
1354 OP is the expression matched, and MODE is its mode. */
1355
1356int
1357binary_fp_operator (op, mode)
1358 register rtx op;
1359 enum machine_mode mode;
1360{
1361 if (mode != VOIDmode && mode != GET_MODE (op))
1362 return 0;
1363
2a2ab3f9
JVA
1364 switch (GET_CODE (op))
1365 {
e075ae69
RH
1366 case PLUS:
1367 case MINUS:
1368 case MULT:
1369 case DIV:
1370 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 1371
2a2ab3f9
JVA
1372 default:
1373 return 0;
1374 }
1375}
fee2770d 1376
e075ae69
RH
1377int
1378mult_operator(op, mode)
1379 register rtx op;
1380 enum machine_mode mode ATTRIBUTE_UNUSED;
1381{
1382 return GET_CODE (op) == MULT;
1383}
1384
1385int
1386div_operator(op, mode)
1387 register rtx op;
1388 enum machine_mode mode ATTRIBUTE_UNUSED;
1389{
1390 return GET_CODE (op) == DIV;
1391}
0a726ef1
JL
1392
1393int
e075ae69
RH
1394arith_or_logical_operator (op, mode)
1395 rtx op;
1396 enum machine_mode mode;
0a726ef1 1397{
e075ae69
RH
1398 return ((mode == VOIDmode || GET_MODE (op) == mode)
1399 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1400 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
1401}
1402
e075ae69 1403/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
1404
1405int
e075ae69
RH
1406memory_displacement_operand (op, mode)
1407 register rtx op;
1408 enum machine_mode mode;
4f2c8ebb 1409{
e075ae69 1410 struct ix86_address parts;
e9a25f70 1411
e075ae69
RH
1412 if (! memory_operand (op, mode))
1413 return 0;
1414
1415 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1416 abort ();
1417
1418 return parts.disp != NULL_RTX;
4f2c8ebb
RS
1419}
1420
16189740 1421/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
e075ae69
RH
1422 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1423
1424 ??? It seems likely that this will only work because cmpsi is an
1425 expander, and no actual insns use this. */
4f2c8ebb
RS
1426
1427int
e075ae69
RH
1428cmpsi_operand (op, mode)
1429 rtx op;
1430 enum machine_mode mode;
fee2770d 1431{
e075ae69
RH
1432 if (general_operand (op, mode))
1433 return 1;
1434
1435 if (GET_CODE (op) == AND
1436 && GET_MODE (op) == SImode
1437 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
1438 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
1439 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
1440 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
1441 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
1442 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 1443 return 1;
e9a25f70 1444
fee2770d
RS
1445 return 0;
1446}
d784886d 1447
e075ae69
RH
1448/* Returns 1 if OP is memory operand that can not be represented by the
1449 modRM array. */
d784886d
RK
1450
1451int
e075ae69 1452long_memory_operand (op, mode)
d784886d
RK
1453 register rtx op;
1454 enum machine_mode mode;
1455{
e075ae69 1456 if (! memory_operand (op, mode))
d784886d
RK
1457 return 0;
1458
e075ae69 1459 return memory_address_length (op) != 0;
d784886d 1460}
2247f6ed
JH
1461
1462/* Return nonzero if the rtx is known aligned. */
1463
1464int
1465aligned_operand (op, mode)
1466 rtx op;
1467 enum machine_mode mode;
1468{
1469 struct ix86_address parts;
1470
1471 if (!general_operand (op, mode))
1472 return 0;
1473
1474 /* Registers and immediate operands are always "aligned". */
1475 if (GET_CODE (op) != MEM)
1476 return 1;
1477
1478 /* Don't even try to do any aligned optimizations with volatiles. */
1479 if (MEM_VOLATILE_P (op))
1480 return 0;
1481
1482 op = XEXP (op, 0);
1483
1484 /* Pushes and pops are only valid on the stack pointer. */
1485 if (GET_CODE (op) == PRE_DEC
1486 || GET_CODE (op) == POST_INC)
1487 return 1;
1488
1489 /* Decode the address. */
1490 if (! ix86_decompose_address (op, &parts))
1491 abort ();
1492
1493 /* Look for some component that isn't known to be aligned. */
1494 if (parts.index)
1495 {
1496 if (parts.scale < 4
bdb429a5 1497 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
2247f6ed
JH
1498 return 0;
1499 }
1500 if (parts.base)
1501 {
bdb429a5 1502 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
2247f6ed
JH
1503 return 0;
1504 }
1505 if (parts.disp)
1506 {
1507 if (GET_CODE (parts.disp) != CONST_INT
1508 || (INTVAL (parts.disp) & 3) != 0)
1509 return 0;
1510 }
1511
1512 /* Didn't find one -- this must be an aligned address. */
1513 return 1;
1514}
e075ae69
RH
1515\f
1516/* Return true if the constant is something that can be loaded with
1517 a special instruction. Only handle 0.0 and 1.0; others are less
1518 worthwhile. */
57dbca5e
BS
1519
1520int
e075ae69
RH
1521standard_80387_constant_p (x)
1522 rtx x;
57dbca5e 1523{
e075ae69
RH
1524 if (GET_CODE (x) != CONST_DOUBLE)
1525 return -1;
1526
1527#if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
1528 {
1529 REAL_VALUE_TYPE d;
1530 jmp_buf handler;
1531 int is0, is1;
1532
1533 if (setjmp (handler))
1534 return 0;
1535
1536 set_float_handler (handler);
1537 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
1538 is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
1539 is1 = REAL_VALUES_EQUAL (d, dconst1);
1540 set_float_handler (NULL_PTR);
1541
1542 if (is0)
1543 return 1;
1544
1545 if (is1)
1546 return 2;
1547
1548 /* Note that on the 80387, other constants, such as pi,
1549 are much slower to load as standard constants
1550 than to load from doubles in memory! */
1551 /* ??? Not true on K6: all constants are equal cost. */
1552 }
1553#endif
1554
1555 return 0;
57dbca5e
BS
1556}
1557
2a2ab3f9
JVA
1558/* Returns 1 if OP contains a symbol reference */
1559
1560int
1561symbolic_reference_mentioned_p (op)
1562 rtx op;
1563{
6f7d635c 1564 register const char *fmt;
2a2ab3f9
JVA
1565 register int i;
1566
1567 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
1568 return 1;
1569
1570 fmt = GET_RTX_FORMAT (GET_CODE (op));
1571 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
1572 {
1573 if (fmt[i] == 'E')
1574 {
1575 register int j;
1576
1577 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
1578 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
1579 return 1;
1580 }
e9a25f70 1581
2a2ab3f9
JVA
1582 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
1583 return 1;
1584 }
1585
1586 return 0;
1587}
e075ae69
RH
1588
1589/* Return 1 if it is appropriate to emit `ret' instructions in the
1590 body of a function. Do this only if the epilogue is simple, needing a
1591 couple of insns. Prior to reloading, we can't tell how many registers
1592 must be saved, so return 0 then. Return 0 if there is no frame
1593 marker to de-allocate.
1594
1595 If NON_SAVING_SETJMP is defined and true, then it is not possible
1596 for the epilogue to be simple, so return 0. This is a special case
1597 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1598 until final, but jump_optimize may need to know sooner if a
1599 `return' is OK. */
32b5b1aa
SC
1600
1601int
e075ae69 1602ix86_can_use_return_insn_p ()
32b5b1aa 1603{
9a7372d6
RH
1604 HOST_WIDE_INT tsize;
1605 int nregs;
1606
e075ae69
RH
1607#ifdef NON_SAVING_SETJMP
1608 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
1609 return 0;
1610#endif
9a7372d6
RH
1611#ifdef FUNCTION_BLOCK_PROFILER_EXIT
1612 if (profile_block_flag == 2)
1613 return 0;
1614#endif
1615
1616 if (! reload_completed || frame_pointer_needed)
1617 return 0;
32b5b1aa 1618
9a7372d6
RH
1619 /* Don't allow more than 32 pop, since that's all we can do
1620 with one instruction. */
1621 if (current_function_pops_args
1622 && current_function_args_size >= 32768)
e075ae69 1623 return 0;
32b5b1aa 1624
9a7372d6
RH
1625 tsize = ix86_compute_frame_size (get_frame_size (), &nregs, NULL, NULL);
1626 return tsize == 0 && nregs == 0;
e075ae69
RH
1627}
1628\f
21a427cc 1629static char *pic_label_name;
e075ae69 1630static int pic_label_output;
21a427cc 1631static char *global_offset_table_name;
e9a25f70 1632
e075ae69
RH
1633/* This function generates code for -fpic that loads %ebx with
1634 the return address of the caller and then returns. */
1635
1636void
1637asm_output_function_prefix (file, name)
1638 FILE *file;
3cce094d 1639 const char *name ATTRIBUTE_UNUSED;
e075ae69
RH
1640{
1641 rtx xops[2];
1642 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1643 || current_function_uses_const_pool);
1644 xops[0] = pic_offset_table_rtx;
1645 xops[1] = stack_pointer_rtx;
32b5b1aa 1646
e075ae69
RH
1647 /* Deep branch prediction favors having a return for every call. */
1648 if (pic_reg_used && TARGET_DEEP_BRANCH_PREDICTION)
32b5b1aa 1649 {
e075ae69
RH
1650 if (!pic_label_output)
1651 {
1652 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
1653 internal (non-global) label that's being emitted, it didn't make
1654 sense to have .type information for local labels. This caused
1655 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
1656 me debug info for a label that you're declaring non-global?) this
1657 was changed to call ASM_OUTPUT_LABEL() instead. */
32b5b1aa 1658
e075ae69 1659 ASM_OUTPUT_LABEL (file, pic_label_name);
e9a25f70 1660
e075ae69
RH
1661 xops[1] = gen_rtx_MEM (SImode, xops[1]);
1662 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
1663 output_asm_insn ("ret", xops);
0afeb08a 1664
e075ae69 1665 pic_label_output = 1;
32b5b1aa 1666 }
32b5b1aa 1667 }
32b5b1aa 1668}
32b5b1aa 1669
e075ae69
RH
1670void
1671load_pic_register ()
32b5b1aa 1672{
e075ae69 1673 rtx gotsym, pclab;
32b5b1aa 1674
21a427cc
AS
1675 if (global_offset_table_name == NULL)
1676 {
1677 global_offset_table_name =
1678 ggc_alloc_string ("_GLOBAL_OFFSET_TABLE_", 21);
1679 ggc_add_string_root (&global_offset_table_name, 1);
1680 }
1681 gotsym = gen_rtx_SYMBOL_REF (Pmode, global_offset_table_name);
32b5b1aa 1682
e075ae69 1683 if (TARGET_DEEP_BRANCH_PREDICTION)
32b5b1aa 1684 {
21a427cc
AS
1685 if (pic_label_name == NULL)
1686 {
1687 pic_label_name = ggc_alloc_string (NULL, 32);
1688 ggc_add_string_root (&pic_label_name, 1);
1689 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
1690 }
e075ae69 1691 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
32b5b1aa 1692 }
e075ae69 1693 else
e5cb57e8 1694 {
e075ae69 1695 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
e5cb57e8 1696 }
e5cb57e8 1697
e075ae69 1698 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
2a2ab3f9 1699
e075ae69
RH
1700 if (! TARGET_DEEP_BRANCH_PREDICTION)
1701 emit_insn (gen_popsi1 (pic_offset_table_rtx));
79325812 1702
e075ae69 1703 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
e9a25f70 1704}
8dfe5673 1705
e075ae69 1706/* Generate an SImode "push" pattern for input ARG. */
e9a25f70 1707
e075ae69
RH
1708static rtx
1709gen_push (arg)
1710 rtx arg;
e9a25f70 1711{
c5c76735
JL
1712 return gen_rtx_SET (VOIDmode,
1713 gen_rtx_MEM (SImode,
1714 gen_rtx_PRE_DEC (SImode,
1715 stack_pointer_rtx)),
1716 arg);
e9a25f70
JL
1717}
1718
0903fcab
JH
1719/* Return number of registers to be saved on the stack. */
1720
1721static int
1722ix86_nsaved_regs ()
1723{
1724 int nregs = 0;
1725 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1726 || current_function_uses_const_pool);
1727 int limit = (frame_pointer_needed
1728 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
1729 int regno;
1730
1731 for (regno = limit - 1; regno >= 0; regno--)
1732 if ((regs_ever_live[regno] && ! call_used_regs[regno])
1733 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1734 {
1735 nregs ++;
1736 }
1737 return nregs;
1738}
1739
1740/* Return the offset between two registers, one to be eliminated, and the other
1741 its replacement, at the start of a routine. */
1742
1743HOST_WIDE_INT
1744ix86_initial_elimination_offset (from, to)
1745 int from;
1746 int to;
1747{
564d80f4
JH
1748 int padding1;
1749 int nregs;
1750
1751 /* Stack grows downward:
1752
1753 [arguments]
1754 <- ARG_POINTER
1755 saved pc
1756
1757 saved frame pointer if frame_pointer_needed
1758 <- HARD_FRAME_POINTER
1c71e60e 1759 [saved regs]
564d80f4
JH
1760
1761 [padding1] \
1762 | <- FRAME_POINTER
1763 [frame] > tsize
1764 |
1765 [padding2] /
564d80f4
JH
1766 */
1767
1768 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
1769 /* Skip saved PC and previous frame pointer.
1770 Executed only when frame_pointer_needed. */
1771 return 8;
1772 else if (from == FRAME_POINTER_REGNUM
1773 && to == HARD_FRAME_POINTER_REGNUM)
1774 {
1775 ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *)0);
1c71e60e 1776 padding1 += nregs * UNITS_PER_WORD;
564d80f4
JH
1777 return -padding1;
1778 }
0903fcab
JH
1779 else
1780 {
564d80f4
JH
1781 /* ARG_POINTER or FRAME_POINTER to STACK_POINTER elimination. */
1782 int frame_size = frame_pointer_needed ? 8 : 4;
0903fcab 1783 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (),
564d80f4 1784 &nregs, &padding1, (int *)0);
0903fcab 1785
0903fcab 1786
564d80f4
JH
1787 if (to != STACK_POINTER_REGNUM)
1788 abort ();
1789 else if (from == ARG_POINTER_REGNUM)
1790 return tsize + nregs * UNITS_PER_WORD + frame_size;
1791 else if (from != FRAME_POINTER_REGNUM)
1792 abort ();
0903fcab 1793 else
1c71e60e 1794 return tsize - padding1;
0903fcab
JH
1795 }
1796}
1797
65954bd8
JL
1798/* Compute the size of local storage taking into consideration the
1799 desired stack alignment which is to be maintained. Also determine
564d80f4
JH
1800 the number of registers saved below the local storage.
1801
1802 PADDING1 returns padding before stack frame and PADDING2 returns
1803 padding after stack frame;
1804 */
1805
1806static HOST_WIDE_INT
1807ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2)
65954bd8
JL
1808 HOST_WIDE_INT size;
1809 int *nregs_on_stack;
564d80f4
JH
1810 int *rpadding1;
1811 int *rpadding2;
65954bd8 1812{
65954bd8 1813 int nregs;
564d80f4
JH
1814 int padding1 = 0;
1815 int padding2 = 0;
65954bd8 1816 HOST_WIDE_INT total_size;
564d80f4 1817 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
44affdae
JH
1818 int offset;
1819 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
65954bd8 1820
564d80f4 1821 nregs = ix86_nsaved_regs ();
564d80f4 1822 total_size = size;
65954bd8 1823
44affdae 1824 offset = frame_pointer_needed ? 8 : 4;
564d80f4 1825
44affdae
JH
1826 /* Do some sanity checking of stack_alignment_needed and preferred_alignment,
1827 since i386 port is the only using those features that may break easilly. */
564d80f4 1828
44affdae
JH
1829 if (size && !stack_alignment_needed)
1830 abort ();
5f677a9e 1831 if (!size && stack_alignment_needed != STACK_BOUNDARY / BITS_PER_UNIT)
44affdae
JH
1832 abort ();
1833 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
1834 abort ();
1835 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1836 abort ();
1837 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1838 abort ();
564d80f4 1839
44affdae
JH
1840 if (stack_alignment_needed < 4)
1841 stack_alignment_needed = 4;
564d80f4 1842
44affdae 1843 offset += nregs * UNITS_PER_WORD;
65954bd8 1844
f73ad30e
JH
1845 if (ACCUMULATE_OUTGOING_ARGS)
1846 total_size += current_function_outgoing_args_size;
1847
44affdae 1848 total_size += offset;
65954bd8 1849
44affdae
JH
1850 /* Align start of frame for local function. */
1851 padding1 = ((offset + stack_alignment_needed - 1)
1852 & -stack_alignment_needed) - offset;
1853 total_size += padding1;
54ff41b7 1854
44affdae
JH
1855 /* Align stack boundary. */
1856 padding2 = ((total_size + preferred_alignment - 1)
1857 & -preferred_alignment) - total_size;
65954bd8 1858
f73ad30e
JH
1859 if (ACCUMULATE_OUTGOING_ARGS)
1860 padding2 += current_function_outgoing_args_size;
1861
65954bd8
JL
1862 if (nregs_on_stack)
1863 *nregs_on_stack = nregs;
564d80f4
JH
1864 if (rpadding1)
1865 *rpadding1 = padding1;
564d80f4
JH
1866 if (rpadding2)
1867 *rpadding2 = padding2;
1868
1869 return size + padding1 + padding2;
65954bd8
JL
1870}
1871
0903fcab
JH
/* Emit code to save registers in the prologue.

   Pushes, from the highest register number downward, every call-saved
   register that is live in this function, plus the PIC register when the
   function references the PIC offset table or the constant pool.  Each
   push insn is marked RTX_FRAME_RELATED_P so frame/unwind information
   can be generated for it.  */

static void
ix86_emit_save_regs ()
{
  register int regno;
  int limit;
  rtx insn;
  /* Nonzero if the PIC register must be preserved too.  */
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  /* Stop below the frame pointer when it is in use; the frame pointer
     is pushed separately by ix86_expand_prologue.  */
  limit = (frame_pointer_needed
	   ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);

  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && !call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
1893
e075ae69
RH
/* Expand the prologue into a bunch of separate insns.

   Emits, in order: the frame pointer push and setup (when needed),
   pushes of the call-saved registers, allocation of the stack frame
   (either a direct esp adjustment, or a call to _alloca for large
   frames when TARGET_STACK_PROBE), any SUBTARGET_PROLOGUE code, the
   PIC register load, and finally a scheduling barrier when profiling
   so nothing moves before the mcount call.  */

void
ix86_expand_prologue ()
{
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *)0, (int *)0,
						 (int *)0);
  rtx insn;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it. */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  ix86_emit_save_regs ();

  if (tsize == 0)
    ;
  else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
    {
      /* Small (or unprobed) frame: adjust esp directly.  When a frame
	 pointer exists, tie the adjustment to it so the scheduler
	 cannot reorder across it.  */
      if (frame_pointer_needed)
	insn = emit_insn (gen_pro_epilogue_adjust_stack
			  (stack_pointer_rtx, stack_pointer_rtx,
			   GEN_INT (-tsize), hard_frame_pointer_rtx));
      else
	insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				      GEN_INT (-tsize)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* Large frame with stack probing: let _alloca touch the pages.
	 The size is passed in %eax (hard reg 0).
	 ??? Is this only valid for Win32? */

      rtx arg0, sym;

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (tsize));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx));

      /* Record that the call uses %eax so dataflow keeps the move.  */
      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    load_pic_register ();

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  However, if -fpic, the above call will have
     done that. */
  if ((profile_flag || profile_block_flag) && ! pic_reg_used)
    emit_insn (gen_blockage ());
}
1963
0903fcab
JH
/* Emit code to add TSIZE to esp value.  Use POP instruction when
   profitable.

   TSIZE is the (positive) number of bytes to deallocate.  For a 4-byte
   adjustment a single `pop %ecx' is emitted; for an 8-byte adjustment
   on non-386/486 processors two pops (%ecx then %edx) are used, but
   only when %edx is not part of the function's return value.  In all
   other cases a plain add (or fp-tied adjustment) is emitted.  */

static void
ix86_emit_epilogue_esp_adjustment (tsize)
     int tsize;
{
  /* Intel's docs say that for 4 or 8 bytes of stack frame one should
     use `pop' and not `add'. */
  int use_pop = tsize == 4;
  rtx edx = 0, ecx;

  /* Use two pops only for the Pentium processors. */
  if (tsize == 8 && !TARGET_386 && !TARGET_486)
    {
      rtx retval = current_function_return_rtx;

      edx = gen_rtx_REG (SImode, 1);

      /* This case is a bit more complex.  Since we cannot pop into
	 %ecx twice we need a second register.  But this is only
	 available if the return value is not of DImode in which
	 case the %edx register is not available. */
      use_pop = (retval == NULL
		 || !reg_overlap_mentioned_p (edx, retval));
    }

  if (use_pop)
    {
      ecx = gen_rtx_REG (SImode, 2);

      /* We have to prevent the two pops here from being scheduled.
	 GCC otherwise would try in some situation to put other
	 instructions in between them which has a bad effect. */
      emit_insn (gen_blockage ());
      emit_insn (gen_popsi1 (ecx));
      if (tsize == 8)
	emit_insn (gen_popsi1 (edx));
    }
  else
    {
      /* If a frame pointer is present, we must be sure to tie the sp
	 to the fp so that we don't mis-schedule. */
      if (frame_pointer_needed)
	emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						  stack_pointer_rtx,
						  GEN_INT (tsize),
						  hard_frame_pointer_rtx));
      else
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (tsize)));
    }
}
2017
da2d1d3a
JH
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.

   The register set mirrors the one chosen by ix86_emit_save_regs (live
   call-saved registers, plus the PIC register when used), walked here in
   increasing register number — the reverse of the save order, matching
   the stack layout left by the pushes.  OFFSET advances by 4 per
   register (SImode loads).  */
static void
ix86_emit_restore_regs_using_mov (pointer, offset)
     rtx pointer;
     int offset;
{
  int regno;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int limit = (frame_pointer_needed
	       ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);

  for (regno = 0; regno < limit; regno++)
    if ((regs_ever_live[regno] && !call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      {
	emit_move_insn (gen_rtx_REG (SImode, regno),
			adj_offsettable_operand (gen_rtx_MEM (SImode,
							      pointer),
						 offset));
	offset += 4;
      }
}
2042
/* Restore function stack, frame, and registers.

   EMIT_RETURN is nonzero when a return insn should be emitted; sibcall
   epilogues pass zero and fall through.  Two strategies are used to
   restore registers: MOV loads addressed off ebp/esp (cheap when the
   stack pointer may have moved, or when `leave' can then finish the
   job), or straight POP insns after deallocating the frame.  Finally a
   return (possibly popping callee-popped argument bytes, or an
   indirect jump for pops >= 32K) is emitted.  */

void
ix86_expand_epilogue (emit_return)
     int emit_return;
{
  int nregs;
  int regno;

  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  /* Nonzero when esp still points where the prologue left it.  */
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
						 (int *)0, (int *)0);

  /* Calculate start of saved registers relative to ebp. */
  offset = -nregs * UNITS_PER_WORD;

#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    {
      FUNCTION_BLOCK_PROFILER_EXIT;
    }
#endif

  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future. */
  if ((!sp_valid && nregs <= 1)
      || (frame_pointer_needed && !nregs && tsize)
      || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
	  && nregs == 1))
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !tsize))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx, tsize);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);

      if (!frame_pointer_needed)
	ix86_emit_epilogue_esp_adjustment (tsize + nregs * UNITS_PER_WORD);
      /* If not an i386, mov & pop is faster than "leave". */
      else if (TARGET_USE_LEAVE || optimize_size)
	emit_insn (gen_leave ());
      else
	{
	  /* Discrete equivalent of `leave': mov ebp->esp, pop ebp,
	     with the adjustment tied to ebp against mis-scheduling.  */
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx,
						    hard_frame_pointer_rtx));
	  emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers. */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  /* Point esp at the saved registers via ebp.  */
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset),
						    hard_frame_pointer_rtx));
	}
      else if (tsize)
	ix86_emit_epilogue_esp_adjustment (tsize);

      /* Pop back the same register set the prologue pushed, in
	 increasing register order (reverse of the push order).  */
      for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
	if ((regs_ever_live[regno] && !call_used_regs[regno])
	    || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
	  emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
    }

  /* Sibcall epilogues don't want a return instruction. */
  if (! emit_return)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 32K bytes (maybe 64K?  Is it signed?).  If
	 asked to pop more, pop return address, do explicit add, and jump
	 indirectly to the caller. */

      if (current_function_pops_args >= 32768)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_indirect_jump (ecx);
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
2158\f
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return false if the structure of the address is
   grossly off.

   On success, fill *OUT with the base register, index register,
   displacement, and scale factor of the x86 addressing mode
   base + index*scale + disp (missing parts are NULL_RTX / 1).
   Several encoding quirks are normalized at the end; their order
   matters since later fixups look at the results of earlier ones.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;			/* index*scale + base */
	  else
	    disp = op1;			/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return FALSE;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion. */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return FALSE;
      scale = INTVAL (tmp);
      /* Only shifts by 0..3 map onto the scales 1, 2, 4, 8.  */
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return FALSE;
      scale = 1 << scale;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale. */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return FALSE;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is not scaling */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
	  || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement. */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0]. */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2. */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement. */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return TRUE;
}
3b3c6a3f 2284
e075ae69
RH
2285/* Determine if a given CONST RTX is a valid memory displacement
2286 in PIC mode. */
2287
59be65f6 2288int
91bb873f
RH
2289legitimate_pic_address_disp_p (disp)
2290 register rtx disp;
2291{
2292 if (GET_CODE (disp) != CONST)
2293 return 0;
2294 disp = XEXP (disp, 0);
2295
2296 if (GET_CODE (disp) == PLUS)
2297 {
2298 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2299 return 0;
2300 disp = XEXP (disp, 0);
2301 }
2302
2303 if (GET_CODE (disp) != UNSPEC
2304 || XVECLEN (disp, 0) != 1)
2305 return 0;
2306
2307 /* Must be @GOT or @GOTOFF. */
2308 if (XINT (disp, 1) != 6
2309 && XINT (disp, 1) != 7)
2310 return 0;
2311
2312 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2313 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2314 return 0;
2315
2316 return 1;
2317}
2318
e075ae69
RH
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   STRICT nonzero selects the strict register-validity checks
   (REG_OK_FOR_*_STRICT_P); zero allows the non-strict variants.
   Returns TRUE for a valid address, FALSE otherwise; with
   TARGET_DEBUG_ADDR each rejection is traced to stderr with a reason.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (! ix86_decompose_address (addr, &parts))
    {
      reason = "decomposition failed";
      goto error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int. */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
	{
	  reason = "base is not a register";
	  goto error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
	{
	  reason = "base is not valid";
	  goto error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int. */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
	{
	  reason = "index is not a register";
	  goto error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
	{
	  reason = "index is not valid";
	  goto error;
	}
    }

  /* Validate scale factor.  Hardware encodes only 1, 2, 4, 8, and a
     scale is meaningless without an index register.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto error;
	}
    }

  /* Validate displacement. */
  if (disp)
    {
      reason_rtx = disp;

      if (!CONSTANT_ADDRESS_P (disp))
	{
	  reason = "displacement is not constant";
	  goto error;
	}

      if (GET_CODE (disp) == CONST_DOUBLE)
	{
	  reason = "displacement is a const_double";
	  goto error;
	}

      if (flag_pic && SYMBOLIC_CONST (disp))
	{
	  if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto error;
	    }

	  /* Verify that a symbolic pic displacement includes
	     the pic_offset_table_rtx register. */
	  if (base != pic_offset_table_rtx
	      && (index != pic_offset_table_rtx || scale != 1))
	    {
	      reason = "pic displacement against invalid base";
	      goto error;
	    }
	}
      else if (HALF_PIC_P ())
	{
	  /* Half-pic references must stand alone: no base or index.  */
	  if (! HALF_PIC_ADDRESS_P (disp)
	      || (base != NULL_RTX || index != NULL_RTX))
	    {
	      reason = "displacement is an invalid half-pic reference";
	      goto error;
	    }
	}
    }

  /* Everything looks valid. */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
3b3c6a3f
MM
2493\f
2494/* Return a legitimate reference for ORIG (an address) using the
2495 register REG. If REG is 0, a new pseudo is generated.
2496
91bb873f 2497 There are two types of references that must be handled:
3b3c6a3f
MM
2498
2499 1. Global data references must load the address from the GOT, via
2500 the PIC reg. An insn is emitted to do this load, and the reg is
2501 returned.
2502
91bb873f
RH
2503 2. Static data references, constant pool addresses, and code labels
2504 compute the address as an offset from the GOT, whose base is in
2505 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2506 differentiate them from global data objects. The returned
2507 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
2508
2509 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 2510 reg also appears in the address. */
3b3c6a3f
MM
2511
2512rtx
2513legitimize_pic_address (orig, reg)
2514 rtx orig;
2515 rtx reg;
2516{
2517 rtx addr = orig;
2518 rtx new = orig;
91bb873f 2519 rtx base;
3b3c6a3f 2520
91bb873f
RH
2521 if (GET_CODE (addr) == LABEL_REF
2522 || (GET_CODE (addr) == SYMBOL_REF
2523 && (CONSTANT_POOL_ADDRESS_P (addr)
2524 || SYMBOL_REF_FLAG (addr))))
3b3c6a3f 2525 {
91bb873f
RH
2526 /* This symbol may be referenced via a displacement from the PIC
2527 base address (@GOTOFF). */
3b3c6a3f 2528
91bb873f
RH
2529 current_function_uses_pic_offset_table = 1;
2530 new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 7);
2531 new = gen_rtx_CONST (VOIDmode, new);
2532 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3b3c6a3f 2533
91bb873f
RH
2534 if (reg != 0)
2535 {
3b3c6a3f 2536 emit_move_insn (reg, new);
91bb873f 2537 new = reg;
3b3c6a3f 2538 }
3b3c6a3f 2539 }
91bb873f 2540 else if (GET_CODE (addr) == SYMBOL_REF)
3b3c6a3f 2541 {
91bb873f
RH
2542 /* This symbol must be referenced via a load from the
2543 Global Offset Table (@GOT). */
3b3c6a3f 2544
91bb873f
RH
2545 current_function_uses_pic_offset_table = 1;
2546 new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 6);
2547 new = gen_rtx_CONST (VOIDmode, new);
2548 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2549 new = gen_rtx_MEM (Pmode, new);
2550 RTX_UNCHANGING_P (new) = 1;
3b3c6a3f
MM
2551
2552 if (reg == 0)
2553 reg = gen_reg_rtx (Pmode);
91bb873f
RH
2554 emit_move_insn (reg, new);
2555 new = reg;
2556 }
2557 else
2558 {
2559 if (GET_CODE (addr) == CONST)
3b3c6a3f 2560 {
91bb873f
RH
2561 addr = XEXP (addr, 0);
2562 if (GET_CODE (addr) == UNSPEC)
2563 {
2564 /* Check that the unspec is one of the ones we generate? */
2565 }
2566 else if (GET_CODE (addr) != PLUS)
564d80f4 2567 abort ();
3b3c6a3f 2568 }
91bb873f
RH
2569 if (GET_CODE (addr) == PLUS)
2570 {
2571 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 2572
91bb873f
RH
2573 /* Check first to see if this is a constant offset from a @GOTOFF
2574 symbol reference. */
2575 if ((GET_CODE (op0) == LABEL_REF
2576 || (GET_CODE (op0) == SYMBOL_REF
2577 && (CONSTANT_POOL_ADDRESS_P (op0)
2578 || SYMBOL_REF_FLAG (op0))))
2579 && GET_CODE (op1) == CONST_INT)
2580 {
2581 current_function_uses_pic_offset_table = 1;
2582 new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, op0), 7);
2583 new = gen_rtx_PLUS (VOIDmode, new, op1);
2584 new = gen_rtx_CONST (VOIDmode, new);
2585 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2586
2587 if (reg != 0)
2588 {
2589 emit_move_insn (reg, new);
2590 new = reg;
2591 }
2592 }
2593 else
2594 {
2595 base = legitimize_pic_address (XEXP (addr, 0), reg);
2596 new = legitimize_pic_address (XEXP (addr, 1),
2597 base == reg ? NULL_RTX : reg);
2598
2599 if (GET_CODE (new) == CONST_INT)
2600 new = plus_constant (base, INTVAL (new));
2601 else
2602 {
2603 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
2604 {
2605 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
2606 new = XEXP (new, 1);
2607 }
2608 new = gen_rtx_PLUS (Pmode, base, new);
2609 }
2610 }
2611 }
3b3c6a3f
MM
2612 }
2613 return new;
2614}
2615\f
3b3c6a3f
MM
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (x, oldx, mode)
     register rtx x;
     register rtx oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  int changed = 0;		/* set whenever X is rewritten below */
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already. */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations. */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  /* The integer constant may be the outer addend or buried in
	     the inner PLUS; fold it together with the other term.  */
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Force remaining multiplications out into registers.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Last resort: force the non-register operand into a register.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
2a2ab3f9
JVA
2800\f
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.

   PIC unspecs are rendered as their relocation suffixes:
   6 -> @GOT, 7 -> @GOTOFF, 8 -> @PLT.  With CODE == 'P', a symbol
   without SYMBOL_REF_FLAG also gets an @PLT suffix.  */

static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      if (code == 'P' && ! SYMBOL_REF_FLAG (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler). */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive. */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them. */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first. */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      /* Bracket the difference so the assembler groups it correctly;
	 the delimiter depends on the assembler dialect.  */
      putc (ASSEMBLER_DIALECT ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case 6:
	  fputs ("@GOT", file);
	  break;
	case 7:
	  fputs ("@GOTOFF", file);
	  break;
	case 8:
	  fputs ("@PLT", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
1865dbb5
JM
2915
2916/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
2917 We need to handle our special PIC relocations. */
2918
2919void
2920i386_dwarf_output_addr_const (file, x)
2921 FILE *file;
2922 rtx x;
2923{
2924 fprintf (file, "\t%s\t", INT_ASM_OP);
2925 if (flag_pic)
2926 output_pic_addr_const (file, x, '\0');
2927 else
2928 output_addr_const (file, x);
2929 fputc ('\n', file);
2930}
2931
2932/* In the name of slightly smaller debug output, and to cater to
2933 general assembler losage, recognize PIC+GOTOFF and turn it back
2934 into a direct symbol reference. */
2935
2936rtx
2937i386_simplify_dwarf_addr (orig_x)
2938 rtx orig_x;
2939{
2940 rtx x = orig_x;
2941
2942 if (GET_CODE (x) != PLUS
2943 || GET_CODE (XEXP (x, 0)) != REG
2944 || GET_CODE (XEXP (x, 1)) != CONST)
2945 return orig_x;
2946
2947 x = XEXP (XEXP (x, 1), 0);
2948 if (GET_CODE (x) == UNSPEC
2949 && XINT (x, 1) == 7)
2950 return XVECEXP (x, 0, 0);
2951
2952 if (GET_CODE (x) == PLUS
2953 && GET_CODE (XEXP (x, 0)) == UNSPEC
2954 && GET_CODE (XEXP (x, 1)) == CONST_INT
2955 && XINT (XEXP (x, 0), 1) == 7)
2956 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
2957
2958 return orig_x;
2959}
2a2ab3f9 2960\f
a269a03c 2961static void
e075ae69 2962put_condition_code (code, mode, reverse, fp, file)
a269a03c 2963 enum rtx_code code;
e075ae69
RH
2964 enum machine_mode mode;
2965 int reverse, fp;
a269a03c
JC
2966 FILE *file;
2967{
a269a03c
JC
2968 const char *suffix;
2969
a269a03c
JC
2970 if (reverse)
2971 code = reverse_condition (code);
e075ae69 2972
a269a03c
JC
2973 switch (code)
2974 {
2975 case EQ:
2976 suffix = "e";
2977 break;
a269a03c
JC
2978 case NE:
2979 suffix = "ne";
2980 break;
a269a03c 2981 case GT:
e075ae69
RH
2982 if (mode == CCNOmode)
2983 abort ();
2984 suffix = "g";
a269a03c 2985 break;
a269a03c 2986 case GTU:
e075ae69
RH
2987 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
2988 Those same assemblers have the same but opposite losage on cmov. */
2989 suffix = fp ? "nbe" : "a";
a269a03c 2990 break;
a269a03c 2991 case LT:
e075ae69 2992 if (mode == CCNOmode)
a269a03c
JC
2993 suffix = "s";
2994 else
e075ae69 2995 suffix = "l";
a269a03c 2996 break;
a269a03c
JC
2997 case LTU:
2998 suffix = "b";
2999 break;
a269a03c 3000 case GE:
e075ae69 3001 if (mode == CCNOmode)
a269a03c
JC
3002 suffix = "ns";
3003 else
e075ae69 3004 suffix = "ge";
a269a03c 3005 break;
a269a03c 3006 case GEU:
e075ae69
RH
3007 /* ??? As above. */
3008 suffix = fp ? "nb" : "ae";
a269a03c 3009 break;
a269a03c 3010 case LE:
e075ae69
RH
3011 if (mode == CCNOmode)
3012 abort ();
3013 suffix = "le";
a269a03c 3014 break;
a269a03c
JC
3015 case LEU:
3016 suffix = "be";
3017 break;
3a3677ff
RH
3018 case UNORDERED:
3019 suffix = "p";
3020 break;
3021 case ORDERED:
3022 suffix = "np";
3023 break;
a269a03c
JC
3024 default:
3025 abort ();
3026 }
3027 fputs (suffix, file);
3028}
3029
e075ae69
RH
3030void
3031print_reg (x, code, file)
3032 rtx x;
3033 int code;
3034 FILE *file;
e5cb57e8 3035{
e075ae69 3036 if (REGNO (x) == ARG_POINTER_REGNUM
564d80f4 3037 || REGNO (x) == FRAME_POINTER_REGNUM
e075ae69
RH
3038 || REGNO (x) == FLAGS_REG
3039 || REGNO (x) == FPSR_REG)
3040 abort ();
e9a25f70 3041
e075ae69
RH
3042 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3043 putc ('%', file);
3044
3045 if (code == 'w')
3046 code = 2;
3047 else if (code == 'b')
3048 code = 1;
3049 else if (code == 'k')
3050 code = 4;
3051 else if (code == 'y')
3052 code = 3;
3053 else if (code == 'h')
3054 code = 0;
3055 else
3056 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 3057
e075ae69
RH
3058 switch (code)
3059 {
3060 case 3:
3061 if (STACK_TOP_P (x))
3062 {
3063 fputs ("st(0)", file);
3064 break;
3065 }
3066 /* FALLTHRU */
3067 case 4:
3068 case 8:
3069 case 12:
3070 if (! FP_REG_P (x))
3071 putc ('e', file);
3072 /* FALLTHRU */
3073 case 2:
3074 fputs (hi_reg_name[REGNO (x)], file);
3075 break;
3076 case 1:
3077 fputs (qi_reg_name[REGNO (x)], file);
3078 break;
3079 case 0:
3080 fputs (qi_high_reg_name[REGNO (x)], file);
3081 break;
3082 default:
3083 abort ();
fe25fea3 3084 }
e5cb57e8
SC
3085}
3086
2a2ab3f9 3087/* Meaning of CODE:
fe25fea3 3088 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 3089 C -- print opcode suffix for set/cmov insn.
fe25fea3 3090 c -- like C, but print reversed condition
2a2ab3f9
JVA
3091 R -- print the prefix for register names.
3092 z -- print the opcode suffix for the size of the current operand.
3093 * -- print a star (in certain assembler syntax)
3094 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
3095 s -- print a shift double count, followed by the assemblers argument
3096 delimiter.
fe25fea3
SC
3097 b -- print the QImode name of the register for the indicated operand.
3098 %b0 would print %al if operands[0] is reg 0.
3099 w -- likewise, print the HImode name of the register.
3100 k -- likewise, print the SImode name of the register.
3101 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
e075ae69 3102 y -- print "st(0)" instead of "st" as a register. */
2a2ab3f9
JVA
3103
3104void
3105print_operand (file, x, code)
3106 FILE *file;
3107 rtx x;
3108 int code;
3109{
3110 if (code)
3111 {
3112 switch (code)
3113 {
3114 case '*':
e075ae69 3115 if (ASSEMBLER_DIALECT == 0)
2a2ab3f9
JVA
3116 putc ('*', file);
3117 return;
3118
2a2ab3f9 3119 case 'L':
e075ae69
RH
3120 if (ASSEMBLER_DIALECT == 0)
3121 putc ('l', file);
2a2ab3f9
JVA
3122 return;
3123
3124 case 'W':
e075ae69
RH
3125 if (ASSEMBLER_DIALECT == 0)
3126 putc ('w', file);
2a2ab3f9
JVA
3127 return;
3128
3129 case 'B':
e075ae69
RH
3130 if (ASSEMBLER_DIALECT == 0)
3131 putc ('b', file);
2a2ab3f9
JVA
3132 return;
3133
3134 case 'Q':
e075ae69
RH
3135 if (ASSEMBLER_DIALECT == 0)
3136 putc ('l', file);
2a2ab3f9
JVA
3137 return;
3138
3139 case 'S':
e075ae69
RH
3140 if (ASSEMBLER_DIALECT == 0)
3141 putc ('s', file);
2a2ab3f9
JVA
3142 return;
3143
5f1ec3e6 3144 case 'T':
e075ae69
RH
3145 if (ASSEMBLER_DIALECT == 0)
3146 putc ('t', file);
5f1ec3e6
JVA
3147 return;
3148
2a2ab3f9
JVA
3149 case 'z':
3150 /* 387 opcodes don't get size suffixes if the operands are
3151 registers. */
3152
3153 if (STACK_REG_P (x))
3154 return;
3155
e075ae69
RH
3156 /* Intel syntax has no truck with instruction suffixes. */
3157 if (ASSEMBLER_DIALECT != 0)
3158 return;
3159
2a2ab3f9
JVA
3160 /* this is the size of op from size of operand */
3161 switch (GET_MODE_SIZE (GET_MODE (x)))
3162 {
2a2ab3f9 3163 case 2:
155d8a47
JW
3164#ifdef HAVE_GAS_FILDS_FISTS
3165 putc ('s', file);
3166#endif
2a2ab3f9
JVA
3167 return;
3168
3169 case 4:
3170 if (GET_MODE (x) == SFmode)
3171 {
e075ae69 3172 putc ('s', file);
2a2ab3f9
JVA
3173 return;
3174 }
3175 else
e075ae69 3176 putc ('l', file);
2a2ab3f9
JVA
3177 return;
3178
5f1ec3e6 3179 case 12:
e075ae69
RH
3180 putc ('t', file);
3181 return;
5f1ec3e6 3182
2a2ab3f9
JVA
3183 case 8:
3184 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
3185 {
3186#ifdef GAS_MNEMONICS
e075ae69 3187 putc ('q', file);
56c0e8fa 3188#else
e075ae69
RH
3189 putc ('l', file);
3190 putc ('l', file);
56c0e8fa
JVA
3191#endif
3192 }
e075ae69
RH
3193 else
3194 putc ('l', file);
2a2ab3f9 3195 return;
155d8a47
JW
3196
3197 default:
3198 abort ();
2a2ab3f9 3199 }
4af3895e
JVA
3200
3201 case 'b':
3202 case 'w':
3203 case 'k':
3204 case 'h':
3205 case 'y':
5cb6195d 3206 case 'X':
e075ae69 3207 case 'P':
4af3895e
JVA
3208 break;
3209
2d49677f
SC
3210 case 's':
3211 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
3212 {
3213 PRINT_OPERAND (file, x, 0);
e075ae69 3214 putc (',', file);
2d49677f 3215 }
a269a03c
JC
3216 return;
3217
1853aadd 3218 case 'C':
e075ae69 3219 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 3220 return;
fe25fea3 3221 case 'F':
e075ae69 3222 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
3223 return;
3224
e9a25f70 3225 /* Like above, but reverse condition */
e075ae69
RH
3226 case 'c':
3227 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
3228 return;
fe25fea3 3229 case 'f':
e075ae69 3230 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 3231 return;
e5cb57e8 3232
4af3895e 3233 default:
68daafd4
JVA
3234 {
3235 char str[50];
68daafd4
JVA
3236 sprintf (str, "invalid operand code `%c'", code);
3237 output_operand_lossage (str);
3238 }
2a2ab3f9
JVA
3239 }
3240 }
e9a25f70 3241
2a2ab3f9
JVA
3242 if (GET_CODE (x) == REG)
3243 {
3244 PRINT_REG (x, code, file);
3245 }
e9a25f70 3246
2a2ab3f9
JVA
3247 else if (GET_CODE (x) == MEM)
3248 {
e075ae69
RH
3249 /* No `byte ptr' prefix for call instructions. */
3250 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
2a2ab3f9 3251 {
69ddee61 3252 const char * size;
e075ae69
RH
3253 switch (GET_MODE_SIZE (GET_MODE (x)))
3254 {
3255 case 1: size = "BYTE"; break;
3256 case 2: size = "WORD"; break;
3257 case 4: size = "DWORD"; break;
3258 case 8: size = "QWORD"; break;
3259 case 12: size = "XWORD"; break;
3260 default:
564d80f4 3261 abort ();
e075ae69
RH
3262 }
3263 fputs (size, file);
3264 fputs (" PTR ", file);
2a2ab3f9 3265 }
e075ae69
RH
3266
3267 x = XEXP (x, 0);
3268 if (flag_pic && CONSTANT_ADDRESS_P (x))
3269 output_pic_addr_const (file, x, code);
2a2ab3f9 3270 else
e075ae69 3271 output_address (x);
2a2ab3f9 3272 }
e9a25f70 3273
2a2ab3f9
JVA
3274 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
3275 {
e9a25f70
JL
3276 REAL_VALUE_TYPE r;
3277 long l;
3278
5f1ec3e6
JVA
3279 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3280 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69
RH
3281
3282 if (ASSEMBLER_DIALECT == 0)
3283 putc ('$', file);
52267fcb 3284 fprintf (file, "0x%lx", l);
5f1ec3e6 3285 }
e9a25f70 3286
5f1ec3e6
JVA
3287 /* These float cases don't actually occur as immediate operands. */
3288 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
3289 {
e9a25f70
JL
3290 REAL_VALUE_TYPE r;
3291 char dstr[30];
3292
5f1ec3e6
JVA
3293 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3294 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3295 fprintf (file, "%s", dstr);
2a2ab3f9 3296 }
e9a25f70 3297
5f1ec3e6 3298 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
2a2ab3f9 3299 {
e9a25f70
JL
3300 REAL_VALUE_TYPE r;
3301 char dstr[30];
3302
5f1ec3e6
JVA
3303 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3304 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3305 fprintf (file, "%s", dstr);
2a2ab3f9 3306 }
79325812 3307 else
2a2ab3f9 3308 {
4af3895e 3309 if (code != 'P')
2a2ab3f9 3310 {
695dac07 3311 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69
RH
3312 {
3313 if (ASSEMBLER_DIALECT == 0)
3314 putc ('$', file);
3315 }
2a2ab3f9
JVA
3316 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
3317 || GET_CODE (x) == LABEL_REF)
e075ae69
RH
3318 {
3319 if (ASSEMBLER_DIALECT == 0)
3320 putc ('$', file);
3321 else
3322 fputs ("OFFSET FLAT:", file);
3323 }
2a2ab3f9 3324 }
e075ae69
RH
3325 if (GET_CODE (x) == CONST_INT)
3326 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3327 else if (flag_pic)
2a2ab3f9
JVA
3328 output_pic_addr_const (file, x, code);
3329 else
3330 output_addr_const (file, x);
3331 }
3332}
3333\f
3334/* Print a memory operand whose address is ADDR. */
3335
3336void
3337print_operand_address (file, addr)
3338 FILE *file;
3339 register rtx addr;
3340{
e075ae69
RH
3341 struct ix86_address parts;
3342 rtx base, index, disp;
3343 int scale;
e9a25f70 3344
e075ae69
RH
3345 if (! ix86_decompose_address (addr, &parts))
3346 abort ();
e9a25f70 3347
e075ae69
RH
3348 base = parts.base;
3349 index = parts.index;
3350 disp = parts.disp;
3351 scale = parts.scale;
e9a25f70 3352
e075ae69
RH
3353 if (!base && !index)
3354 {
3355 /* Displacement only requires special attention. */
e9a25f70 3356
e075ae69 3357 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 3358 {
e075ae69
RH
3359 if (ASSEMBLER_DIALECT != 0)
3360 fputs ("ds:", file);
3361 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
2a2ab3f9 3362 }
e075ae69
RH
3363 else if (flag_pic)
3364 output_pic_addr_const (file, addr, 0);
3365 else
3366 output_addr_const (file, addr);
3367 }
3368 else
3369 {
3370 if (ASSEMBLER_DIALECT == 0)
2a2ab3f9 3371 {
e075ae69 3372 if (disp)
2a2ab3f9 3373 {
c399861d 3374 if (flag_pic)
e075ae69
RH
3375 output_pic_addr_const (file, disp, 0);
3376 else if (GET_CODE (disp) == LABEL_REF)
3377 output_asm_label (disp);
2a2ab3f9 3378 else
e075ae69 3379 output_addr_const (file, disp);
2a2ab3f9
JVA
3380 }
3381
e075ae69
RH
3382 putc ('(', file);
3383 if (base)
3384 PRINT_REG (base, 0, file);
3385 if (index)
2a2ab3f9 3386 {
e075ae69
RH
3387 putc (',', file);
3388 PRINT_REG (index, 0, file);
3389 if (scale != 1)
3390 fprintf (file, ",%d", scale);
2a2ab3f9 3391 }
e075ae69 3392 putc (')', file);
2a2ab3f9 3393 }
2a2ab3f9
JVA
3394 else
3395 {
e075ae69 3396 rtx offset = NULL_RTX;
e9a25f70 3397
e075ae69
RH
3398 if (disp)
3399 {
3400 /* Pull out the offset of a symbol; print any symbol itself. */
3401 if (GET_CODE (disp) == CONST
3402 && GET_CODE (XEXP (disp, 0)) == PLUS
3403 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3404 {
3405 offset = XEXP (XEXP (disp, 0), 1);
3406 disp = gen_rtx_CONST (VOIDmode,
3407 XEXP (XEXP (disp, 0), 0));
3408 }
ce193852 3409
e075ae69
RH
3410 if (flag_pic)
3411 output_pic_addr_const (file, disp, 0);
3412 else if (GET_CODE (disp) == LABEL_REF)
3413 output_asm_label (disp);
3414 else if (GET_CODE (disp) == CONST_INT)
3415 offset = disp;
3416 else
3417 output_addr_const (file, disp);
3418 }
e9a25f70 3419
e075ae69
RH
3420 putc ('[', file);
3421 if (base)
a8620236 3422 {
e075ae69
RH
3423 PRINT_REG (base, 0, file);
3424 if (offset)
3425 {
3426 if (INTVAL (offset) >= 0)
3427 putc ('+', file);
3428 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3429 }
a8620236 3430 }
e075ae69
RH
3431 else if (offset)
3432 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 3433 else
e075ae69 3434 putc ('0', file);
e9a25f70 3435
e075ae69
RH
3436 if (index)
3437 {
3438 putc ('+', file);
3439 PRINT_REG (index, 0, file);
3440 if (scale != 1)
3441 fprintf (file, "*%d", scale);
3442 }
3443 putc (']', file);
3444 }
2a2ab3f9
JVA
3445 }
3446}
3447\f
3448/* Split one or more DImode RTL references into pairs of SImode
3449 references. The RTL can be REG, offsettable MEM, integer constant, or
3450 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3451 split and "num" is its length. lo_half and hi_half are output arrays
3452 that parallel "operands". */
3453
3454void
3455split_di (operands, num, lo_half, hi_half)
3456 rtx operands[];
3457 int num;
3458 rtx lo_half[], hi_half[];
3459{
3460 while (num--)
3461 {
57dbca5e 3462 rtx op = operands[num];
e075ae69
RH
3463 if (CONSTANT_P (op))
3464 split_double (op, &lo_half[num], &hi_half[num]);
3465 else if (! reload_completed)
a269a03c
JC
3466 {
3467 lo_half[num] = gen_lowpart (SImode, op);
3468 hi_half[num] = gen_highpart (SImode, op);
3469 }
3470 else if (GET_CODE (op) == REG)
2a2ab3f9 3471 {
57dbca5e
BS
3472 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3473 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
2a2ab3f9 3474 }
57dbca5e 3475 else if (offsettable_memref_p (op))
2a2ab3f9 3476 {
57dbca5e
BS
3477 rtx lo_addr = XEXP (op, 0);
3478 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3479 lo_half[num] = change_address (op, SImode, lo_addr);
3480 hi_half[num] = change_address (op, SImode, hi_addr);
2a2ab3f9
JVA
3481 }
3482 else
564d80f4 3483 abort ();
2a2ab3f9
JVA
3484 }
3485}
3486\f
2a2ab3f9
JVA
3487/* Output code to perform a 387 binary operation in INSN, one of PLUS,
3488 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3489 is the expression of the binary operation. The output may either be
3490 emitted here, or returned to the caller, like all output_* functions.
3491
3492 There is no guarantee that the operands are the same mode, as they
3493 might be within FLOAT or FLOAT_EXTEND expressions. */
3494
e3c2afab
AM
3495#ifndef SYSV386_COMPAT
3496/* Set to 1 for compatibility with brain-damaged assemblers. No-one
3497 wants to fix the assemblers because that causes incompatibility
3498 with gcc. No-one wants to fix gcc because that causes
3499 incompatibility with assemblers... You can use the option of
3500 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
3501#define SYSV386_COMPAT 1
3502#endif
3503
69ddee61 3504const char *
2a2ab3f9
JVA
3505output_387_binary_op (insn, operands)
3506 rtx insn;
3507 rtx *operands;
3508{
e3c2afab 3509 static char buf[30];
69ddee61 3510 const char *p;
2a2ab3f9 3511
e3c2afab
AM
3512#ifdef ENABLE_CHECKING
3513 /* Even if we do not want to check the inputs, this documents input
3514 constraints. Which helps in understanding the following code. */
3515 if (STACK_REG_P (operands[0])
3516 && ((REG_P (operands[1])
3517 && REGNO (operands[0]) == REGNO (operands[1])
3518 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
3519 || (REG_P (operands[2])
3520 && REGNO (operands[0]) == REGNO (operands[2])
3521 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
3522 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
3523 ; /* ok */
3524 else
3525 abort ();
3526#endif
3527
2a2ab3f9
JVA
3528 switch (GET_CODE (operands[3]))
3529 {
3530 case PLUS:
e075ae69
RH
3531 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3532 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3533 p = "fiadd";
3534 else
3535 p = "fadd";
2a2ab3f9
JVA
3536 break;
3537
3538 case MINUS:
e075ae69
RH
3539 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3540 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3541 p = "fisub";
3542 else
3543 p = "fsub";
2a2ab3f9
JVA
3544 break;
3545
3546 case MULT:
e075ae69
RH
3547 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3548 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3549 p = "fimul";
3550 else
3551 p = "fmul";
2a2ab3f9
JVA
3552 break;
3553
3554 case DIV:
e075ae69
RH
3555 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3556 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3557 p = "fidiv";
3558 else
3559 p = "fdiv";
2a2ab3f9
JVA
3560 break;
3561
3562 default:
3563 abort ();
3564 }
3565
e075ae69 3566 strcpy (buf, p);
2a2ab3f9
JVA
3567
3568 switch (GET_CODE (operands[3]))
3569 {
3570 case MULT:
3571 case PLUS:
3572 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
3573 {
e3c2afab 3574 rtx temp = operands[2];
2a2ab3f9
JVA
3575 operands[2] = operands[1];
3576 operands[1] = temp;
3577 }
3578
e3c2afab
AM
3579 /* know operands[0] == operands[1]. */
3580
2a2ab3f9 3581 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
3582 {
3583 p = "%z2\t%2";
3584 break;
3585 }
2a2ab3f9
JVA
3586
3587 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
3588 {
3589 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
3590 /* How is it that we are storing to a dead operand[2]?
3591 Well, presumably operands[1] is dead too. We can't
3592 store the result to st(0) as st(0) gets popped on this
3593 instruction. Instead store to operands[2] (which I
3594 think has to be st(1)). st(1) will be popped later.
3595 gcc <= 2.8.1 didn't have this check and generated
3596 assembly code that the Unixware assembler rejected. */
3597 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 3598 else
e3c2afab 3599 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 3600 break;
6b28fd63 3601 }
2a2ab3f9
JVA
3602
3603 if (STACK_TOP_P (operands[0]))
e3c2afab 3604 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 3605 else
e3c2afab 3606 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 3607 break;
2a2ab3f9
JVA
3608
3609 case MINUS:
3610 case DIV:
3611 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
3612 {
3613 p = "r%z1\t%1";
3614 break;
3615 }
2a2ab3f9
JVA
3616
3617 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
3618 {
3619 p = "%z2\t%2";
3620 break;
3621 }
2a2ab3f9 3622
2a2ab3f9 3623 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 3624 {
e3c2afab
AM
3625#if SYSV386_COMPAT
3626 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
3627 derived assemblers, confusingly reverse the direction of
3628 the operation for fsub{r} and fdiv{r} when the
3629 destination register is not st(0). The Intel assembler
3630 doesn't have this brain damage. Read !SYSV386_COMPAT to
3631 figure out what the hardware really does. */
3632 if (STACK_TOP_P (operands[0]))
3633 p = "{p\t%0, %2|rp\t%2, %0}";
3634 else
3635 p = "{rp\t%2, %0|p\t%0, %2}";
3636#else
6b28fd63 3637 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
3638 /* As above for fmul/fadd, we can't store to st(0). */
3639 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 3640 else
e3c2afab
AM
3641 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3642#endif
e075ae69 3643 break;
6b28fd63 3644 }
2a2ab3f9
JVA
3645
3646 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 3647 {
e3c2afab 3648#if SYSV386_COMPAT
6b28fd63 3649 if (STACK_TOP_P (operands[0]))
e3c2afab 3650 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 3651 else
e3c2afab
AM
3652 p = "{p\t%1, %0|rp\t%0, %1}";
3653#else
3654 if (STACK_TOP_P (operands[0]))
3655 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
3656 else
3657 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
3658#endif
e075ae69 3659 break;
6b28fd63 3660 }
2a2ab3f9
JVA
3661
3662 if (STACK_TOP_P (operands[0]))
3663 {
3664 if (STACK_TOP_P (operands[1]))
e3c2afab 3665 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 3666 else
e3c2afab 3667 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 3668 break;
2a2ab3f9
JVA
3669 }
3670 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
3671 {
3672#if SYSV386_COMPAT
3673 p = "{\t%1, %0|r\t%0, %1}";
3674#else
3675 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
3676#endif
3677 }
2a2ab3f9 3678 else
e3c2afab
AM
3679 {
3680#if SYSV386_COMPAT
3681 p = "{r\t%2, %0|\t%0, %2}";
3682#else
3683 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
3684#endif
3685 }
e075ae69 3686 break;
2a2ab3f9
JVA
3687
3688 default:
3689 abort ();
3690 }
e075ae69
RH
3691
3692 strcat (buf, p);
3693 return buf;
2a2ab3f9 3694}
e075ae69 3695
2a2ab3f9 3696/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 3697 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 3698 operand may be [SDX]Fmode. */
2a2ab3f9 3699
69ddee61 3700const char *
2a2ab3f9
JVA
3701output_fix_trunc (insn, operands)
3702 rtx insn;
3703 rtx *operands;
3704{
3705 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69
RH
3706 int dimode_p = GET_MODE (operands[0]) == DImode;
3707 rtx xops[4];
2a2ab3f9 3708
e075ae69
RH
3709 /* Jump through a hoop or two for DImode, since the hardware has no
3710 non-popping instruction. We used to do this a different way, but
3711 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
3712 if (dimode_p && !stack_top_dies)
3713 output_asm_insn ("fld\t%y1", operands);
e075ae69
RH
3714
3715 if (! STACK_TOP_P (operands[1]))
10195bd8
JW
3716 abort ();
3717
e075ae69
RH
3718 xops[0] = GEN_INT (12);
3719 xops[1] = adj_offsettable_operand (operands[2], 1);
3720 xops[1] = change_address (xops[1], QImode, NULL_RTX);
305f097e 3721
e075ae69
RH
3722 xops[2] = operands[0];
3723 if (GET_CODE (operands[0]) != MEM)
3724 xops[2] = operands[3];
2a2ab3f9 3725
e075ae69
RH
3726 output_asm_insn ("fnstcw\t%2", operands);
3727 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
3728 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
3729 output_asm_insn ("fldcw\t%2", operands);
3730 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
e9a25f70 3731
e075ae69
RH
3732 if (stack_top_dies || dimode_p)
3733 output_asm_insn ("fistp%z2\t%2", xops);
10195bd8 3734 else
e075ae69
RH
3735 output_asm_insn ("fist%z2\t%2", xops);
3736
3737 output_asm_insn ("fldcw\t%2", operands);
10195bd8 3738
e075ae69 3739 if (GET_CODE (operands[0]) != MEM)
2a2ab3f9 3740 {
e075ae69 3741 if (dimode_p)
2e14a41b 3742 {
e075ae69
RH
3743 split_di (operands+0, 1, xops+0, xops+1);
3744 split_di (operands+3, 1, xops+2, xops+3);
3745 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3746 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
2e14a41b 3747 }
46d21d2c 3748 else if (GET_MODE (operands[0]) == SImode)
e3c2afab 3749 output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
46d21d2c
JW
3750 else
3751 output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
2a2ab3f9 3752 }
2a2ab3f9 3753
e075ae69 3754 return "";
2a2ab3f9 3755}
cda749b1 3756
e075ae69
RH
3757/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
3758 should be used and 2 when fnstsw should be used. UNORDERED_P is true
3759 when fucom should be used. */
3760
69ddee61 3761const char *
e075ae69 3762output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
3763 rtx insn;
3764 rtx *operands;
e075ae69 3765 int eflags_p, unordered_p;
cda749b1 3766{
e075ae69
RH
3767 int stack_top_dies;
3768 rtx cmp_op0 = operands[0];
3769 rtx cmp_op1 = operands[1];
3770
3771 if (eflags_p == 2)
3772 {
3773 cmp_op0 = cmp_op1;
3774 cmp_op1 = operands[2];
3775 }
cda749b1 3776
e075ae69 3777 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
3778 abort ();
3779
e075ae69 3780 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 3781
e075ae69
RH
3782 if (STACK_REG_P (cmp_op1)
3783 && stack_top_dies
3784 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
3785 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 3786 {
e075ae69
RH
3787 /* If both the top of the 387 stack dies, and the other operand
3788 is also a stack register that dies, then this must be a
3789 `fcompp' float compare */
3790
3791 if (eflags_p == 1)
3792 {
3793 /* There is no double popping fcomi variant. Fortunately,
3794 eflags is immune from the fstp's cc clobbering. */
3795 if (unordered_p)
3796 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
3797 else
3798 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
3799 return "fstp\t%y0";
3800 }
3801 else
cda749b1 3802 {
e075ae69
RH
3803 if (eflags_p == 2)
3804 {
3805 if (unordered_p)
3806 return "fucompp\n\tfnstsw\t%0";
3807 else
3808 return "fcompp\n\tfnstsw\t%0";
3809 }
cda749b1
JW
3810 else
3811 {
e075ae69
RH
3812 if (unordered_p)
3813 return "fucompp";
3814 else
3815 return "fcompp";
cda749b1
JW
3816 }
3817 }
cda749b1
JW
3818 }
3819 else
3820 {
e075ae69 3821 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 3822
69ddee61 3823 static const char * const alt[24] =
e075ae69
RH
3824 {
3825 "fcom%z1\t%y1",
3826 "fcomp%z1\t%y1",
3827 "fucom%z1\t%y1",
3828 "fucomp%z1\t%y1",
3829
3830 "ficom%z1\t%y1",
3831 "ficomp%z1\t%y1",
3832 NULL,
3833 NULL,
3834
3835 "fcomi\t{%y1, %0|%0, %y1}",
3836 "fcomip\t{%y1, %0|%0, %y1}",
3837 "fucomi\t{%y1, %0|%0, %y1}",
3838 "fucomip\t{%y1, %0|%0, %y1}",
3839
3840 NULL,
3841 NULL,
3842 NULL,
3843 NULL,
3844
3845 "fcom%z2\t%y2\n\tfnstsw\t%0",
3846 "fcomp%z2\t%y2\n\tfnstsw\t%0",
3847 "fucom%z2\t%y2\n\tfnstsw\t%0",
3848 "fucomp%z2\t%y2\n\tfnstsw\t%0",
3849
3850 "ficom%z2\t%y2\n\tfnstsw\t%0",
3851 "ficomp%z2\t%y2\n\tfnstsw\t%0",
3852 NULL,
3853 NULL
3854 };
3855
3856 int mask;
69ddee61 3857 const char *ret;
e075ae69
RH
3858
3859 mask = eflags_p << 3;
3860 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
3861 mask |= unordered_p << 1;
3862 mask |= stack_top_dies;
3863
3864 if (mask >= 24)
3865 abort ();
3866 ret = alt[mask];
3867 if (ret == NULL)
3868 abort ();
cda749b1 3869
e075ae69 3870 return ret;
cda749b1
JW
3871 }
3872}
2a2ab3f9 3873
e075ae69 3874/* Output assembler code to FILE to initialize basic-block profiling.
2a2ab3f9 3875
e075ae69 3876 If profile_block_flag == 2
2a2ab3f9 3877
e075ae69
RH
3878 Output code to call the subroutine `__bb_init_trace_func'
3879 and pass two parameters to it. The first parameter is
3880 the address of a block allocated in the object module.
3881 The second parameter is the number of the first basic block
3882 of the function.
2a2ab3f9 3883
e075ae69
RH
3884 The name of the block is a local symbol made with this statement:
3885
3886 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
2a2ab3f9 3887
e075ae69
RH
3888 Of course, since you are writing the definition of
3889 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3890 can take a short cut in the definition of this macro and use the
3891 name that you know will result.
2a2ab3f9 3892
e075ae69
RH
3893 The number of the first basic block of the function is
3894 passed to the macro in BLOCK_OR_LABEL.
2a2ab3f9 3895
e075ae69
RH
3896 If described in a virtual assembler language the code to be
3897 output looks like:
2a2ab3f9 3898
e075ae69
RH
3899 parameter1 <- LPBX0
3900 parameter2 <- BLOCK_OR_LABEL
3901 call __bb_init_trace_func
2a2ab3f9 3902
e075ae69 3903 else if profile_block_flag != 0
e74389ff 3904
e075ae69
RH
3905 Output code to call the subroutine `__bb_init_func'
3906 and pass one single parameter to it, which is the same
3907 as the first parameter to `__bb_init_trace_func'.
e74389ff 3908
e075ae69
RH
3909 The first word of this parameter is a flag which will be nonzero if
3910 the object module has already been initialized. So test this word
3911 first, and do not call `__bb_init_func' if the flag is nonzero.
3912 Note: When profile_block_flag == 2 the test need not be done
3913 but `__bb_init_trace_func' *must* be called.
e74389ff 3914
e075ae69
RH
3915 BLOCK_OR_LABEL may be used to generate a label number as a
3916 branch destination in case `__bb_init_func' will not be called.
e74389ff 3917
e075ae69
RH
3918 If described in a virtual assembler language the code to be
3919 output looks like:
2a2ab3f9 3920
e075ae69
RH
3921 cmp (LPBX0),0
3922 jne local_label
3923 parameter1 <- LPBX0
3924 call __bb_init_func
3925 local_label:
3926*/
c572e5ba 3927
e075ae69
RH
3928void
3929ix86_output_function_block_profiler (file, block_or_label)
3930 FILE *file;
3931 int block_or_label;
c572e5ba 3932{
e075ae69
RH
3933 static int num_func = 0;
3934 rtx xops[8];
3935 char block_table[80], false_label[80];
c572e5ba 3936
e075ae69 3937 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
e9a25f70 3938
e075ae69
RH
3939 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
3940 xops[5] = stack_pointer_rtx;
3941 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
2a2ab3f9 3942
e075ae69 3943 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
c572e5ba 3944
e075ae69 3945 switch (profile_block_flag)
c572e5ba 3946 {
e075ae69
RH
3947 case 2:
3948 xops[2] = GEN_INT (block_or_label);
3949 xops[3] = gen_rtx_MEM (Pmode,
3950 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
3951 xops[6] = GEN_INT (8);
e9a25f70 3952
e075ae69
RH
3953 output_asm_insn ("push{l}\t%2", xops);
3954 if (!flag_pic)
3955 output_asm_insn ("push{l}\t%1", xops);
e9a25f70 3956 else
870a0c2c 3957 {
e075ae69
RH
3958 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
3959 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 3960 }
e075ae69
RH
3961 output_asm_insn ("call\t%P3", xops);
3962 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
3963 break;
c572e5ba 3964
e075ae69
RH
3965 default:
3966 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
c572e5ba 3967
e075ae69
RH
3968 xops[0] = const0_rtx;
3969 xops[2] = gen_rtx_MEM (Pmode,
3970 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
3971 xops[3] = gen_rtx_MEM (Pmode,
3972 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
3973 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
3974 xops[6] = GEN_INT (4);
a14003ee 3975
e075ae69 3976 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
446ba526 3977
e075ae69
RH
3978 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
3979 output_asm_insn ("jne\t%2", xops);
870a0c2c 3980
e075ae69
RH
3981 if (!flag_pic)
3982 output_asm_insn ("push{l}\t%1", xops);
3983 else
3984 {
3985 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
3986 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 3987 }
e075ae69
RH
3988 output_asm_insn ("call\t%P3", xops);
3989 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
3990 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
3991 num_func++;
3992 break;
c572e5ba 3993 }
2a2ab3f9 3994}
305f097e 3995
e075ae69
RH
3996/* Output assembler code to FILE to increment a counter associated
3997 with basic block number BLOCKNO.
305f097e 3998
e075ae69 3999 If profile_block_flag == 2
ecbc4695 4000
e075ae69
RH
4001 Output code to initialize the global structure `__bb' and
4002 call the function `__bb_trace_func' which will increment the
4003 counter.
ecbc4695 4004
e075ae69
RH
4005 `__bb' consists of two words. In the first word the number
4006 of the basic block has to be stored. In the second word
4007 the address of a block allocated in the object module
4008 has to be stored.
ecbc4695 4009
e075ae69 4010 The basic block number is given by BLOCKNO.
ecbc4695 4011
e075ae69 4012 The address of the block is given by the label created with
305f097e 4013
e075ae69 4014 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
305f097e 4015
e075ae69 4016 by FUNCTION_BLOCK_PROFILER.
ecbc4695 4017
e075ae69
RH
4018 Of course, since you are writing the definition of
4019 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4020 can take a short cut in the definition of this macro and use the
4021 name that you know will result.
305f097e 4022
e075ae69
RH
4023 If described in a virtual assembler language the code to be
4024 output looks like:
305f097e 4025
e075ae69
RH
4026 move BLOCKNO -> (__bb)
4027 move LPBX0 -> (__bb+4)
4028 call __bb_trace_func
305f097e 4029
e075ae69
RH
4030 Note that function `__bb_trace_func' must not change the
4031 machine state, especially the flag register. To grant
4032 this, you must output code to save and restore registers
4033 either in this macro or in the macros MACHINE_STATE_SAVE
4034 and MACHINE_STATE_RESTORE. The last two macros will be
4035 used in the function `__bb_trace_func', so you must make
4036 sure that the function prologue does not change any
4037 register prior to saving it with MACHINE_STATE_SAVE.
305f097e 4038
e075ae69 4039 else if profile_block_flag != 0
305f097e 4040
e075ae69
RH
4041 Output code to increment the counter directly.
4042 Basic blocks are numbered separately from zero within each
4043 compiled object module. The count associated with block number
4044 BLOCKNO is at index BLOCKNO in an array of words; the name of
4045 this array is a local symbol made with this statement:
32b5b1aa 4046
e075ae69 4047 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
32b5b1aa 4048
e075ae69
RH
4049 Of course, since you are writing the definition of
4050 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4051 can take a short cut in the definition of this macro and use the
4052 name that you know will result.
32b5b1aa 4053
e075ae69
RH
4054 If described in a virtual assembler language the code to be
4055 output looks like:
32b5b1aa 4056
e075ae69
RH
4057 inc (LPBX2+4*BLOCKNO)
4058*/
32b5b1aa 4059
/* Emit the per-basic-block profiling code described in the comment
   above for block number BLOCKNO.  FILE is unused here; assembly is
   produced via output_asm_insn.  */

void
ix86_output_block_profiler (file, blockno)
     FILE *file ATTRIBUTE_UNUSED;
     int blockno;
{
  rtx xops[8], cnt_rtx;
  char counts[80];
  char *block_table = counts;	/* The two label names share one buffer.  */

  switch (profile_block_flag)
    {
    case 2:
      /* Store BLOCKNO and the block-table address into the two words of
	 `__bb', then call __bb_trace_func.  */
      ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);

      xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
      xops[2] = GEN_INT (blockno);
      xops[3] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
      xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
      xops[5] = plus_constant (xops[4], 4);
      xops[0] = gen_rtx_MEM (SImode, xops[4]);
      xops[6] = gen_rtx_MEM (SImode, xops[5]);

      CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;

      /* The flags must survive the call (see the comment block above);
	 wrap everything in pushf/popf.  */
      output_asm_insn ("pushf", xops);
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      if (flag_pic)
	{
	  /* Compute the symbol address in eax, preserving eax itself.  */
	  xops[7] = gen_rtx_REG (Pmode, 0);	/* eax */
	  output_asm_insn ("push{l}\t%7", xops);
	  output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
	  output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
	  output_asm_insn ("pop{l}\t%7", xops);
	}
      else
	output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
      output_asm_insn ("call\t%P3", xops);
      output_asm_insn ("popf", xops);

      break;

    default:
      /* Increment slot BLOCKNO of the LPBX2 counter array in place.  */
      ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
      cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
      SYMBOL_REF_FLAG (cnt_rtx) = TRUE;

      if (blockno)
	cnt_rtx = plus_constant (cnt_rtx, blockno*4);

      if (flag_pic)
	cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);

      xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
      output_asm_insn ("inc{l}\t%0", xops);

      break;
    }
}
32b5b1aa 4119\f
/* Expand a move of MODE from operands[1] to operands[0], massaging the
   operands into forms the insn patterns accept, then emit the SET.  */

void
ix86_expand_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  int strict = (reload_in_progress || reload_completed);
  rtx insn;

  if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
    {
      /* Emit insns to move operands[1] into operands[0].  */

      if (GET_CODE (operands[0]) == MEM)
	operands[1] = force_reg (Pmode, operands[1]);
      else
	{
	  /* Legitimize the symbolic address; if that already produced
	     the destination itself, the move is done.  */
	  rtx temp = operands[0];
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (operands[1], temp);
	  if (temp == operands[0])
	    return;
	  operands[1] = temp;
	}
    }
  else
    {
      /* mem->mem moves (other than non-QImode pushes) must go through
	 a register.  */
      if (GET_CODE (operands[0]) == MEM
	  && (GET_MODE (operands[0]) == QImode
	      || !push_operand (operands[0], mode))
	  && GET_CODE (operands[1]) == MEM)
	operands[1] = force_reg (mode, operands[1]);

      if (push_operand (operands[0], mode)
	  && ! general_no_elim_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  (Skipped under reload, when new memory
	     references may not be created.)  */

	  if (strict)
	    ;
	  else if (GET_CODE (operands[1]) == CONST_DOUBLE
		   && register_operand (operands[0], mode))
	    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
	}
    }

  insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);

  emit_insn (insn);
}
e9a25f70 4175
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  Emits the
   insn(s); operands[0] always receives the result.  */

void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  matching_memory records which
     source (1 or 2) equals the memory destination.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  Keep the one that
     matches the destination, force the other into a register.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && !reload_in_progress && !reload_completed)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      /* Normal case: the arithmetic insn clobbers the flags.  */
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
4265
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  Mirrors the operand massaging performed by
   ix86_expand_binary_operator above.  */

int
ix86_binary_operator_ok (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[3];
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutable, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
	    || (GET_RTX_CLASS (code) == 'c'
		&& rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutable and the source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != 'c'
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;
  return 1;
}
4295
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && !reload_in_progress && !reload_completed)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  NOT is emitted without a flags
	 clobber, and is the only code expected here during reload.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
4357
4358/* Return TRUE or FALSE depending on whether the unary operator meets the
4359 appropriate constraints. */
4360
4361int
4362ix86_unary_operator_ok (code, mode, operands)
4363 enum rtx_code code ATTRIBUTE_UNUSED;
4364 enum machine_mode mode ATTRIBUTE_UNUSED;
4365 rtx operands[2] ATTRIBUTE_UNUSED;
4366{
06a964de
JH
4367 /* If one of operands is memory, source and destination must match. */
4368 if ((GET_CODE (operands[0]) == MEM
4369 || GET_CODE (operands[1]) == MEM)
4370 && ! rtx_equal_p (operands[0], operands[1]))
4371 return FALSE;
e075ae69
RH
4372 return TRUE;
4373}
4374
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (insn, req_mode)
     rtx insn;
     enum machine_mode req_mode;
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();

  /* The cases cascade from least to most constrained mode: an insn
     producing a less constrained mode cannot satisfy a stricter
     requirement.  */
  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCmode:
      if (req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCNOmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
4413
e075ae69
RH
4414/* Produce an unsigned comparison for a given signed comparison. */
4415
4416static enum rtx_code
4417unsigned_comparison (code)
4418 enum rtx_code code;
4419{
4420 switch (code)
32b5b1aa 4421 {
e075ae69
RH
4422 case GT:
4423 code = GTU;
4424 break;
4425 case LT:
4426 code = LTU;
4427 break;
4428 case GE:
4429 code = GEU;
4430 break;
4431 case LE:
4432 code = LEU;
4433 break;
4434 case EQ:
4435 case NE:
4436 case LEU:
4437 case LTU:
4438 case GEU:
4439 case GTU:
3a3677ff
RH
4440 case UNORDERED:
4441 case ORDERED:
e075ae69
RH
4442 break;
4443 default:
4444 abort ();
4445 }
4446 return code;
4447}
4448
/* Generate insn patterns to do an integer compare of OPERANDS.
   Emits the flag-setting COMPARE and returns the comparison rtx
   for the eventual flags user.  */

static rtx
ix86_expand_int_compare (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
4471
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

static enum machine_mode
ix86_fp_compare_mode (code)
     enum rtx_code code;
{
  int unordered;

  switch (code)
    {
    case NE: case EQ:
      /* When not doing IEEE compliant compares, fault on NaNs.  */
      unordered = (TARGET_IEEE_FP != 0);
      break;

    case LT: case LE: case GT: case GE:
      unordered = 0;
      break;

    case UNORDERED: case ORDERED:
    case UNEQ: case UNGE: case UNGT: case UNLE: case UNLT: case LTGT:
      unordered = 1;
      break;

    default:
      abort ();
    }

  /* ??? If we knew whether invalid-operand exceptions were masked,
     we could rely on fcom to raise an exception and take care of
     NaNs.  But we don't.  We could know this from c99 math pragmas.

     NOTE(review): this override forces CCFPUmode for every code when
     TARGET_IEEE_FP is set, so the switch above then only serves to
     reject invalid codes via abort.  */
  if (TARGET_IEEE_FP)
    unordered = 1;

  return unordered ? CCFPUmode : CCFPmode;
}
4509
4510/* Return true if we should use an FCOMI instruction for this fp comparison. */
4511
4512static int
4513ix86_use_fcomi_compare (code)
4514 enum rtx_code code;
4515{
4516 return (TARGET_CMOVE
4517 && (code == ORDERED || code == UNORDERED
4518 /* All other unordered compares require checking
4519 multiple sets of bits. */
4520 || ix86_fp_compare_mode (code) == CCFPmode));
4521}
4522
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (code, pop0, pop1)
     enum rtx_code code;
     rtx *pop0, *pop1;
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (fpcmp_mode == CCFPUmode
      || op_mode == XFmode
      || ix86_use_fcomi_compare (code))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  /* Load-immediate constants (0.0, 1.0, ...) go in a register;
	     other constants live in the constant pool.  */
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
4579
/* Generate insn patterns to do a floating point compare of OPERANDS.
   SCRATCH is a HImode register used to hold the FPU status word when
   the fcomi path is not taken.  Returns the comparison rtx the flags
   user should test.  */

rtx
ix86_expand_fp_compare (code, op0, op1, scratch)
     enum rtx_code code;
     rtx op0, op1, scratch;
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  /* %%% fcomi is probably always faster, even when dealing with memory,
     since compare-and-branch would be three insns instead of four.  */
  if (ix86_use_fcomi_compare (code))
    {
      /* fcomi sets the integer flags directly.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
      emit_insn (tmp);

      /* The FP codes work out to act like unsigned.  */
      code = unsigned_comparison (code);
      intcmp_mode = CCmode;
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */

      rtx tmp2;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      if (fpcmp_mode == CCFPmode
	  || code == ORDERED
	  || code == UNORDERED)
	{
	  /* We have two options here -- use sahf, or testing bits of ah
	     directly.  On PPRO, they are equivalent, sahf being one byte
	     smaller.  On Pentium, sahf is non-pairable while test is UV
	     pairable.  */

	  if (TARGET_USE_SAHF || optimize_size)
	    {
	    do_sahf:
	      /* Copy the status word into the flags and branch on those.  */
	      emit_insn (gen_x86_sahf_1 (scratch));

	      /* The FP codes work out to act like unsigned.  */
	      code = unsigned_comparison (code);
	      intcmp_mode = CCmode;
	    }
	  else
	    {
	      /*
	       * The numbers below correspond to the bits of the FPSW in AH.
	       * C3, C2, and C0 are in bits 0x40, 0x4, and 0x01 respectively.
	       *
	       *    cmp    C3 C2 C0
	       *    >      0  0  0
	       *    <      0  0  1
	       *    =      1  0  0
	       *    un     1  1  1
	       */

	      int mask;

	      switch (code)
		{
		case GT:
		  mask = 0x41;
		  code = EQ;
		  break;
		case LT:
		  mask = 0x01;
		  code = NE;
		  break;
		case GE:
		  /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
		     faster in all cases to just fall back on sahf.  */
		  goto do_sahf;
		case LE:
		  mask = 0x41;
		  code = NE;
		  break;
		case EQ:
		  mask = 0x40;
		  code = NE;
		  break;
		case NE:
		  mask = 0x40;
		  code = EQ;
		  break;
		case UNORDERED:
		  mask = 0x04;
		  code = NE;
		  break;
		case ORDERED:
		  mask = 0x04;
		  code = EQ;
		  break;

		default:
		  abort ();
		}

	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (mask)));
	      intcmp_mode = CCNOmode;
	    }
	}
      else
	{
	  /* In the unordered case, we have to check C2 for NaN's, which
	     doesn't happen to work out to anything nice combination-wise.
	     So do some bit twiddling on the value we've got in AH to come
	     up with an appropriate set of condition codes.  */

	  intcmp_mode = CCNOmode;
	  switch (code)
	    {
	    case GT:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	      break;
	    case LT:
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	      break;
	    case GE:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	      break;
	    case LE:
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	      break;
	    case EQ:
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	      break;
	    case NE:
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x40)));
	      code = NE;
	      break;

	    case UNORDERED:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	      code = NE;
	      break;
	    case ORDERED:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	      code = EQ;
	      break;
	    case UNEQ:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	      break;
	    case UNGE:
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x01)));
	      code = NE;
	      break;
	    case UNGT:
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      code = GEU;
	      break;
	    case UNLE:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	      break;
	    case UNLT:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	      break;
	    case LTGT:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	      break;

	    default:
	      abort ();
	    }
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
4781
4782static rtx
3a3677ff 4783ix86_expand_compare (code)
e075ae69 4784 enum rtx_code code;
e075ae69
RH
4785{
4786 rtx op0, op1, ret;
4787 op0 = ix86_compare_op0;
4788 op1 = ix86_compare_op1;
4789
4790 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
3a3677ff 4791 ret = ix86_expand_fp_compare (code, op0, op1, gen_reg_rtx (HImode));
32b5b1aa 4792 else
e075ae69
RH
4793 ret = ix86_expand_int_compare (code, op0, op1);
4794
4795 return ret;
4796}
4797
/* Expand a conditional branch on CODE applied to ix86_compare_op0 and
   ix86_compare_op1, jumping to LABEL when the condition holds.  */

void
ix86_expand_branch (code, label)
     enum rtx_code code;
     rtx label;
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      /* Integer compare fits a single compare+jcc pair.  */
      tmp = ix86_expand_compare (code);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
      /* Don't expand the comparison early, so that we get better code
	 when jump or whoever decides to reverse the comparison.  */
      {
	rtvec vec;
	int use_fcomi;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	tmp = gen_rtx_fmt_ee (code, ix86_fp_compare_mode (code),
			      ix86_compare_op0, ix86_compare_op1);
	tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				    gen_rtx_LABEL_REF (VOIDmode, label),
				    pc_rtx);
	tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	/* Clobber the flag registers (18, 17), plus a status-word
	   scratch when fnstsw will be needed instead of fcomi.  */
	use_fcomi = ix86_use_fcomi_compare (code);
	vec = rtvec_alloc (3 + !use_fcomi);
	RTVEC_ELT (vec, 0) = tmp;
	RTVEC_ELT (vec, 1)
	  = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	RTVEC_ELT (vec, 2)
	  = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	if (! use_fcomi)
	  RTVEC_ELT (vec, 3)
	    = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	return;
      }

    case DImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	/* Canonicalize so a constant, if any, is operand 1.  */
	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    /* Recurse as an SImode compare of the OR result vs zero.  */
	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than, op1 is a constant and the
	   low word is zero, then we can just examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx
	    && (code == LT || code == LTU))
	  {
	    ix86_compare_op0 = hi[0];
	    ix86_compare_op1 = hi[1];
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	/* NIL marks a branch that can be omitted.  */
	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = NIL; code2 = NE;  break;
	  case NE:   code2 = NIL; break;

	  default:
	    abort ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != NIL)
	  ix86_expand_branch (code1, label);
	if (code2 != NIL)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != NIL)
	  emit_label (label2);
	return;
      }

    default:
      abort ();
    }
}
e075ae69 4962
/* Expand a setcc of DEST from the comparison in ix86_compare_op0/op1
   with condition CODE.  Returns 1 on success, 0 (FAIL) for DImode
   compares, which are not handled here.  */

int
ix86_expand_setcc (code, dest)
     enum rtx_code code;
     rtx dest;
{
  rtx ret, tmp;
  int type;

  if (GET_MODE (ix86_compare_op0) == DImode)
    return 0; /* FAIL */

  /* Three modes of generation:
     0 -- destination does not overlap compare sources:
	  clear dest first, emit strict_low_part setcc.
     1 -- destination does overlap compare sources:
	  emit subreg setcc, zero extend.
     2 -- destination is in QImode:
	  emit setcc only.
  */

  type = 0;

  if (GET_MODE (dest) == QImode)
    type = 2;
  else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
	   || reg_overlap_mentioned_p (dest, ix86_compare_op1))
    type = 1;

  /* Clearing must happen before the compare, while dest is dead.  */
  if (type == 0)
    emit_move_insn (dest, const0_rtx);

  ret = ix86_expand_compare (code);
  PUT_MODE (ret, QImode);

  /* Pick the QImode lvalue the setcc will write.  */
  tmp = dest;
  if (type == 0)
    {
      tmp = gen_lowpart (QImode, dest);
      tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
    }
  else if (type == 1)
    {
      if (!cse_not_expected)
	tmp = gen_reg_rtx (QImode);
      else
	tmp = gen_lowpart (QImode, dest);
    }

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));

  if (type == 1)
    {
      /* Widen the QImode result into dest; the zero_extend pattern
	 clobbers the flags.  */
      rtx clob;

      tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
      tmp = gen_rtx_SET (VOIDmode, dest, tmp);
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
      emit_insn (tmp);
    }

  return 1; /* DONE */
}
e075ae69 5026
32b5b1aa 5027int
e075ae69
RH
5028ix86_expand_int_movcc (operands)
5029 rtx operands[];
32b5b1aa 5030{
e075ae69
RH
5031 enum rtx_code code = GET_CODE (operands[1]), compare_code;
5032 rtx compare_seq, compare_op;
32b5b1aa 5033
36583fea
JH
5034 /* When the compare code is not LTU or GEU, we can not use sbbl case.
5035 In case comparsion is done with immediate, we can convert it to LTU or
5036 GEU by altering the integer. */
5037
5038 if ((code == LEU || code == GTU)
5039 && GET_CODE (ix86_compare_op1) == CONST_INT
5040 && GET_MODE (operands[0]) != HImode
5041 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
5042 && GET_CODE (operands[2]) == CONST_INT
5043 && GET_CODE (operands[3]) == CONST_INT)
5044 {
5045 if (code == LEU)
5046 code = LTU;
5047 else
5048 code = GEU;
5049 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
5050 }
3a3677ff 5051
e075ae69 5052 start_sequence ();
3a3677ff 5053 compare_op = ix86_expand_compare (code);
e075ae69
RH
5054 compare_seq = gen_sequence ();
5055 end_sequence ();
5056
5057 compare_code = GET_CODE (compare_op);
5058
5059 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
5060 HImode insns, we'd be swallowed in word prefix ops. */
5061
5062 if (GET_MODE (operands[0]) != HImode
5063 && GET_CODE (operands[2]) == CONST_INT
5064 && GET_CODE (operands[3]) == CONST_INT)
5065 {
5066 rtx out = operands[0];
5067 HOST_WIDE_INT ct = INTVAL (operands[2]);
5068 HOST_WIDE_INT cf = INTVAL (operands[3]);
5069 HOST_WIDE_INT diff;
5070
36583fea 5071 if (compare_code == LTU || compare_code == GEU)
e075ae69 5072 {
e075ae69
RH
5073
5074 /* Detect overlap between destination and compare sources. */
5075 rtx tmp = out;
5076
36583fea
JH
5077 /* To simplify rest of code, restrict to the GEU case. */
5078 if (compare_code == LTU)
5079 {
5080 int tmp = ct;
5081 ct = cf;
5082 cf = tmp;
5083 compare_code = reverse_condition (compare_code);
5084 code = reverse_condition (code);
5085 }
5086 diff = ct - cf;
5087
e075ae69 5088 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
a500c31b 5089 || reg_overlap_mentioned_p (out, ix86_compare_op1))
e075ae69
RH
5090 tmp = gen_reg_rtx (SImode);
5091
5092 emit_insn (compare_seq);
5093 emit_insn (gen_x86_movsicc_0_m1 (tmp));
5094
36583fea
JH
5095 if (diff == 1)
5096 {
5097 /*
5098 * cmpl op0,op1
5099 * sbbl dest,dest
5100 * [addl dest, ct]
5101 *
5102 * Size 5 - 8.
5103 */
5104 if (ct)
5105 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5106 }
5107 else if (cf == -1)
5108 {
5109 /*
5110 * cmpl op0,op1
5111 * sbbl dest,dest
5112 * orl $ct, dest
5113 *
5114 * Size 8.
5115 */
5116 emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
5117 }
5118 else if (diff == -1 && ct)
5119 {
5120 /*
5121 * cmpl op0,op1
5122 * sbbl dest,dest
5123 * xorl $-1, dest
5124 * [addl dest, cf]
5125 *
5126 * Size 8 - 11.
5127 */
5128 emit_insn (gen_one_cmplsi2 (tmp, tmp));
5129 if (cf)
5130 emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
5131 }
5132 else
5133 {
5134 /*
5135 * cmpl op0,op1
5136 * sbbl dest,dest
5137 * andl cf - ct, dest
5138 * [addl dest, ct]
5139 *
5140 * Size 8 - 11.
5141 */
5142 emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
5143 if (ct)
5144 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5145 }
e075ae69
RH
5146
5147 if (tmp != out)
5148 emit_move_insn (out, tmp);
5149
5150 return 1; /* DONE */
5151 }
5152
5153 diff = ct - cf;
5154 if (diff < 0)
5155 {
5156 HOST_WIDE_INT tmp;
5157 tmp = ct, ct = cf, cf = tmp;
5158 diff = -diff;
5159 compare_code = reverse_condition (compare_code);
5160 code = reverse_condition (code);
5161 }
5162 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
5163 || diff == 3 || diff == 5 || diff == 9)
5164 {
5165 /*
5166 * xorl dest,dest
5167 * cmpl op1,op2
5168 * setcc dest
5169 * lea cf(dest*(ct-cf)),dest
5170 *
5171 * Size 14.
5172 *
5173 * This also catches the degenerate setcc-only case.
5174 */
5175
5176 rtx tmp;
5177 int nops;
5178
5179 out = emit_store_flag (out, code, ix86_compare_op0,
5180 ix86_compare_op1, VOIDmode, 0, 1);
5181
5182 nops = 0;
5183 if (diff == 1)
5184 tmp = out;
5185 else
5186 {
5187 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
5188 nops++;
5189 if (diff & 1)
5190 {
5191 tmp = gen_rtx_PLUS (SImode, tmp, out);
5192 nops++;
5193 }
5194 }
5195 if (cf != 0)
5196 {
5197 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
5198 nops++;
5199 }
5200 if (tmp != out)
5201 {
5202 if (nops == 0)
5203 emit_move_insn (out, tmp);
5204 else if (nops == 1)
5205 {
5206 rtx clob;
5207
5208 clob = gen_rtx_REG (CCmode, FLAGS_REG);
5209 clob = gen_rtx_CLOBBER (VOIDmode, clob);
5210
5211 tmp = gen_rtx_SET (VOIDmode, out, tmp);
5212 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5213 emit_insn (tmp);
5214 }
5215 else
5216 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
5217 }
5218 if (out != operands[0])
5219 emit_move_insn (operands[0], out);
5220
5221 return 1; /* DONE */
5222 }
5223
5224 /*
5225 * General case: Jumpful:
5226 * xorl dest,dest cmpl op1, op2
5227 * cmpl op1, op2 movl ct, dest
5228 * setcc dest jcc 1f
5229 * decl dest movl cf, dest
5230 * andl (cf-ct),dest 1:
5231 * addl ct,dest
5232 *
5233 * Size 20. Size 14.
5234 *
5235 * This is reasonably steep, but branch mispredict costs are
5236 * high on modern cpus, so consider failing only if optimizing
5237 * for space.
5238 *
5239 * %%% Parameterize branch_cost on the tuning architecture, then
5240 * use that. The 80386 couldn't care less about mispredicts.
5241 */
5242
5243 if (!optimize_size && !TARGET_CMOVE)
5244 {
5245 if (ct == 0)
5246 {
5247 ct = cf;
5248 cf = 0;
5249 compare_code = reverse_condition (compare_code);
5250 code = reverse_condition (code);
5251 }
5252
5253 out = emit_store_flag (out, code, ix86_compare_op0,
5254 ix86_compare_op1, VOIDmode, 0, 1);
5255
5256 emit_insn (gen_addsi3 (out, out, constm1_rtx));
5257 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
5258 if (ct != 0)
5259 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5260 if (out != operands[0])
5261 emit_move_insn (operands[0], out);
5262
5263 return 1; /* DONE */
5264 }
5265 }
5266
5267 if (!TARGET_CMOVE)
5268 {
5269 /* Try a few things more with specific constants and a variable. */
5270
78a0d70c 5271 optab op;
e075ae69
RH
5272 rtx var, orig_out, out, tmp;
5273
5274 if (optimize_size)
5275 return 0; /* FAIL */
5276
5277 /* If one of the two operands is an interesting constant, load a
5278 constant with the above and mask it in with a logical operation. */
5279
5280 if (GET_CODE (operands[2]) == CONST_INT)
5281 {
5282 var = operands[3];
5283 if (INTVAL (operands[2]) == 0)
5284 operands[3] = constm1_rtx, op = and_optab;
5285 else if (INTVAL (operands[2]) == -1)
5286 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
5287 else
5288 return 0; /* FAIL */
e075ae69
RH
5289 }
5290 else if (GET_CODE (operands[3]) == CONST_INT)
5291 {
5292 var = operands[2];
5293 if (INTVAL (operands[3]) == 0)
5294 operands[2] = constm1_rtx, op = and_optab;
5295 else if (INTVAL (operands[3]) == -1)
5296 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
5297 else
5298 return 0; /* FAIL */
e075ae69 5299 }
78a0d70c 5300 else
e075ae69
RH
5301 return 0; /* FAIL */
5302
5303 orig_out = operands[0];
5304 tmp = gen_reg_rtx (GET_MODE (orig_out));
5305 operands[0] = tmp;
5306
5307 /* Recurse to get the constant loaded. */
5308 if (ix86_expand_int_movcc (operands) == 0)
5309 return 0; /* FAIL */
5310
5311 /* Mask in the interesting variable. */
5312 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
5313 OPTAB_WIDEN);
5314 if (out != orig_out)
5315 emit_move_insn (orig_out, out);
5316
5317 return 1; /* DONE */
5318 }
5319
5320 /*
5321 * For comparison with above,
5322 *
5323 * movl cf,dest
5324 * movl ct,tmp
5325 * cmpl op1,op2
5326 * cmovcc tmp,dest
5327 *
5328 * Size 15.
5329 */
5330
5331 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
5332 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
5333 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
5334 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
5335
5336 emit_insn (compare_seq);
5337 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5338 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5339 compare_op, operands[2],
5340 operands[3])));
5341
5342 return 1; /* DONE */
e9a25f70 5343}
e075ae69 5344
/* Expand a floating point conditional move: operands[0] receives
   operands[2] when the comparison operands[1] (over ix86_compare_op0
   and ix86_compare_op1) holds, else operands[3].  Always returns 1;
   the fcmov sequence is emitted unconditionally.  */

int
ix86_expand_fp_movcc (operands)
     rtx operands[];
{
  enum rtx_code code;
  enum machine_mode mode;
  rtx tmp;

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  switch (code)
    {
    case LT:
    case LE:
    case GE:
    case GT:
      /* Materialize the signed comparison into a QImode register and
	 replace the condition by a simple NE-against-zero test, which
	 fcmov can handle.  */
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      break;

    default:
      break;
    }

  /* Emit the compare into the flags register, then the conditional
     move keyed on those flags.  */
  mode = SELECT_CC_MODE (code, ix86_compare_op0, ix86_compare_op1);
  emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (mode, FLAGS_REG),
			  gen_rtx_COMPARE (mode,
					   ix86_compare_op0,
					   ix86_compare_op1)));
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
				gen_rtx_fmt_ee (code, VOIDmode,
						gen_rtx_REG (mode, FLAGS_REG),
						const0_rtx),
				operands[2],
				operands[3])));

  return 1;
}
5389
2450a057
JH
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating pointer parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static void
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  /* Number of SImode pieces: 2 for DImode/DFmode, 3 for XFmode.  */
  int size = GET_MODE_SIZE (mode) / 4;

  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp moves,
     that force all constants to memory to allow combining.  */

  if (GET_CODE (operand) == MEM
      && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
    operand = get_pool_constant (XEXP (operand, 0));

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      /* All parts alias the (pre-dec) push destination; gen_push emits
	 them in the right order, so offsets are implicit.  */
      PUT_MODE (operand, SImode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Consecutive hard registers -- only meaningful after
		 register allocation.  */
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      /* Word-sized slices at offsets 0, 4, 8.  */
	      PUT_MODE (operand, SImode);
	      parts[0] = operand;
	      parts[1] = adj_offsettable_operand (operand, 4);
	      if (size == 3)
		parts[2] = adj_offsettable_operand (operand, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      /* Decompose an FP constant into its target word images.  */
	      REAL_VALUE_TYPE r;
	      long l[3];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = GEN_INT (l[2]);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = GEN_INT (l[1]);
	      parts[0] = GEN_INT (l[0]);
	    }
	  else
	    abort ();
	}
    }

  return;
}
5474
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

int
ix86_split_long_move (operands1)
     rtx operands1[];
{
  rtx part[2][3];		/* part[0] = dest pieces, part[1] = src pieces */
  rtx operands[2];
  int size = GET_MODE_SIZE (GET_MODE (operands1[0])) / 4;
  int push = 0;
  int collisions = 0;

  /* Make our own copy to avoid clobbering the operands.  */
  operands[0] = copy_rtx (operands1[0]);
  operands[1] = copy_rtx (operands1[1]);

  if (size < 2 || size > 3)
    abort ();

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
  ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));

  /* When emitting push, take care for source operands on the stack.
     Each push moves %esp, so shift the remaining source pieces to
     compensate for the displacement.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (size == 3)
	part[1][1] = part[1][2];
      part[1][0] = part[1][1];
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (size == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && size == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  collisions = 1;
	  emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
				  XEXP (part[1][0], 0)));
	  part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
	  part[1][1] = adj_offsettable_operand (part[1][0], 4);
	  if (size == 3)
	    part[1][2] = adj_offsettable_operand (part[1][0], 8);
	}
    }

  if (push)
    {
      /* Push high pieces first so the value ends up in memory in the
	 right order.  */
      if (size == 3)
	emit_insn (gen_push (part[1][2]));
      emit_insn (gen_push (part[1][1]));
      emit_insn (gen_push (part[1][0]));
      return 1;
    }

  /* Choose correct order to not overwrite the source before it is copied.
     The reversed order (high piece first) is used when the low dest
     register doubles as a higher source register, or when the colliding
     address register is the low dest.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (size == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      if (size == 3)
	{
	  operands1[2] = part[0][2];
	  operands1[3] = part[0][1];
	  operands1[4] = part[0][0];
	  operands1[5] = part[1][2];
	  operands1[6] = part[1][1];
	  operands1[7] = part[1][0];
	}
      else
	{
	  operands1[2] = part[0][1];
	  operands1[3] = part[0][0];
	  operands1[5] = part[1][1];
	  operands1[6] = part[1][0];
	}
    }
  else
    {
      if (size == 3)
	{
	  operands1[2] = part[0][0];
	  operands1[3] = part[0][1];
	  operands1[4] = part[0][2];
	  operands1[5] = part[1][0];
	  operands1[6] = part[1][1];
	  operands1[7] = part[1][2];
	}
      else
	{
	  operands1[2] = part[0][0];
	  operands1[3] = part[0][1];
	  operands1[5] = part[1][0];
	  operands1[6] = part[1][1];
	}
    }

  return 0;
}
32b5b1aa 5608
e075ae69
RH
5609void
5610ix86_split_ashldi (operands, scratch)
5611 rtx *operands, scratch;
32b5b1aa 5612{
e075ae69
RH
5613 rtx low[2], high[2];
5614 int count;
b985a30f 5615
e075ae69
RH
5616 if (GET_CODE (operands[2]) == CONST_INT)
5617 {
5618 split_di (operands, 2, low, high);
5619 count = INTVAL (operands[2]) & 63;
32b5b1aa 5620
e075ae69
RH
5621 if (count >= 32)
5622 {
5623 emit_move_insn (high[0], low[1]);
5624 emit_move_insn (low[0], const0_rtx);
b985a30f 5625
e075ae69
RH
5626 if (count > 32)
5627 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
5628 }
5629 else
5630 {
5631 if (!rtx_equal_p (operands[0], operands[1]))
5632 emit_move_insn (operands[0], operands[1]);
5633 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
5634 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
5635 }
5636 }
5637 else
5638 {
5639 if (!rtx_equal_p (operands[0], operands[1]))
5640 emit_move_insn (operands[0], operands[1]);
b985a30f 5641
e075ae69 5642 split_di (operands, 1, low, high);
b985a30f 5643
e075ae69
RH
5644 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
5645 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 5646
e075ae69
RH
5647 if (TARGET_CMOVE && (! reload_completed || scratch))
5648 {
5649 if (! reload_completed)
5650 scratch = force_reg (SImode, const0_rtx);
5651 else
5652 emit_move_insn (scratch, const0_rtx);
5653
5654 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
5655 scratch));
5656 }
5657 else
5658 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
5659 }
e9a25f70 5660}
32b5b1aa 5661
e075ae69
RH
/* Split a DImode arithmetic right shift into SImode operations.
   OPERANDS holds the DImode destination, source and shift count;
   SCRATCH is an optional SImode scratch register usable after reload.  */

void
ix86_split_ashrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);

	  /* Fill the high word with the sign.  Before reload we may
	     sign-extend from low[0] directly; after reload the operands
	     of ashrsi3 must match, so copy first.  */
	  if (! reload_completed)
	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
	  else
	    {
	      emit_move_insn (high[0], low[0]);
	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
	    }

	  if (count > 32)
	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable shift count.  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      /* Fix up the halves for counts >= 32; the cmov form needs a
	 register holding the sign word.  */
      if (TARGET_CMOVE && (!reload_completed || scratch))
	{
	  if (! reload_completed)
	    scratch = gen_reg_rtx (SImode);
	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}
32b5b1aa 5720
e075ae69
RH
5721void
5722ix86_split_lshrdi (operands, scratch)
5723 rtx *operands, scratch;
5724{
5725 rtx low[2], high[2];
5726 int count;
32b5b1aa 5727
e075ae69 5728 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 5729 {
e075ae69
RH
5730 split_di (operands, 2, low, high);
5731 count = INTVAL (operands[2]) & 63;
5732
5733 if (count >= 32)
c7271385 5734 {
e075ae69
RH
5735 emit_move_insn (low[0], high[1]);
5736 emit_move_insn (high[0], const0_rtx);
32b5b1aa 5737
e075ae69
RH
5738 if (count > 32)
5739 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
5740 }
5741 else
5742 {
5743 if (!rtx_equal_p (operands[0], operands[1]))
5744 emit_move_insn (operands[0], operands[1]);
5745 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5746 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
5747 }
32b5b1aa 5748 }
e075ae69
RH
5749 else
5750 {
5751 if (!rtx_equal_p (operands[0], operands[1]))
5752 emit_move_insn (operands[0], operands[1]);
32b5b1aa 5753
e075ae69
RH
5754 split_di (operands, 1, low, high);
5755
5756 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5757 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
5758
5759 /* Heh. By reversing the arguments, we can reuse this pattern. */
5760 if (TARGET_CMOVE && (! reload_completed || scratch))
5761 {
5762 if (! reload_completed)
5763 scratch = force_reg (SImode, const0_rtx);
5764 else
5765 emit_move_insn (scratch, const0_rtx);
5766
5767 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5768 scratch));
5769 }
5770 else
5771 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
5772 }
32b5b1aa 5773}
3f803cd9 5774
e075ae69
RH
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body.  It needs the initialisations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

void
ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
     rtx out, align_rtx, scratch;
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  /* NOTE(review): both flags rtxen are created in CCNOmode although
     z_flags consumes the result of the *_ccz_1 compare patterns --
     confirm whether z_flags should be CCZmode.  */
  rtx no_flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx z_flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx tmpreg = gen_reg_rtx (SImode);

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  /* Already 4-byte aligned?  Jump straight to the word loop.  */
	  emit_insn (gen_cmpsi_ccz_1 (align_rtx, const0_rtx));

	  tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 align_4_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

	  /* Dispatch on residue 2 (two bytes to check) ...  */
	  emit_insn (gen_cmpsi_ccno_1 (align_rtx, GEN_INT (2)));

	  tmp = gen_rtx_EQ (VOIDmode, no_flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 align_2_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

	  /* ... or residue 3 (one byte to check); residue 1 falls
	     through and checks three bytes.  */
	  tmp = gen_rtx_GTU (VOIDmode, no_flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 align_3_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_insn (gen_cmpsi_ccz_1 (align_rtx, const0_rtx));

	  tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 align_4_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
	}

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));

      tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_0_label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

      /* Increment the address.  */
      emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));

	  tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				      gen_rtx_LABEL_REF (VOIDmode,
							 end_0_label),
				      pc_rtx);
	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

	  emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      /* Third (or second, for alignment 2) unaligned byte.  */
      emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));

      tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_0_label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

      emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 0, align_4_label);

  if (TARGET_CMOVE)
    {
       /* Branch-free selection of the half-word holding the zero byte.  */
       rtx reg = gen_reg_rtx (SImode);
       emit_move_insn (reg, tmpreg);
       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

       /* If zero is not in the first two bytes, move two bytes forward. */
       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
						     reg,
						     tmpreg)));
       /* Emit lea manually to avoid clobbering of flags.  */
       emit_insn (gen_rtx_SET (SImode, reg,
			       gen_rtx_PLUS (SImode, out, GEN_INT (2))));

       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
       emit_insn (gen_rtx_SET (VOIDmode, out,
			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
						     reg,
						     out)));

    }
  else
    {
       rtx end_2_label = gen_label_rtx ();
       /* Is zero in the first two bytes? */

       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				   gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				   pc_rtx);
       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
       JUMP_LABEL (tmp) = end_2_label;

       /* Not in the first two.  Move two bytes forward.  */
       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
       emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

       emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  The add doubles the sign-bit
     marker into the carry flag; the sbb then subtracts 3 or 4.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));

  emit_label (end_0_label);
}
5982\f
e075ae69
RH
5983/* Clear stack slot assignments remembered from previous functions.
5984 This is called from INIT_EXPANDERS once before RTL is emitted for each
5985 function. */
5986
36edd3cc
BS
5987static void
5988ix86_init_machine_status (p)
1526a060 5989 struct function *p;
e075ae69
RH
5990{
5991 enum machine_mode mode;
5992 int n;
36edd3cc
BS
5993 p->machine
5994 = (struct machine_function *) xmalloc (sizeof (struct machine_function));
e075ae69
RH
5995
5996 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
5997 mode = (enum machine_mode) ((int) mode + 1))
5998 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
5999 ix86_stack_locals[(int) mode][n] = NULL_RTX;
e075ae69
RH
6000}
6001
1526a060
BS
6002/* Mark machine specific bits of P for GC. */
6003static void
6004ix86_mark_machine_status (p)
6005 struct function *p;
6006{
6007 enum machine_mode mode;
6008 int n;
6009
6010 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6011 mode = (enum machine_mode) ((int) mode + 1))
6012 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6013 ggc_mark_rtx (p->machine->stack_locals[(int) mode][n]);
6014}
6015
e075ae69
RH
6016/* Return a MEM corresponding to a stack slot with mode MODE.
6017 Allocate a new slot if necessary.
6018
6019 The RTL for a function can have several slots available: N is
6020 which slot to use. */
6021
6022rtx
6023assign_386_stack_local (mode, n)
6024 enum machine_mode mode;
6025 int n;
6026{
6027 if (n < 0 || n >= MAX_386_STACK_LOCALS)
6028 abort ();
6029
6030 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
6031 ix86_stack_locals[(int) mode][n]
6032 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
6033
6034 return ix86_stack_locals[(int) mode][n];
6035}
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

static int
memory_address_length (addr)
     rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  /* Push/pop-style auto-modified addresses are encoded entirely in the
     opcode and contribute no additional bytes.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* Special cases: ebp and esp need the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing: a bare 32-bit displacement.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* 'K' matches a signed 8-bit immediate: one byte of
	     displacement, otherwise the full four.  */
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
	    len = 1;
	  else
	    len = 4;
	}

      /* An index requires the two-byte modrm form.  */
      if (index)
	len += 1;
    }

  return len;
}
/* Compute the default value of the "length" insn attribute for INSN:
   the bytes contributed by immediate operands and memory addressing,
   plus the opcode and prefix bytes reported by the generated
   get_attr_length_opcode / get_attr_length_prefix attributes.  */

int
ix86_attr_length_default (insn)
     rtx insn;
{
  enum attr_type type;
  int len = 0, i;

  type = get_attr_type (insn);
  extract_insn (insn);
  switch (type)
    {
      /* These carry no immediates and their memory operand (if any) is
	 handled by the generic scan after the switch.  */
    case TYPE_INCDEC:
    case TYPE_SETCC:
    case TYPE_ICMOV:
    case TYPE_FMOV:
    case TYPE_FOP:
    case TYPE_FCMP:
    case TYPE_FOP1:
    case TYPE_FMUL:
    case TYPE_FDIV:
    case TYPE_FSGN:
    case TYPE_FPSPC:
    case TYPE_FCMOV:
    case TYPE_IBR:
      break;

    case TYPE_STR:
    case TYPE_CLD:
      len = 0;
      /* NOTE(review): no break here, so control falls through into the
	 immediate-operand scan below.  String/cld insns have no constant
	 operands so the result is unchanged either way -- confirm the
	 fall-through is intentional.  */

    case TYPE_ALU1:
    case TYPE_NEGNOT:
    case TYPE_ALU:
    case TYPE_ICMP:
    case TYPE_IMOVX:
    case TYPE_ISHIFT:
    case TYPE_IMUL:
    case TYPE_IDIV:
    case TYPE_PUSH:
    case TYPE_POP:
      /* Add the size of each immediate operand: one byte when it fits
	 in a sign-extended 8-bit field ('K'), otherwise the full width
	 of operand 0's mode (the immediate is presumably as wide as the
	 destination -- note operand[0], not operand[i], is used).  */
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (CONSTANT_P (recog_data.operand[i]))
	  {
	    if (GET_CODE (recog_data.operand[i]) == CONST_INT
		&& CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
	      len += 1;
	    else
	      len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
	  }
      break;

    case TYPE_IMOV:
      /* mov with an immediate source: immediate is destination-width.  */
      if (CONSTANT_P (recog_data.operand[1]))
	len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
      break;

    case TYPE_CALL:
      /* Direct call: opcode plus 32-bit relative displacement.  */
      if (constant_call_address_operand (recog_data.operand[0],
					 GET_MODE (recog_data.operand[0])))
	return 5;
      break;

    case TYPE_CALLV:
      /* Direct call with a value: operand 1 is the address.  */
      if (constant_call_address_operand (recog_data.operand[1],
					 GET_MODE (recog_data.operand[1])))
	return 5;
      break;

    case TYPE_LEA:
      {
	/* Irritatingly, single_set doesn't work with REG_UNUSED present,
	   as we'll get from running life_analysis during reg-stack when
	   not optimizing.  Not that it matters anyway, now that
	   pro_epilogue_adjust_stack uses lea, and is by design not
	   single_set.  */
	rtx set = PATTERN (insn);
	if (GET_CODE (set) == SET)
	  ;
	else if (GET_CODE (set) == PARALLEL
		 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
	  set = XVECEXP (set, 0, 0);
	else
	  abort ();

	/* The lea "memory operand" is the address computation in
	   SET_SRC; skip the generic MEM scan below.  */
	len += memory_address_length (SET_SRC (set));
	goto just_opcode;
      }

    case TYPE_OTHER:
    case TYPE_MULTI:
      /* Unknown shape: assume the architectural maximum insn length.  */
      return 15;

    case TYPE_FXCH:
      /* NOTE(review): the +1 when the other operand is not %st(1)
	 presumably models an extra byte for the explicit-register
	 form -- verify against the fxch encodings.  */
      if (STACK_TOP_P (recog_data.operand[0]))
	return 2 + (REGNO (recog_data.operand[1]) != FIRST_STACK_REG + 1);
      else
	return 2 + (REGNO (recog_data.operand[0]) != FIRST_STACK_REG + 1);

    default:
      abort ();
    }

  /* Add the address length of the first memory operand, if any.  */
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      {
	len += memory_address_length (XEXP (recog_data.operand[i], 0));
	break;
      }

just_opcode:
  len += get_attr_length_opcode (insn);
  len += get_attr_length_prefix (insn);

  return len;
}
e075ae69
RH
6209\f
6210/* Return the maximum number of instructions a cpu can issue. */
b657fc39 6211
e075ae69
RH
6212int
6213ix86_issue_rate ()
b657fc39 6214{
e075ae69 6215 switch (ix86_cpu)
b657fc39 6216 {
e075ae69
RH
6217 case PROCESSOR_PENTIUM:
6218 case PROCESSOR_K6:
6219 return 2;
79325812 6220
e075ae69
RH
6221 case PROCESSOR_PENTIUMPRO:
6222 return 3;
b657fc39 6223
b657fc39 6224 default:
e075ae69 6225 return 1;
b657fc39 6226 }
b657fc39
L
6227}
6228
e075ae69
RH
6229/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
6230 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 6231
e075ae69
RH
6232static int
6233ix86_flags_dependant (insn, dep_insn, insn_type)
6234 rtx insn, dep_insn;
6235 enum attr_type insn_type;
6236{
6237 rtx set, set2;
b657fc39 6238
e075ae69
RH
6239 /* Simplify the test for uninteresting insns. */
6240 if (insn_type != TYPE_SETCC
6241 && insn_type != TYPE_ICMOV
6242 && insn_type != TYPE_FCMOV
6243 && insn_type != TYPE_IBR)
6244 return 0;
b657fc39 6245
e075ae69
RH
6246 if ((set = single_set (dep_insn)) != 0)
6247 {
6248 set = SET_DEST (set);
6249 set2 = NULL_RTX;
6250 }
6251 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
6252 && XVECLEN (PATTERN (dep_insn), 0) == 2
6253 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
6254 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
6255 {
6256 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6257 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6258 }
78a0d70c
ZW
6259 else
6260 return 0;
b657fc39 6261
78a0d70c
ZW
6262 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
6263 return 0;
b657fc39 6264
78a0d70c
ZW
6265 /* This test is true if the dependant insn reads the flags but
6266 not any other potentially set register. */
6267 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
6268 return 0;
6269
6270 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
6271 return 0;
6272
6273 return 1;
e075ae69 6274}
/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx addr;

  if (insn_type == TYPE_LEA)
    {
      /* For lea the "address" is the SET_SRC computation; dig it out of
	 the pattern, which may be a PARALLEL (pro_epilogue_adjust_stack
	 is deliberately not single_set).  */
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
	;
      else if (GET_CODE (addr) == PARALLEL
	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      else
	abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      /* Otherwise use the address of the first MEM operand; if there is
	 no memory operand there can be no AGI stall.  */
      int i;
      extract_insn (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    goto found;
	  }
      return 0;
    found:;
    }

  /* The stall exists iff DEP_INSN writes something the address reads.  */
  return modified_in_p (addr, dep_insn);
}
a269a03c
JC
6314
6315int
e075ae69 6316ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
6317 rtx insn, link, dep_insn;
6318 int cost;
6319{
e075ae69
RH
6320 enum attr_type insn_type, dep_insn_type;
6321 rtx set, set2;
9b00189f 6322 int dep_insn_code_number;
a269a03c 6323
309ada50 6324 /* Anti and output depenancies have zero cost on all CPUs. */
e075ae69 6325 if (REG_NOTE_KIND (link) != 0)
309ada50 6326 return 0;
a269a03c 6327
9b00189f
JH
6328 dep_insn_code_number = recog_memoized (dep_insn);
6329
e075ae69 6330 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 6331 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 6332 return cost;
a269a03c 6333
1c71e60e
JH
6334 insn_type = get_attr_type (insn);
6335 dep_insn_type = get_attr_type (dep_insn);
9b00189f 6336
1c71e60e
JH
6337 /* Prologue and epilogue allocators can have a false dependency on ebp.
6338 This results in one cycle extra stall on Pentium prologue scheduling,
6339 so handle this important case manually. */
6340 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
6341 && dep_insn_type == TYPE_ALU
9b00189f
JH
6342 && !reg_mentioned_p (stack_pointer_rtx, insn))
6343 return 0;
6344
a269a03c
JC
6345 switch (ix86_cpu)
6346 {
6347 case PROCESSOR_PENTIUM:
e075ae69
RH
6348 /* Address Generation Interlock adds a cycle of latency. */
6349 if (ix86_agi_dependant (insn, dep_insn, insn_type))
6350 cost += 1;
6351
6352 /* ??? Compares pair with jump/setcc. */
6353 if (ix86_flags_dependant (insn, dep_insn, insn_type))
6354 cost = 0;
6355
6356 /* Floating point stores require value to be ready one cycle ealier. */
6357 if (insn_type == TYPE_FMOV
6358 && get_attr_memory (insn) == MEMORY_STORE
6359 && !ix86_agi_dependant (insn, dep_insn, insn_type))
6360 cost += 1;
6361 break;
a269a03c 6362
e075ae69
RH
6363 case PROCESSOR_PENTIUMPRO:
6364 /* Since we can't represent delayed latencies of load+operation,
6365 increase the cost here for non-imov insns. */
6366 if (dep_insn_type != TYPE_IMOV
6367 && dep_insn_type != TYPE_FMOV
6368 && get_attr_memory (dep_insn) == MEMORY_LOAD)
6369 cost += 1;
6370
6371 /* INT->FP conversion is expensive. */
6372 if (get_attr_fp_int_src (dep_insn))
6373 cost += 5;
6374
6375 /* There is one cycle extra latency between an FP op and a store. */
6376 if (insn_type == TYPE_FMOV
6377 && (set = single_set (dep_insn)) != NULL_RTX
6378 && (set2 = single_set (insn)) != NULL_RTX
6379 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
6380 && GET_CODE (SET_DEST (set2)) == MEM)
6381 cost += 1;
6382 break;
a269a03c 6383
e075ae69
RH
6384 case PROCESSOR_K6:
6385 /* The esp dependency is resolved before the instruction is really
6386 finished. */
6387 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
6388 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
6389 return 1;
a269a03c 6390
e075ae69
RH
6391 /* Since we can't represent delayed latencies of load+operation,
6392 increase the cost here for non-imov insns. */
6393 if (get_attr_memory (dep_insn) == MEMORY_LOAD)
6394 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
6395
6396 /* INT->FP conversion is expensive. */
6397 if (get_attr_fp_int_src (dep_insn))
6398 cost += 5;
a14003ee 6399 break;
e075ae69 6400
309ada50
JH
6401 case PROCESSOR_ATHLON:
6402 /* Address Generation Interlock cause problems on the Athlon CPU because
6403 the loads and stores are done in order so once one load or store has
6404 to wait, others must too, so penalize the AGIs slightly by one cycle.
6405 We might experiment with this value later. */
6406 if (ix86_agi_dependant (insn, dep_insn, insn_type))
6407 cost += 1;
6408
6409 /* Since we can't represent delayed latencies of load+operation,
6410 increase the cost here for non-imov insns. */
6411 if (dep_insn_type != TYPE_IMOV
6412 && dep_insn_type != TYPE_FMOV
6413 && get_attr_memory (dep_insn) == MEMORY_LOAD)
6414 cost += 2;
a269a03c 6415 default:
a269a03c
JC
6416 break;
6417 }
6418
6419 return cost;
6420}
/* Per-CPU scheduler state, cleared at the start of each block by
   ix86_sched_init.  Only the Pentium Pro model keeps state: the insns
   assigned to the three decoders in the current cycle, and how many
   insns were issued this cycle.  */
static union
{
  struct ppro_sched_data
  {
    rtx decode[3];
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;
0a726ef1 6430
e075ae69
RH
6431static int
6432ix86_safe_length (insn)
6433 rtx insn;
6434{
6435 if (recog_memoized (insn) >= 0)
6436 return get_attr_length(insn);
6437 else
6438 return 128;
6439}
0a726ef1 6440
e075ae69
RH
6441static int
6442ix86_safe_length_prefix (insn)
6443 rtx insn;
6444{
6445 if (recog_memoized (insn) >= 0)
6446 return get_attr_length(insn);
6447 else
6448 return 0;
6449}
6450
6451static enum attr_memory
6452ix86_safe_memory (insn)
6453 rtx insn;
6454{
6455 if (recog_memoized (insn) >= 0)
6456 return get_attr_memory(insn);
6457 else
6458 return MEMORY_UNKNOWN;
6459}
0a726ef1 6460
e075ae69
RH
6461static enum attr_pent_pair
6462ix86_safe_pent_pair (insn)
6463 rtx insn;
6464{
6465 if (recog_memoized (insn) >= 0)
6466 return get_attr_pent_pair(insn);
6467 else
6468 return PENT_PAIR_NP;
6469}
0a726ef1 6470
e075ae69
RH
6471static enum attr_ppro_uops
6472ix86_safe_ppro_uops (insn)
6473 rtx insn;
6474{
6475 if (recog_memoized (insn) >= 0)
6476 return get_attr_ppro_uops (insn);
6477 else
6478 return PPRO_UOPS_MANY;
6479}
0a726ef1 6480
e075ae69
RH
6481static void
6482ix86_dump_ppro_packet (dump)
6483 FILE *dump;
0a726ef1 6484{
e075ae69 6485 if (ix86_sched_data.ppro.decode[0])
0a726ef1 6486 {
e075ae69
RH
6487 fprintf (dump, "PPRO packet: %d",
6488 INSN_UID (ix86_sched_data.ppro.decode[0]));
6489 if (ix86_sched_data.ppro.decode[1])
6490 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
6491 if (ix86_sched_data.ppro.decode[2])
6492 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
6493 fputc ('\n', dump);
6494 }
6495}
/* We're beginning a new block.  Initialize data structures as necessary.  */

void
ix86_sched_init (dump, sched_verbose)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
{
  /* Clear the decoder/issue state carried over from the previous block.  */
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
}
6506
6507/* Shift INSN to SLOT, and shift everything else down. */
6508
6509static void
6510ix86_reorder_insn (insnp, slot)
6511 rtx *insnp, *slot;
6512{
6513 if (insnp != slot)
6514 {
6515 rtx insn = *insnp;
6516 do
6517 insnp[0] = insnp[1];
6518 while (++insnp != slot);
6519 *insnp = insn;
0a726ef1 6520 }
e075ae69
RH
6521}
/* Find an instruction with given pairability and minimal amount of cycles
   lost by the fact that the CPU waits for both pipelines to finish before
   reading next instructions.  Also take care that both instructions together
   can not exceed 7 bytes.

   READY/E_READY delimit the ready list (E_READY is the last, highest
   priority element); TYPE is the pent_pair class wanted; FIRST is the
   insn already chosen for the other pipe.  Returns a pointer into the
   ready list, or NULL when no suitable partner exists.  */

static rtx *
ix86_pent_find_pair (e_ready, ready, type, first)
     rtx *e_ready;
     rtx *ready;
     enum attr_pent_pair type;
     rtx first;
{
  int mincycles, cycles;
  enum attr_pent_pair tmp;
  enum attr_memory memory;
  rtx *insnp, *bestinsnp = NULL;

  /* FIRST itself must fit the 7-byte pairing limit (prefixes excluded).  */
  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
    return NULL;

  memory = ix86_safe_memory (first);
  cycles = result_ready_cost (first);
  mincycles = INT_MAX;

  /* Scan from the high-priority end for a candidate of class TYPE that
     also fits in 7 bytes; stop early once a zero-loss pairing is found
     (mincycles == 0 ends the loop).  */
  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
      {
	enum attr_memory second_memory;
	int secondcycles, currentcycles;

	second_memory = ix86_safe_memory (*insnp);
	secondcycles = result_ready_cost (*insnp);
	/* Both pipes wait for the slower insn, so the loss is the
	   latency difference.  */
	currentcycles = abs (cycles - secondcycles);

	if (secondcycles >= 1 && cycles >= 1)
	  {
	    /* Two read/modify/write instructions together takes two
	       cycles longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
	      currentcycles += 2;

	    /* Read modify/write instruction followed by read/modify
	       takes one cycle longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
		&& tmp != PENT_PAIR_UV
		&& ix86_safe_pent_pair (first) != PENT_PAIR_UV)
	      currentcycles += 1;
	  }
	if (currentcycles < mincycles)
	  bestinsnp = insnp, mincycles = currentcycles;
      }

  return bestinsnp;
}
/* Subroutines of ix86_sched_reorder.  */

/* Rearrange the ready list (READY..E_READY, E_READY highest priority)
   so the last two insns form the best Pentium U/V pipe pairing.  */

static void
ix86_sched_reorder_pentium (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  enum attr_pent_pair pair1, pair2;
  rtx *insnp;

  /* This wouldn't be necessary if Haifa knew that static insn ordering
     is important to which pipe an insn is issued to.  So we have to make
     some minor rearrangements.  */

  pair1 = ix86_safe_pent_pair (*e_ready);

  /* If the first insn is non-pairable, let it be.  */
  if (pair1 == PENT_PAIR_NP)
    return;

  pair2 = PENT_PAIR_NP;
  insnp = 0;

  /* If the first insn is UV or PV pairable, search for a PU
     insn to go with.  */
  if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PU, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PU;
    }

  /* If the first insn is PU or UV pairable, search for a PV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP
      && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PV;
    }

  /* If the first insn is pairable, search for a UV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP)
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_UV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_UV;
    }

  if (pair2 == PENT_PAIR_NP)
    return;

  /* Found something!  Decide if we need to swap the order: the
     U-pipe-only insn must issue first, and two memory-touching UV insns
     prefer the load ahead of the read/modify/write.  */
  if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
      || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
	  && ix86_safe_memory (*e_ready) == MEMORY_BOTH
	  && ix86_safe_memory (*insnp) == MEMORY_LOAD))
    ix86_reorder_insn (insnp, e_ready);
  else
    ix86_reorder_insn (insnp, e_ready - 1);
}
/* Rearrange the ready list (READY..E_READY, E_READY highest priority)
   to map well onto the Pentium Pro decoders: slot 0 takes any insn,
   slots 1 and 2 only single-uop insns.  */

static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready >= e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  /* Always claim at least one issue so the scheduler makes progress.  */
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}
fb693d44 6730
78a0d70c
ZW
6731
6732/* We are about to being issuing insns for this clock cycle.
6733 Override the default sort algorithm to better slot instructions. */
6734int
6735ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
6736 FILE *dump ATTRIBUTE_UNUSED;
6737 int sched_verbose ATTRIBUTE_UNUSED;
6738 rtx *ready;
6739 int n_ready;
6740 int clock_var ATTRIBUTE_UNUSED;
6741{
6742 rtx *e_ready = ready + n_ready - 1;
fb693d44 6743
78a0d70c
ZW
6744 if (n_ready < 2)
6745 goto out;
e075ae69 6746
78a0d70c
ZW
6747 switch (ix86_cpu)
6748 {
6749 default:
6750 break;
e075ae69 6751
78a0d70c
ZW
6752 case PROCESSOR_PENTIUM:
6753 ix86_sched_reorder_pentium (ready, e_ready);
6754 break;
e075ae69 6755
78a0d70c
ZW
6756 case PROCESSOR_PENTIUMPRO:
6757 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 6758 break;
fb693d44
RH
6759 }
6760
e075ae69
RH
6761out:
6762 return ix86_issue_rate ();
6763}
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    /* A many-uop insn occupies all decoders by itself: dump the
	       pending packet, then dump this insn as its own packet and
	       leave the decoders empty for the next cycle.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    /* A 2-4 uop insn must go to decoder 0: it closes out the
	       pending packet and starts a new one.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    /* A single-uop insn goes in the first free decoder slot.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		/* All three decoders filled: the packet is complete.  */
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}
This page took 2.411124 seconds and 5 git commands to generate.