]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
loop.c (canonicalize_condition): Add WANT_REG argument.
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
e075ae69 1/* Subroutines used for code generation on IA-32.
4592bdcb
JL
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000
3 Free Software Foundation, Inc.
2a2ab3f9
JVA
4
5This file is part of GNU CC.
6
7GNU CC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU CC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU CC; see the file COPYING. If not, write to
97aadbb9 19the Free Software Foundation, 59 Temple Place - Suite 330,
32b5b1aa 20Boston, MA 02111-1307, USA. */
2a2ab3f9 21
0b6b2900 22#include <setjmp.h>
2a2ab3f9 23#include "config.h"
bb5177ac 24#include "system.h"
2a2ab3f9 25#include "rtl.h"
6baf1cc8
BS
26#include "tree.h"
27#include "tm_p.h"
2a2ab3f9
JVA
28#include "regs.h"
29#include "hard-reg-set.h"
30#include "real.h"
31#include "insn-config.h"
32#include "conditions.h"
33#include "insn-flags.h"
34#include "output.h"
35#include "insn-attr.h"
2a2ab3f9 36#include "flags.h"
a8ffcc81 37#include "except.h"
ecbc4695 38#include "function.h"
00c79232 39#include "recog.h"
ced8dd8c 40#include "expr.h"
f103890b 41#include "toplev.h"
e075ae69 42#include "basic-block.h"
1526a060 43#include "ggc.h"
2a2ab3f9 44
997de79c
JVA
45#ifdef EXTRA_CONSTRAINT
46/* If EXTRA_CONSTRAINT is defined, then the 'S'
47 constraint in REG_CLASS_FROM_LETTER will no longer work, and various
48 asm statements that need 'S' for class SIREG will break. */
ad5a6adc
RS
49 error EXTRA_CONSTRAINT conflicts with S constraint letter
50/* The previous line used to be #error, but some compilers barf
51 even if the conditional was untrue. */
997de79c
JVA
52#endif
53
8dfe5673
RK
54#ifndef CHECK_STACK_LIMIT
55#define CHECK_STACK_LIMIT -1
56#endif
57
32b5b1aa
SC
58/* Processor costs (relative to an add) */
59struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 60 1, /* cost of an add instruction */
32b5b1aa
SC
61 1, /* cost of a lea instruction */
62 3, /* variable shift costs */
63 2, /* constant shift costs */
64 6, /* cost of starting a multiply */
65 1, /* cost of multiply per each bit set */
e075ae69 66 23, /* cost of a divide/mod */
96e7ae40 67 15, /* "large" insn */
e2e52e1b 68 3, /* MOVE_RATIO */
7c6b971d 69 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
70 {2, 4, 2}, /* cost of loading integer registers
71 in QImode, HImode and SImode.
72 Relative to reg-reg move (2). */
73 {2, 4, 2}, /* cost of storing integer registers */
74 2, /* cost of reg,reg fld/fst */
75 {8, 8, 8}, /* cost of loading fp registers
76 in SFmode, DFmode and XFmode */
77 {8, 8, 8} /* cost of loading integer registers */
32b5b1aa
SC
78};
79
80struct processor_costs i486_cost = { /* 486 specific costs */
81 1, /* cost of an add instruction */
82 1, /* cost of a lea instruction */
83 3, /* variable shift costs */
84 2, /* constant shift costs */
85 12, /* cost of starting a multiply */
86 1, /* cost of multiply per each bit set */
e075ae69 87 40, /* cost of a divide/mod */
96e7ae40 88 15, /* "large" insn */
e2e52e1b 89 3, /* MOVE_RATIO */
7c6b971d 90 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
91 {2, 4, 2}, /* cost of loading integer registers
92 in QImode, HImode and SImode.
93 Relative to reg-reg move (2). */
94 {2, 4, 2}, /* cost of storing integer registers */
95 2, /* cost of reg,reg fld/fst */
96 {8, 8, 8}, /* cost of loading fp registers
97 in SFmode, DFmode and XFmode */
98 {8, 8, 8} /* cost of loading integer registers */
32b5b1aa
SC
99};
100
e5cb57e8 101struct processor_costs pentium_cost = {
32b5b1aa
SC
102 1, /* cost of an add instruction */
103 1, /* cost of a lea instruction */
856b07a1 104 4, /* variable shift costs */
e5cb57e8 105 1, /* constant shift costs */
856b07a1
SC
106 11, /* cost of starting a multiply */
107 0, /* cost of multiply per each bit set */
e075ae69 108 25, /* cost of a divide/mod */
96e7ae40 109 8, /* "large" insn */
e2e52e1b 110 6, /* MOVE_RATIO */
7c6b971d 111 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
112 {2, 4, 2}, /* cost of loading integer registers
113 in QImode, HImode and SImode.
114 Relative to reg-reg move (2). */
115 {2, 4, 2}, /* cost of storing integer registers */
116 2, /* cost of reg,reg fld/fst */
117 {2, 2, 6}, /* cost of loading fp registers
118 in SFmode, DFmode and XFmode */
119 {4, 4, 6} /* cost of loading integer registers */
32b5b1aa
SC
120};
121
856b07a1
SC
122struct processor_costs pentiumpro_cost = {
123 1, /* cost of an add instruction */
124 1, /* cost of a lea instruction */
e075ae69 125 1, /* variable shift costs */
856b07a1 126 1, /* constant shift costs */
369e59b1 127 4, /* cost of starting a multiply */
856b07a1 128 0, /* cost of multiply per each bit set */
e075ae69 129 17, /* cost of a divide/mod */
96e7ae40 130 8, /* "large" insn */
e2e52e1b 131 6, /* MOVE_RATIO */
7c6b971d 132 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
133 {4, 4, 4}, /* cost of loading integer registers
134 in QImode, HImode and SImode.
135 Relative to reg-reg move (2). */
136 {2, 2, 2}, /* cost of storing integer registers */
137 2, /* cost of reg,reg fld/fst */
138 {2, 2, 6}, /* cost of loading fp registers
139 in SFmode, DFmode and XFmode */
140 {4, 4, 6} /* cost of loading integer registers */
856b07a1
SC
141};
142
a269a03c
JC
143struct processor_costs k6_cost = {
144 1, /* cost of an add instruction */
e075ae69 145 2, /* cost of a lea instruction */
a269a03c
JC
146 1, /* variable shift costs */
147 1, /* constant shift costs */
73fe76e4 148 3, /* cost of starting a multiply */
a269a03c 149 0, /* cost of multiply per each bit set */
e075ae69 150 18, /* cost of a divide/mod */
96e7ae40 151 8, /* "large" insn */
e2e52e1b 152 4, /* MOVE_RATIO */
7c6b971d 153 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
154 {4, 5, 4}, /* cost of loading integer registers
155 in QImode, HImode and SImode.
156 Relative to reg-reg move (2). */
157 {2, 3, 2}, /* cost of storing integer registers */
158 4, /* cost of reg,reg fld/fst */
159 {6, 6, 6}, /* cost of loading fp registers
160 in SFmode, DFmode and XFmode */
161 {4, 4, 4} /* cost of loading integer registers */
a269a03c
JC
162};
163
309ada50
JH
164struct processor_costs athlon_cost = {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 1, /* variable shift costs */
168 1, /* constant shift costs */
169 5, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 19, /* cost of a divide/mod */
172 8, /* "large" insn */
e2e52e1b 173 9, /* MOVE_RATIO */
309ada50
JH
174 4, /* cost for loading QImode using movzbl */
175 {4, 5, 4}, /* cost of loading integer registers
176 in QImode, HImode and SImode.
177 Relative to reg-reg move (2). */
178 {2, 3, 2}, /* cost of storing integer registers */
179 4, /* cost of reg,reg fld/fst */
180 {6, 6, 6}, /* cost of loading fp registers
181 in SFmode, DFmode and XFmode */
182 {4, 4, 4} /* cost of loading integer registers */
183};
184
32b5b1aa
SC
185struct processor_costs *ix86_cost = &pentium_cost;
186
a269a03c
JC
187/* Processor feature/optimization bitmasks. */
188#define m_386 (1<<PROCESSOR_I386)
189#define m_486 (1<<PROCESSOR_I486)
190#define m_PENT (1<<PROCESSOR_PENTIUM)
191#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
192#define m_K6 (1<<PROCESSOR_K6)
309ada50 193#define m_ATHLON (1<<PROCESSOR_ATHLON)
a269a03c 194
309ada50
JH
195const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
196const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
a269a03c 197const int x86_zero_extend_with_and = m_486 | m_PENT;
369e59b1 198const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */;
e075ae69 199const int x86_double_with_add = ~m_386;
a269a03c 200const int x86_use_bit_test = m_386;
e2e52e1b 201const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
a269a03c
JC
202const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
203const int x86_use_any_reg = m_486;
309ada50
JH
204const int x86_cmove = m_PPRO | m_ATHLON;
205const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
206const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON;
e075ae69
RH
207const int x86_partial_reg_stall = m_PPRO;
208const int x86_use_loop = m_K6;
309ada50 209const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
e075ae69
RH
210const int x86_use_mov0 = m_K6;
211const int x86_use_cltd = ~(m_PENT | m_K6);
212const int x86_read_modify_write = ~m_PENT;
213const int x86_read_modify = ~(m_PENT | m_PPRO);
214const int x86_split_long_moves = m_PPRO;
e9e80858 215const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
f90800f8 216const int x86_single_stringop = m_386;
d9f32422
JH
217const int x86_qimode_math = ~(0);
218const int x86_promote_qi_regs = 0;
219const int x86_himode_math = ~(m_PPRO);
220const int x86_promote_hi_regs = m_PPRO;
bdeb029c
JH
221const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
222const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
223const int x86_add_esp_4 = m_ATHLON | m_K6;
224const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
a269a03c 225
564d80f4 226#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
2a2ab3f9 227
e075ae69
RH
228const char * const hi_reg_name[] = HI_REGISTER_NAMES;
229const char * const qi_reg_name[] = QI_REGISTER_NAMES;
230const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
4c0d89b5
RS
231
232/* Array of the smallest class containing reg number REGNO, indexed by
233 REGNO. Used by REGNO_REG_CLASS in i386.h. */
234
e075ae69 235enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
236{
237 /* ax, dx, cx, bx */
ab408a86 238 AREG, DREG, CREG, BREG,
4c0d89b5 239 /* si, di, bp, sp */
e075ae69 240 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
241 /* FP registers */
242 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 243 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 244 /* arg pointer */
83774849 245 NON_Q_REGS,
564d80f4
JH
246 /* flags, fpsr, dirflag, frame */
247 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS
4c0d89b5 248};
c572e5ba 249
83774849
RH
250/* The "default" register map. */
251
252int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
253{
254 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
255 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
256 -1, -1, -1, -1, /* arg, flags, fpsr, dir */
257};
258
259/* Define the register numbers to be used in Dwarf debugging information.
260 The SVR4 reference port C compiler uses the following register numbers
261 in its Dwarf output code:
262 0 for %eax (gcc regno = 0)
263 1 for %ecx (gcc regno = 2)
264 2 for %edx (gcc regno = 1)
265 3 for %ebx (gcc regno = 3)
266 4 for %esp (gcc regno = 7)
267 5 for %ebp (gcc regno = 6)
268 6 for %esi (gcc regno = 4)
269 7 for %edi (gcc regno = 5)
270 The following three DWARF register numbers are never generated by
271 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
272 believes these numbers have these meanings.
273 8 for %eip (no gcc equivalent)
274 9 for %eflags (gcc regno = 17)
275 10 for %trapno (no gcc equivalent)
276 It is not at all clear how we should number the FP stack registers
277 for the x86 architecture. If the version of SDB on x86/svr4 were
278 a bit less brain dead with respect to floating-point then we would
279 have a precedent to follow with respect to DWARF register numbers
280 for x86 FP registers, but the SDB on x86/svr4 is so completely
281 broken with respect to FP registers that it is hardly worth thinking
282 of it as something to strive for compatibility with.
283 The version of x86/svr4 SDB I have at the moment does (partially)
284 seem to believe that DWARF register number 11 is associated with
285 the x86 register %st(0), but that's about all. Higher DWARF
286 register numbers don't seem to be associated with anything in
287 particular, and even for DWARF regno 11, SDB only seems to under-
288 stand that it should say that a variable lives in %st(0) (when
289 asked via an `=' command) if we said it was in DWARF regno 11,
290 but SDB still prints garbage when asked for the value of the
291 variable in question (via a `/' command).
292 (Also note that the labels SDB prints for various FP stack regs
293 when doing an `x' command are all wrong.)
294 Note that these problems generally don't affect the native SVR4
295 C compiler because it doesn't allow the use of -O with -g and
296 because when it is *not* optimizing, it allocates a memory
297 location for each floating-point variable, and the memory
298 location is what gets described in the DWARF AT_location
299 attribute for the variable in question.
300 Regardless of the severe mental illness of the x86/svr4 SDB, we
301 do something sensible here and we use the following DWARF
302 register numbers. Note that these are all stack-top-relative
303 numbers.
304 11 for %st(0) (gcc regno = 8)
305 12 for %st(1) (gcc regno = 9)
306 13 for %st(2) (gcc regno = 10)
307 14 for %st(3) (gcc regno = 11)
308 15 for %st(4) (gcc regno = 12)
309 16 for %st(5) (gcc regno = 13)
310 17 for %st(6) (gcc regno = 14)
311 18 for %st(7) (gcc regno = 15)
312*/
313int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
314{
315 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
316 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
317 -1, 9, -1, -1, /* arg, flags, fpsr, dir */
318};
319
320
321
c572e5ba
JVA
322/* Test and compare insns in i386.md store the information needed to
323 generate branch and scc insns here. */
324
e075ae69
RH
325struct rtx_def *ix86_compare_op0 = NULL_RTX;
326struct rtx_def *ix86_compare_op1 = NULL_RTX;
f5316dfe 327
36edd3cc
BS
328#define MAX_386_STACK_LOCALS 2
329
330/* Define the structure for the machine field in struct function. */
331struct machine_function
332{
333 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
334};
335
01d939e8 336#define ix86_stack_locals (cfun->machine->stack_locals)
36edd3cc 337
c8c5cb99 338/* which cpu are we scheduling for */
e42ea7f9 339enum processor_type ix86_cpu;
c8c5cb99
SC
340
341/* which instruction set architecture to use. */
c942177e 342int ix86_arch;
c8c5cb99
SC
343
344/* Strings to hold which cpu and instruction set architecture to use. */
9c23aa47
ZW
345const char *ix86_cpu_string; /* for -mcpu=<xxx> */
346const char *ix86_arch_string; /* for -march=<xxx> */
c8c5cb99 347
f5316dfe 348/* Register allocation order */
e075ae69 349const char *ix86_reg_alloc_order;
f5316dfe
MM
350static char regs_allocated[FIRST_PSEUDO_REGISTER];
351
b08de47e 352/* # of registers to use to pass arguments. */
e075ae69 353const char *ix86_regparm_string;
e9a25f70 354
e075ae69
RH
355/* ix86_regparm_string as a number */
356int ix86_regparm;
e9a25f70
JL
357
358/* Alignment to use for loops and jumps: */
359
360/* Power of two alignment for loops. */
e075ae69 361const char *ix86_align_loops_string;
e9a25f70
JL
362
363/* Power of two alignment for non-loop jumps. */
e075ae69 364const char *ix86_align_jumps_string;
e9a25f70 365
3af4bd89 366/* Power of two alignment for stack boundary in bytes. */
e075ae69 367const char *ix86_preferred_stack_boundary_string;
3af4bd89
JH
368
369/* Preferred alignment for stack boundary in bits. */
e075ae69 370int ix86_preferred_stack_boundary;
3af4bd89 371
e9a25f70 372/* Values 1-5: see jump.c */
e075ae69
RH
373int ix86_branch_cost;
374const char *ix86_branch_cost_string;
e9a25f70
JL
375
376/* Power of two alignment for functions. */
e075ae69
RH
377int ix86_align_funcs;
378const char *ix86_align_funcs_string;
b08de47e 379
e9a25f70 380/* Power of two alignment for loops. */
e075ae69 381int ix86_align_loops;
b08de47e 382
e9a25f70 383/* Power of two alignment for non-loop jumps. */
e075ae69
RH
384int ix86_align_jumps;
385\f
f6da8bc3
KG
386static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
387static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
e075ae69 388 int, int, FILE *));
f6da8bc3
KG
389static enum rtx_code unsigned_comparison PARAMS ((enum rtx_code code));
390static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
3a3677ff
RH
391static enum machine_mode ix86_fp_compare_mode PARAMS ((enum rtx_code));
392static int ix86_use_fcomi_compare PARAMS ((enum rtx_code));
393static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
394 rtx *, rtx *));
395static rtx ix86_expand_compare PARAMS ((enum rtx_code));
f6da8bc3
KG
396static rtx gen_push PARAMS ((rtx));
397static int memory_address_length PARAMS ((rtx addr));
398static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
399static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
400static int ix86_safe_length PARAMS ((rtx));
401static enum attr_memory ix86_safe_memory PARAMS ((rtx));
402static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
403static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
404static void ix86_dump_ppro_packet PARAMS ((FILE *));
405static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
406static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
e075ae69 407 rtx));
f6da8bc3
KG
408static void ix86_init_machine_status PARAMS ((struct function *));
409static void ix86_mark_machine_status PARAMS ((struct function *));
410static void ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
411static int ix86_safe_length_prefix PARAMS ((rtx));
564d80f4
JH
412static HOST_WIDE_INT ix86_compute_frame_size PARAMS((HOST_WIDE_INT,
413 int *, int *, int *));
0903fcab
JH
414static int ix86_nsaved_regs PARAMS((void));
415static void ix86_emit_save_regs PARAMS((void));
da2d1d3a 416static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
0903fcab 417static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
c6991660
KG
418static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
419static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
e075ae69
RH
420
421struct ix86_address
422{
423 rtx base, index, disp;
424 HOST_WIDE_INT scale;
425};
b08de47e 426
e075ae69
RH
427static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
428\f
f5316dfe
MM
429/* Sometimes certain combinations of command options do not make
430 sense on a particular target machine. You can define a macro
431 `OVERRIDE_OPTIONS' to take account of this. This macro, if
432 defined, is executed once just after all the command options have
433 been parsed.
434
435 Don't use this macro to turn on various extra optimizations for
436 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
437
438void
439override_options ()
440{
e075ae69
RH
441 /* Comes from final.c -- no real reason to change it. */
442#define MAX_CODE_ALIGN 16
f5316dfe 443
c8c5cb99
SC
444 static struct ptt
445 {
e075ae69
RH
446 struct processor_costs *cost; /* Processor costs */
447 int target_enable; /* Target flags to enable. */
448 int target_disable; /* Target flags to disable. */
449 int align_loop; /* Default alignments. */
450 int align_jump;
451 int align_func;
452 int branch_cost;
453 }
454 const processor_target_table[PROCESSOR_max] =
455 {
456 {&i386_cost, 0, 0, 2, 2, 2, 1},
457 {&i486_cost, 0, 0, 4, 4, 4, 1},
458 {&pentium_cost, 0, 0, -4, -4, -4, 1},
459 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
309ada50
JH
460 {&k6_cost, 0, 0, -5, -5, 4, 1},
461 {&athlon_cost, 0, 0, 4, -4, 4, 1}
e075ae69
RH
462 };
463
464 static struct pta
465 {
69ddee61 466 const char *name; /* processor name or nickname. */
e075ae69
RH
467 enum processor_type processor;
468 }
469 const processor_alias_table[] =
470 {
471 {"i386", PROCESSOR_I386},
472 {"i486", PROCESSOR_I486},
473 {"i586", PROCESSOR_PENTIUM},
474 {"pentium", PROCESSOR_PENTIUM},
475 {"i686", PROCESSOR_PENTIUMPRO},
476 {"pentiumpro", PROCESSOR_PENTIUMPRO},
e075ae69 477 {"k6", PROCESSOR_K6},
309ada50 478 {"athlon", PROCESSOR_ATHLON},
3af4bd89 479 };
c8c5cb99 480
e075ae69 481 int const pta_size = sizeof(processor_alias_table)/sizeof(struct pta);
c8c5cb99 482
f5316dfe
MM
483#ifdef SUBTARGET_OVERRIDE_OPTIONS
484 SUBTARGET_OVERRIDE_OPTIONS;
485#endif
486
5a6ee819 487 ix86_arch = PROCESSOR_I386;
e075ae69
RH
488 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
489
490 if (ix86_arch_string != 0)
491 {
492 int i;
493 for (i = 0; i < pta_size; i++)
494 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
495 {
496 ix86_arch = processor_alias_table[i].processor;
497 /* Default cpu tuning to the architecture. */
498 ix86_cpu = ix86_arch;
499 break;
500 }
501 if (i == pta_size)
502 error ("bad value (%s) for -march= switch", ix86_arch_string);
503 }
504
505 if (ix86_cpu_string != 0)
506 {
507 int i;
508 for (i = 0; i < pta_size; i++)
509 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
510 {
511 ix86_cpu = processor_alias_table[i].processor;
512 break;
513 }
514 if (i == pta_size)
515 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
516 }
517
518 ix86_cost = processor_target_table[ix86_cpu].cost;
519 target_flags |= processor_target_table[ix86_cpu].target_enable;
520 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
521
36edd3cc
BS
522 /* Arrange to set up i386_stack_locals for all functions. */
523 init_machine_status = ix86_init_machine_status;
1526a060 524 mark_machine_status = ix86_mark_machine_status;
36edd3cc 525
e9a25f70 526 /* Validate registers in register allocation order. */
e075ae69 527 if (ix86_reg_alloc_order)
f5316dfe 528 {
e075ae69
RH
529 int i, ch;
530 for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
f5316dfe 531 {
00c79232 532 int regno = 0;
79325812 533
f5316dfe
MM
534 switch (ch)
535 {
536 case 'a': regno = 0; break;
537 case 'd': regno = 1; break;
538 case 'c': regno = 2; break;
539 case 'b': regno = 3; break;
540 case 'S': regno = 4; break;
541 case 'D': regno = 5; break;
542 case 'B': regno = 6; break;
543
544 default: fatal ("Register '%c' is unknown", ch);
545 }
546
547 if (regs_allocated[regno])
e9a25f70 548 fatal ("Register '%c' already specified in allocation order", ch);
f5316dfe
MM
549
550 regs_allocated[regno] = 1;
551 }
552 }
b08de47e 553
e9a25f70 554 /* Validate -mregparm= value. */
e075ae69 555 if (ix86_regparm_string)
b08de47e 556 {
e075ae69
RH
557 ix86_regparm = atoi (ix86_regparm_string);
558 if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
e9a25f70 559 fatal ("-mregparm=%d is not between 0 and %d",
e075ae69 560 ix86_regparm, REGPARM_MAX);
b08de47e
MM
561 }
562
e9a25f70 563 /* Validate -malign-loops= value, or provide default. */
e075ae69
RH
564 ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
565 if (ix86_align_loops_string)
b08de47e 566 {
e075ae69
RH
567 ix86_align_loops = atoi (ix86_align_loops_string);
568 if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
b08de47e 569 fatal ("-malign-loops=%d is not between 0 and %d",
e075ae69 570 ix86_align_loops, MAX_CODE_ALIGN);
b08de47e 571 }
3af4bd89
JH
572
573 /* Validate -malign-jumps= value, or provide default. */
e075ae69
RH
574 ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
575 if (ix86_align_jumps_string)
b08de47e 576 {
e075ae69
RH
577 ix86_align_jumps = atoi (ix86_align_jumps_string);
578 if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
b08de47e 579 fatal ("-malign-jumps=%d is not between 0 and %d",
e075ae69 580 ix86_align_jumps, MAX_CODE_ALIGN);
b08de47e 581 }
b08de47e 582
e9a25f70 583 /* Validate -malign-functions= value, or provide default. */
e075ae69
RH
584 ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
585 if (ix86_align_funcs_string)
b08de47e 586 {
e075ae69
RH
587 ix86_align_funcs = atoi (ix86_align_funcs_string);
588 if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
b08de47e 589 fatal ("-malign-functions=%d is not between 0 and %d",
e075ae69 590 ix86_align_funcs, MAX_CODE_ALIGN);
b08de47e 591 }
3af4bd89 592
e4c0478d 593 /* Validate -mpreferred-stack-boundary= value, or provide default.
3af4bd89 594 The default of 128 bits is for Pentium III's SSE __m128. */
e075ae69
RH
595 ix86_preferred_stack_boundary = 128;
596 if (ix86_preferred_stack_boundary_string)
3af4bd89 597 {
e075ae69 598 int i = atoi (ix86_preferred_stack_boundary_string);
3af4bd89 599 if (i < 2 || i > 31)
e4c0478d 600 fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
e075ae69 601 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 602 }
77a989d1 603
e9a25f70 604 /* Validate -mbranch-cost= value, or provide default. */
e075ae69
RH
605 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
606 if (ix86_branch_cost_string)
804a8ee0 607 {
e075ae69
RH
608 ix86_branch_cost = atoi (ix86_branch_cost_string);
609 if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
610 fatal ("-mbranch-cost=%d is not between 0 and 5",
611 ix86_branch_cost);
804a8ee0 612 }
804a8ee0 613
e9a25f70
JL
614 /* Keep nonleaf frame pointers. */
615 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 616 flag_omit_frame_pointer = 1;
e075ae69
RH
617
618 /* If we're doing fast math, we don't care about comparison order
619 wrt NaNs. This lets us use a shorter comparison sequence. */
620 if (flag_fast_math)
621 target_flags &= ~MASK_IEEE_FP;
622
623 /* If we're planning on using `loop', use it. */
624 if (TARGET_USE_LOOP && optimize)
625 flag_branch_on_count_reg = 1;
f5316dfe
MM
626}
627\f
628/* A C statement (sans semicolon) to choose the order in which to
629 allocate hard registers for pseudo-registers local to a basic
630 block.
631
632 Store the desired register order in the array `reg_alloc_order'.
633 Element 0 should be the register to allocate first; element 1, the
634 next register; and so on.
635
636 The macro body should not assume anything about the contents of
637 `reg_alloc_order' before execution of the macro.
638
639 On most machines, it is not necessary to define this macro. */
640
641void
642order_regs_for_local_alloc ()
643{
00c79232 644 int i, ch, order;
f5316dfe 645
e9a25f70
JL
646 /* User specified the register allocation order. */
647
e075ae69 648 if (ix86_reg_alloc_order)
f5316dfe 649 {
e075ae69 650 for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
f5316dfe 651 {
00c79232 652 int regno = 0;
79325812 653
f5316dfe
MM
654 switch (ch)
655 {
656 case 'a': regno = 0; break;
657 case 'd': regno = 1; break;
658 case 'c': regno = 2; break;
659 case 'b': regno = 3; break;
660 case 'S': regno = 4; break;
661 case 'D': regno = 5; break;
662 case 'B': regno = 6; break;
663 }
664
665 reg_alloc_order[order++] = regno;
666 }
667
668 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
669 {
e9a25f70 670 if (! regs_allocated[i])
f5316dfe
MM
671 reg_alloc_order[order++] = i;
672 }
673 }
674
e9a25f70 675 /* If user did not specify a register allocation order, use natural order. */
f5316dfe
MM
676 else
677 {
678 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
679 reg_alloc_order[i] = i;
f5316dfe
MM
680 }
681}
32b5b1aa
SC
682\f
683void
c6aded7c 684optimization_options (level, size)
32b5b1aa 685 int level;
bb5177ac 686 int size ATTRIBUTE_UNUSED;
32b5b1aa 687{
e9a25f70
JL
688 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
689 make the problem with not enough registers even worse. */
32b5b1aa
SC
690#ifdef INSN_SCHEDULING
691 if (level > 1)
692 flag_schedule_insns = 0;
693#endif
694}
b08de47e
MM
695\f
696/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
697 attribute for DECL. The attributes in ATTRIBUTES have previously been
698 assigned to DECL. */
699
700int
e075ae69 701ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
bb5177ac
RL
702 tree decl ATTRIBUTE_UNUSED;
703 tree attributes ATTRIBUTE_UNUSED;
704 tree identifier ATTRIBUTE_UNUSED;
705 tree args ATTRIBUTE_UNUSED;
b08de47e
MM
706{
707 return 0;
708}
709
710/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
711 attribute for TYPE. The attributes in ATTRIBUTES have previously been
712 assigned to TYPE. */
713
714int
e075ae69 715ix86_valid_type_attribute_p (type, attributes, identifier, args)
b08de47e 716 tree type;
bb5177ac 717 tree attributes ATTRIBUTE_UNUSED;
b08de47e
MM
718 tree identifier;
719 tree args;
720{
721 if (TREE_CODE (type) != FUNCTION_TYPE
ac478ac0 722 && TREE_CODE (type) != METHOD_TYPE
b08de47e
MM
723 && TREE_CODE (type) != FIELD_DECL
724 && TREE_CODE (type) != TYPE_DECL)
725 return 0;
726
727 /* Stdcall attribute says callee is responsible for popping arguments
728 if they are not variable. */
729 if (is_attribute_p ("stdcall", identifier))
730 return (args == NULL_TREE);
731
e9a25f70 732 /* Cdecl attribute says the callee is a normal C declaration. */
b08de47e
MM
733 if (is_attribute_p ("cdecl", identifier))
734 return (args == NULL_TREE);
735
736 /* Regparm attribute specifies how many integer arguments are to be
e9a25f70 737 passed in registers. */
b08de47e
MM
738 if (is_attribute_p ("regparm", identifier))
739 {
740 tree cst;
741
e9a25f70 742 if (! args || TREE_CODE (args) != TREE_LIST
b08de47e
MM
743 || TREE_CHAIN (args) != NULL_TREE
744 || TREE_VALUE (args) == NULL_TREE)
745 return 0;
746
747 cst = TREE_VALUE (args);
748 if (TREE_CODE (cst) != INTEGER_CST)
749 return 0;
750
cce097f1 751 if (compare_tree_int (cst, REGPARM_MAX) > 0)
b08de47e
MM
752 return 0;
753
754 return 1;
755 }
756
757 return 0;
758}
759
760/* Return 0 if the attributes for two types are incompatible, 1 if they
761 are compatible, and 2 if they are nearly compatible (which causes a
762 warning to be generated). */
763
764int
e075ae69 765ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
766 tree type1;
767 tree type2;
b08de47e 768{
afcfe58c 769 /* Check for mismatch of non-default calling convention. */
69ddee61 770 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
771
772 if (TREE_CODE (type1) != FUNCTION_TYPE)
773 return 1;
774
775 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
776 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
777 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 778 return 0;
b08de47e
MM
779 return 1;
780}
b08de47e
MM
781\f
782/* Value is the number of bytes of arguments automatically
783 popped when returning from a subroutine call.
784 FUNDECL is the declaration node of the function (as a tree),
785 FUNTYPE is the data type of the function (as a tree),
786 or for a library call it is an identifier node for the subroutine name.
787 SIZE is the number of bytes of arguments passed on the stack.
788
789 On the 80386, the RTD insn may be used to pop them if the number
790 of args is fixed, but if the number is variable then the caller
791 must pop them all. RTD can't be used for library calls now
792 because the library is compiled with the Unix compiler.
793 Use of RTD is a selectable option, since it is incompatible with
794 standard Unix calling sequences. If the option is not selected,
795 the caller must always pop the args.
796
797 The attribute stdcall is equivalent to RTD on a per module basis. */
798
799int
e075ae69 800ix86_return_pops_args (fundecl, funtype, size)
b08de47e
MM
801 tree fundecl;
802 tree funtype;
803 int size;
79325812 804{
3345ee7d 805 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 806
e9a25f70
JL
807 /* Cdecl functions override -mrtd, and never pop the stack. */
808 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 809
e9a25f70 810 /* Stdcall functions will pop the stack if not variable args. */
698cdd84
SC
811 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
812 rtd = 1;
79325812 813
698cdd84
SC
814 if (rtd
815 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
816 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
817 == void_type_node)))
698cdd84
SC
818 return size;
819 }
79325812 820
e9a25f70 821 /* Lose any fake structure return argument. */
698cdd84
SC
822 if (aggregate_value_p (TREE_TYPE (funtype)))
823 return GET_MODE_SIZE (Pmode);
79325812 824
2614aac6 825 return 0;
b08de47e 826}
b08de47e
MM
827\f
828/* Argument support functions. */
829
830/* Initialize a variable CUM of type CUMULATIVE_ARGS
831 for a call to a function whose data type is FNTYPE.
832 For a library call, FNTYPE is 0. */
833
834void
835init_cumulative_args (cum, fntype, libname)
e9a25f70 836 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
b08de47e
MM
837 tree fntype; /* tree ptr for function decl */
838 rtx libname; /* SYMBOL_REF of library name or 0 */
839{
840 static CUMULATIVE_ARGS zero_cum;
841 tree param, next_param;
842
843 if (TARGET_DEBUG_ARG)
844 {
845 fprintf (stderr, "\ninit_cumulative_args (");
846 if (fntype)
e9a25f70
JL
847 fprintf (stderr, "fntype code = %s, ret code = %s",
848 tree_code_name[(int) TREE_CODE (fntype)],
849 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
850 else
851 fprintf (stderr, "no fntype");
852
853 if (libname)
854 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
855 }
856
857 *cum = zero_cum;
858
859 /* Set up the number of registers to use for passing arguments. */
e075ae69 860 cum->nregs = ix86_regparm;
b08de47e
MM
861 if (fntype)
862 {
863 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 864
b08de47e
MM
865 if (attr)
866 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
867 }
868
869 /* Determine if this function has variable arguments. This is
870 indicated by the last argument being 'void_type_mode' if there
871 are no variable arguments. If there are variable arguments, then
872 we won't pass anything in registers */
873
874 if (cum->nregs)
875 {
876 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 877 param != 0; param = next_param)
b08de47e
MM
878 {
879 next_param = TREE_CHAIN (param);
e9a25f70 880 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
b08de47e
MM
881 cum->nregs = 0;
882 }
883 }
884
885 if (TARGET_DEBUG_ARG)
886 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
887
888 return;
889}
890
891/* Update the data in CUM to advance over an argument
892 of mode MODE and data type TYPE.
893 (TYPE is null for libcalls where that information may not be available.) */
894
895void
896function_arg_advance (cum, mode, type, named)
897 CUMULATIVE_ARGS *cum; /* current arg information */
898 enum machine_mode mode; /* current arg mode */
899 tree type; /* type of the argument or 0 if lib support */
900 int named; /* whether or not the argument was named */
901{
e9a25f70
JL
902 int bytes
903 = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
b08de47e
MM
904 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
905
906 if (TARGET_DEBUG_ARG)
907 fprintf (stderr,
e9a25f70 908 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e
MM
909 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
910
911 cum->words += words;
912 cum->nregs -= words;
913 cum->regno += words;
914
915 if (cum->nregs <= 0)
916 {
917 cum->nregs = 0;
918 cum->regno = 0;
919 }
920
921 return;
922}
923
924/* Define where to put the arguments to a function.
925 Value is zero to push the argument on the stack,
926 or a hard register in which to store the argument.
927
928 MODE is the argument's machine mode.
929 TYPE is the data type of the argument (as a tree).
930 This is null for libcalls where that information may
931 not be available.
932 CUM is a variable of type CUMULATIVE_ARGS which gives info about
933 the preceding args and about the function being called.
934 NAMED is nonzero if this argument is a named parameter
935 (otherwise it is an extra parameter matching an ellipsis). */
936
937struct rtx_def *
938function_arg (cum, mode, type, named)
939 CUMULATIVE_ARGS *cum; /* current arg information */
940 enum machine_mode mode; /* current arg mode */
941 tree type; /* type of the argument or 0 if lib support */
942 int named; /* != 0 for normal args, == 0 for ... args */
943{
944 rtx ret = NULL_RTX;
e9a25f70
JL
945 int bytes
946 = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
b08de47e
MM
947 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
948
949 switch (mode)
950 {
e9a25f70
JL
951 /* For now, pass fp/complex values on the stack. */
952 default:
b08de47e
MM
953 break;
954
955 case BLKmode:
956 case DImode:
957 case SImode:
958 case HImode:
959 case QImode:
960 if (words <= cum->nregs)
f64cecad 961 ret = gen_rtx_REG (mode, cum->regno);
b08de47e
MM
962 break;
963 }
964
965 if (TARGET_DEBUG_ARG)
966 {
967 fprintf (stderr,
e9a25f70 968 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
b08de47e
MM
969 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
970
971 if (ret)
972 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
973 else
974 fprintf (stderr, ", stack");
975
976 fprintf (stderr, " )\n");
977 }
978
979 return ret;
980}
e075ae69
RH
981\f
982/* Returns 1 if OP is either a symbol reference or a sum of a symbol
983 reference and a constant. */
b08de47e
MM
984
985int
e075ae69
RH
986symbolic_operand (op, mode)
987 register rtx op;
988 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 989{
e075ae69 990 switch (GET_CODE (op))
2a2ab3f9 991 {
e075ae69
RH
992 case SYMBOL_REF:
993 case LABEL_REF:
994 return 1;
995
996 case CONST:
997 op = XEXP (op, 0);
998 if (GET_CODE (op) == SYMBOL_REF
999 || GET_CODE (op) == LABEL_REF
1000 || (GET_CODE (op) == UNSPEC
1001 && XINT (op, 1) >= 6
1002 && XINT (op, 1) <= 7))
1003 return 1;
1004 if (GET_CODE (op) != PLUS
1005 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1006 return 0;
1007
1008 op = XEXP (op, 0);
1009 if (GET_CODE (op) == SYMBOL_REF
1010 || GET_CODE (op) == LABEL_REF)
1011 return 1;
1012 /* Only @GOTOFF gets offsets. */
1013 if (GET_CODE (op) != UNSPEC
1014 || XINT (op, 1) != 7)
1015 return 0;
1016
1017 op = XVECEXP (op, 0, 0);
1018 if (GET_CODE (op) == SYMBOL_REF
1019 || GET_CODE (op) == LABEL_REF)
1020 return 1;
1021 return 0;
1022
1023 default:
1024 return 0;
2a2ab3f9
JVA
1025 }
1026}
2a2ab3f9 1027
e075ae69 1028/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 1029
e075ae69
RH
1030int
1031pic_symbolic_operand (op, mode)
1032 register rtx op;
1033 enum machine_mode mode ATTRIBUTE_UNUSED;
1034{
1035 if (GET_CODE (op) == CONST)
2a2ab3f9 1036 {
e075ae69
RH
1037 op = XEXP (op, 0);
1038 if (GET_CODE (op) == UNSPEC)
1039 return 1;
1040 if (GET_CODE (op) != PLUS
1041 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1042 return 0;
1043 op = XEXP (op, 0);
1044 if (GET_CODE (op) == UNSPEC)
1045 return 1;
2a2ab3f9 1046 }
e075ae69 1047 return 0;
2a2ab3f9 1048}
2a2ab3f9 1049
28d52ffb
RH
1050/* Test for a valid operand for a call instruction. Don't allow the
1051 arg pointer register or virtual regs since they may decay into
1052 reg + const, which the patterns can't handle. */
2a2ab3f9 1053
e075ae69
RH
1054int
1055call_insn_operand (op, mode)
1056 rtx op;
1057 enum machine_mode mode ATTRIBUTE_UNUSED;
1058{
1059 if (GET_CODE (op) != MEM)
1060 return 0;
1061 op = XEXP (op, 0);
2a2ab3f9 1062
e075ae69
RH
1063 /* Disallow indirect through a virtual register. This leads to
1064 compiler aborts when trying to eliminate them. */
1065 if (GET_CODE (op) == REG
1066 && (op == arg_pointer_rtx
564d80f4 1067 || op == frame_pointer_rtx
e075ae69
RH
1068 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
1069 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
1070 return 0;
2a2ab3f9 1071
28d52ffb
RH
1072 /* Disallow `call 1234'. Due to varying assembler lameness this
1073 gets either rejected or translated to `call .+1234'. */
1074 if (GET_CODE (op) == CONST_INT)
1075 return 0;
1076
cbbf65e0
RH
1077 /* Explicitly allow SYMBOL_REF even if pic. */
1078 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 1079 return 1;
2a2ab3f9 1080
cbbf65e0
RH
1081 /* Half-pic doesn't allow anything but registers and constants.
1082 We've just taken care of the later. */
1083 if (HALF_PIC_P ())
1084 return register_operand (op, Pmode);
1085
1086 /* Otherwise we can allow any general_operand in the address. */
1087 return general_operand (op, Pmode);
e075ae69 1088}
79325812 1089
e075ae69
RH
1090int
1091constant_call_address_operand (op, mode)
1092 rtx op;
1093 enum machine_mode mode ATTRIBUTE_UNUSED;
1094{
cbbf65e0
RH
1095 return (GET_CODE (op) == MEM
1096 && CONSTANT_ADDRESS_P (XEXP (op, 0))
1097 && GET_CODE (XEXP (op, 0)) != CONST_INT);
e075ae69 1098}
2a2ab3f9 1099
e075ae69 1100/* Match exactly zero and one. */
e9a25f70 1101
e075ae69
RH
1102int
1103const0_operand (op, mode)
1104 register rtx op;
1105 enum machine_mode mode;
1106{
1107 return op == CONST0_RTX (mode);
1108}
e9a25f70 1109
e075ae69
RH
1110int
1111const1_operand (op, mode)
1112 register rtx op;
1113 enum machine_mode mode ATTRIBUTE_UNUSED;
1114{
1115 return op == const1_rtx;
1116}
2a2ab3f9 1117
e075ae69 1118/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 1119
e075ae69
RH
1120int
1121const248_operand (op, mode)
1122 register rtx op;
1123 enum machine_mode mode ATTRIBUTE_UNUSED;
1124{
1125 return (GET_CODE (op) == CONST_INT
1126 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1127}
e9a25f70 1128
e075ae69 1129/* True if this is a constant appropriate for an increment or decremenmt. */
81fd0956 1130
e075ae69
RH
1131int
1132incdec_operand (op, mode)
1133 register rtx op;
1134 enum machine_mode mode;
1135{
1136 if (op == const1_rtx || op == constm1_rtx)
1137 return 1;
1138 if (GET_CODE (op) != CONST_INT)
1139 return 0;
1140 if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
1141 return 1;
1142 if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
1143 return 1;
1144 if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
1145 return 1;
1146 return 0;
1147}
2a2ab3f9 1148
e075ae69
RH
1149/* Return false if this is the stack pointer, or any other fake
1150 register eliminable to the stack pointer. Otherwise, this is
1151 a register operand.
2a2ab3f9 1152
e075ae69
RH
1153 This is used to prevent esp from being used as an index reg.
1154 Which would only happen in pathological cases. */
5f1ec3e6 1155
e075ae69
RH
1156int
1157reg_no_sp_operand (op, mode)
1158 register rtx op;
1159 enum machine_mode mode;
1160{
1161 rtx t = op;
1162 if (GET_CODE (t) == SUBREG)
1163 t = SUBREG_REG (t);
564d80f4 1164 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 1165 return 0;
2a2ab3f9 1166
e075ae69 1167 return register_operand (op, mode);
2a2ab3f9 1168}
b840bfb0 1169
2c5a510c
RH
1170/* Return false if this is any eliminable register. Otherwise
1171 general_operand. */
1172
1173int
1174general_no_elim_operand (op, mode)
1175 register rtx op;
1176 enum machine_mode mode;
1177{
1178 rtx t = op;
1179 if (GET_CODE (t) == SUBREG)
1180 t = SUBREG_REG (t);
1181 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1182 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1183 || t == virtual_stack_dynamic_rtx)
1184 return 0;
1185
1186 return general_operand (op, mode);
1187}
1188
1189/* Return false if this is any eliminable register. Otherwise
1190 register_operand or const_int. */
1191
1192int
1193nonmemory_no_elim_operand (op, mode)
1194 register rtx op;
1195 enum machine_mode mode;
1196{
1197 rtx t = op;
1198 if (GET_CODE (t) == SUBREG)
1199 t = SUBREG_REG (t);
1200 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1201 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1202 || t == virtual_stack_dynamic_rtx)
1203 return 0;
1204
1205 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1206}
1207
e075ae69 1208/* Return true if op is a Q_REGS class register. */
b840bfb0 1209
e075ae69
RH
1210int
1211q_regs_operand (op, mode)
1212 register rtx op;
1213 enum machine_mode mode;
b840bfb0 1214{
e075ae69
RH
1215 if (mode != VOIDmode && GET_MODE (op) != mode)
1216 return 0;
1217 if (GET_CODE (op) == SUBREG)
1218 op = SUBREG_REG (op);
1219 return QI_REG_P (op);
1220}
b840bfb0 1221
e075ae69 1222/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 1223
e075ae69
RH
1224int
1225non_q_regs_operand (op, mode)
1226 register rtx op;
1227 enum machine_mode mode;
1228{
1229 if (mode != VOIDmode && GET_MODE (op) != mode)
1230 return 0;
1231 if (GET_CODE (op) == SUBREG)
1232 op = SUBREG_REG (op);
1233 return NON_QI_REG_P (op);
1234}
b840bfb0 1235
e075ae69
RH
1236/* Return 1 if OP is a comparison operator that can use the condition code
1237 generated by a logical operation, which characteristicly does not set
1238 overflow or carry. To be used with CCNOmode. */
b840bfb0 1239
e075ae69
RH
1240int
1241no_comparison_operator (op, mode)
1242 register rtx op;
1243 enum machine_mode mode;
1244{
3a3677ff
RH
1245 if (mode != VOIDmode && GET_MODE (op) != mode)
1246 return 0;
1247
1248 switch (GET_CODE (op))
1249 {
1250 case EQ: case NE:
1251 case LT: case GE:
1252 case LEU: case LTU: case GEU: case GTU:
1253 return 1;
1254
1255 default:
1256 return 0;
1257 }
e075ae69 1258}
b840bfb0 1259
e075ae69 1260/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
b840bfb0 1261
e075ae69
RH
1262int
1263fcmov_comparison_operator (op, mode)
1264 register rtx op;
1265 enum machine_mode mode;
1266{
3a3677ff
RH
1267 if (mode != VOIDmode && GET_MODE (op) != mode)
1268 return 0;
1269
1270 switch (GET_CODE (op))
1271 {
1272 case EQ: case NE:
1273 case LEU: case LTU: case GEU: case GTU:
1274 case UNORDERED: case ORDERED:
1275 return 1;
1276
1277 default:
1278 return 0;
1279 }
1280}
1281
1282/* Return 1 if OP is any normal comparison operator plus {UN}ORDERED. */
1283
1284int
1285uno_comparison_operator (op, mode)
1286 register rtx op;
1287 enum machine_mode mode;
1288{
1289 if (mode != VOIDmode && GET_MODE (op) != mode)
1290 return 0;
1291
1292 switch (GET_CODE (op))
1293 {
1294 case EQ: case NE:
1295 case LE: case LT: case GE: case GT:
1296 case LEU: case LTU: case GEU: case GTU:
1297 case UNORDERED: case ORDERED:
1298 return 1;
1299
1300 default:
1301 return 0;
1302 }
e075ae69 1303}
b840bfb0 1304
e9e80858
JH
1305/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1306
1307int
1308promotable_binary_operator (op, mode)
1309 register rtx op;
1310 enum machine_mode mode ATTRIBUTE_UNUSED;
1311{
1312 switch (GET_CODE (op))
1313 {
1314 case MULT:
1315 /* Modern CPUs have same latency for HImode and SImode multiply,
1316 but 386 and 486 do HImode multiply faster. */
1317 return ix86_cpu > PROCESSOR_I486;
1318 case PLUS:
1319 case AND:
1320 case IOR:
1321 case XOR:
1322 case ASHIFT:
1323 return 1;
1324 default:
1325 return 0;
1326 }
1327}
1328
e075ae69
RH
1329/* Nearly general operand, but accept any const_double, since we wish
1330 to be able to drop them into memory rather than have them get pulled
1331 into registers. */
b840bfb0 1332
2a2ab3f9 1333int
e075ae69
RH
1334cmp_fp_expander_operand (op, mode)
1335 register rtx op;
1336 enum machine_mode mode;
2a2ab3f9 1337{
e075ae69 1338 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 1339 return 0;
e075ae69 1340 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 1341 return 1;
e075ae69 1342 return general_operand (op, mode);
2a2ab3f9
JVA
1343}
1344
e075ae69 1345/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
1346
1347int
e075ae69 1348ext_register_operand (op, mode)
2a2ab3f9 1349 register rtx op;
bb5177ac 1350 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1351{
e075ae69
RH
1352 if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
1353 return 0;
1354 return register_operand (op, VOIDmode);
1355}
1356
1357/* Return 1 if this is a valid binary floating-point operation.
1358 OP is the expression matched, and MODE is its mode. */
1359
1360int
1361binary_fp_operator (op, mode)
1362 register rtx op;
1363 enum machine_mode mode;
1364{
1365 if (mode != VOIDmode && mode != GET_MODE (op))
1366 return 0;
1367
2a2ab3f9
JVA
1368 switch (GET_CODE (op))
1369 {
e075ae69
RH
1370 case PLUS:
1371 case MINUS:
1372 case MULT:
1373 case DIV:
1374 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 1375
2a2ab3f9
JVA
1376 default:
1377 return 0;
1378 }
1379}
fee2770d 1380
e075ae69
RH
1381int
1382mult_operator(op, mode)
1383 register rtx op;
1384 enum machine_mode mode ATTRIBUTE_UNUSED;
1385{
1386 return GET_CODE (op) == MULT;
1387}
1388
1389int
1390div_operator(op, mode)
1391 register rtx op;
1392 enum machine_mode mode ATTRIBUTE_UNUSED;
1393{
1394 return GET_CODE (op) == DIV;
1395}
0a726ef1
JL
1396
1397int
e075ae69
RH
1398arith_or_logical_operator (op, mode)
1399 rtx op;
1400 enum machine_mode mode;
0a726ef1 1401{
e075ae69
RH
1402 return ((mode == VOIDmode || GET_MODE (op) == mode)
1403 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1404 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
1405}
1406
e075ae69 1407/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
1408
1409int
e075ae69
RH
1410memory_displacement_operand (op, mode)
1411 register rtx op;
1412 enum machine_mode mode;
4f2c8ebb 1413{
e075ae69 1414 struct ix86_address parts;
e9a25f70 1415
e075ae69
RH
1416 if (! memory_operand (op, mode))
1417 return 0;
1418
1419 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1420 abort ();
1421
1422 return parts.disp != NULL_RTX;
4f2c8ebb
RS
1423}
1424
16189740 1425/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
e075ae69
RH
1426 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1427
1428 ??? It seems likely that this will only work because cmpsi is an
1429 expander, and no actual insns use this. */
4f2c8ebb
RS
1430
1431int
e075ae69
RH
1432cmpsi_operand (op, mode)
1433 rtx op;
1434 enum machine_mode mode;
fee2770d 1435{
e075ae69
RH
1436 if (general_operand (op, mode))
1437 return 1;
1438
1439 if (GET_CODE (op) == AND
1440 && GET_MODE (op) == SImode
1441 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
1442 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
1443 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
1444 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
1445 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
1446 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 1447 return 1;
e9a25f70 1448
fee2770d
RS
1449 return 0;
1450}
d784886d 1451
e075ae69
RH
1452/* Returns 1 if OP is memory operand that can not be represented by the
1453 modRM array. */
d784886d
RK
1454
1455int
e075ae69 1456long_memory_operand (op, mode)
d784886d
RK
1457 register rtx op;
1458 enum machine_mode mode;
1459{
e075ae69 1460 if (! memory_operand (op, mode))
d784886d
RK
1461 return 0;
1462
e075ae69 1463 return memory_address_length (op) != 0;
d784886d 1464}
2247f6ed
JH
1465
1466/* Return nonzero if the rtx is known aligned. */
1467
1468int
1469aligned_operand (op, mode)
1470 rtx op;
1471 enum machine_mode mode;
1472{
1473 struct ix86_address parts;
1474
1475 if (!general_operand (op, mode))
1476 return 0;
1477
1478 /* Registers and immediate operands are always "aligned". */
1479 if (GET_CODE (op) != MEM)
1480 return 1;
1481
1482 /* Don't even try to do any aligned optimizations with volatiles. */
1483 if (MEM_VOLATILE_P (op))
1484 return 0;
1485
1486 op = XEXP (op, 0);
1487
1488 /* Pushes and pops are only valid on the stack pointer. */
1489 if (GET_CODE (op) == PRE_DEC
1490 || GET_CODE (op) == POST_INC)
1491 return 1;
1492
1493 /* Decode the address. */
1494 if (! ix86_decompose_address (op, &parts))
1495 abort ();
1496
1497 /* Look for some component that isn't known to be aligned. */
1498 if (parts.index)
1499 {
1500 if (parts.scale < 4
bdb429a5 1501 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
2247f6ed
JH
1502 return 0;
1503 }
1504 if (parts.base)
1505 {
bdb429a5 1506 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
2247f6ed
JH
1507 return 0;
1508 }
1509 if (parts.disp)
1510 {
1511 if (GET_CODE (parts.disp) != CONST_INT
1512 || (INTVAL (parts.disp) & 3) != 0)
1513 return 0;
1514 }
1515
1516 /* Didn't find one -- this must be an aligned address. */
1517 return 1;
1518}
e075ae69
RH
1519\f
1520/* Return true if the constant is something that can be loaded with
1521 a special instruction. Only handle 0.0 and 1.0; others are less
1522 worthwhile. */
57dbca5e
BS
1523
1524int
e075ae69
RH
1525standard_80387_constant_p (x)
1526 rtx x;
57dbca5e 1527{
e075ae69
RH
1528 if (GET_CODE (x) != CONST_DOUBLE)
1529 return -1;
1530
1531#if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
1532 {
1533 REAL_VALUE_TYPE d;
1534 jmp_buf handler;
1535 int is0, is1;
1536
1537 if (setjmp (handler))
1538 return 0;
1539
1540 set_float_handler (handler);
1541 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
1542 is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
1543 is1 = REAL_VALUES_EQUAL (d, dconst1);
1544 set_float_handler (NULL_PTR);
1545
1546 if (is0)
1547 return 1;
1548
1549 if (is1)
1550 return 2;
1551
1552 /* Note that on the 80387, other constants, such as pi,
1553 are much slower to load as standard constants
1554 than to load from doubles in memory! */
1555 /* ??? Not true on K6: all constants are equal cost. */
1556 }
1557#endif
1558
1559 return 0;
57dbca5e
BS
1560}
1561
2a2ab3f9
JVA
1562/* Returns 1 if OP contains a symbol reference */
1563
1564int
1565symbolic_reference_mentioned_p (op)
1566 rtx op;
1567{
6f7d635c 1568 register const char *fmt;
2a2ab3f9
JVA
1569 register int i;
1570
1571 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
1572 return 1;
1573
1574 fmt = GET_RTX_FORMAT (GET_CODE (op));
1575 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
1576 {
1577 if (fmt[i] == 'E')
1578 {
1579 register int j;
1580
1581 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
1582 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
1583 return 1;
1584 }
e9a25f70 1585
2a2ab3f9
JVA
1586 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
1587 return 1;
1588 }
1589
1590 return 0;
1591}
e075ae69
RH
1592
1593/* Return 1 if it is appropriate to emit `ret' instructions in the
1594 body of a function. Do this only if the epilogue is simple, needing a
1595 couple of insns. Prior to reloading, we can't tell how many registers
1596 must be saved, so return 0 then. Return 0 if there is no frame
1597 marker to de-allocate.
1598
1599 If NON_SAVING_SETJMP is defined and true, then it is not possible
1600 for the epilogue to be simple, so return 0. This is a special case
1601 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1602 until final, but jump_optimize may need to know sooner if a
1603 `return' is OK. */
32b5b1aa
SC
1604
1605int
e075ae69 1606ix86_can_use_return_insn_p ()
32b5b1aa 1607{
9a7372d6
RH
1608 HOST_WIDE_INT tsize;
1609 int nregs;
1610
e075ae69
RH
1611#ifdef NON_SAVING_SETJMP
1612 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
1613 return 0;
1614#endif
9a7372d6
RH
1615#ifdef FUNCTION_BLOCK_PROFILER_EXIT
1616 if (profile_block_flag == 2)
1617 return 0;
1618#endif
1619
1620 if (! reload_completed || frame_pointer_needed)
1621 return 0;
32b5b1aa 1622
9a7372d6
RH
1623 /* Don't allow more than 32 pop, since that's all we can do
1624 with one instruction. */
1625 if (current_function_pops_args
1626 && current_function_args_size >= 32768)
e075ae69 1627 return 0;
32b5b1aa 1628
9a7372d6
RH
1629 tsize = ix86_compute_frame_size (get_frame_size (), &nregs, NULL, NULL);
1630 return tsize == 0 && nregs == 0;
e075ae69
RH
1631}
1632\f
/* State for PIC prologue support; see load_pic_register and
   asm_output_function_prefix below.  */
static char *pic_label_name;		/* label of the "load return address" thunk */
static int pic_label_output;		/* nonzero once that thunk has been emitted */
static char *global_offset_table_name;	/* "_GLOBAL_OFFSET_TABLE_", created lazily */
e9a25f70 1636
e075ae69
RH
1637/* This function generates code for -fpic that loads %ebx with
1638 the return address of the caller and then returns. */
1639
1640void
1641asm_output_function_prefix (file, name)
1642 FILE *file;
3cce094d 1643 const char *name ATTRIBUTE_UNUSED;
e075ae69
RH
1644{
1645 rtx xops[2];
1646 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1647 || current_function_uses_const_pool);
1648 xops[0] = pic_offset_table_rtx;
1649 xops[1] = stack_pointer_rtx;
32b5b1aa 1650
e075ae69
RH
1651 /* Deep branch prediction favors having a return for every call. */
1652 if (pic_reg_used && TARGET_DEEP_BRANCH_PREDICTION)
32b5b1aa 1653 {
e075ae69
RH
1654 if (!pic_label_output)
1655 {
1656 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
1657 internal (non-global) label that's being emitted, it didn't make
1658 sense to have .type information for local labels. This caused
1659 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
1660 me debug info for a label that you're declaring non-global?) this
1661 was changed to call ASM_OUTPUT_LABEL() instead. */
32b5b1aa 1662
e075ae69 1663 ASM_OUTPUT_LABEL (file, pic_label_name);
e9a25f70 1664
e075ae69
RH
1665 xops[1] = gen_rtx_MEM (SImode, xops[1]);
1666 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
1667 output_asm_insn ("ret", xops);
0afeb08a 1668
e075ae69 1669 pic_label_output = 1;
32b5b1aa 1670 }
32b5b1aa 1671 }
32b5b1aa 1672}
32b5b1aa 1673
e075ae69
RH
1674void
1675load_pic_register ()
32b5b1aa 1676{
e075ae69 1677 rtx gotsym, pclab;
32b5b1aa 1678
21a427cc
AS
1679 if (global_offset_table_name == NULL)
1680 {
1681 global_offset_table_name =
1682 ggc_alloc_string ("_GLOBAL_OFFSET_TABLE_", 21);
1683 ggc_add_string_root (&global_offset_table_name, 1);
1684 }
1685 gotsym = gen_rtx_SYMBOL_REF (Pmode, global_offset_table_name);
32b5b1aa 1686
e075ae69 1687 if (TARGET_DEEP_BRANCH_PREDICTION)
32b5b1aa 1688 {
21a427cc
AS
1689 if (pic_label_name == NULL)
1690 {
1691 pic_label_name = ggc_alloc_string (NULL, 32);
1692 ggc_add_string_root (&pic_label_name, 1);
1693 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
1694 }
e075ae69 1695 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
32b5b1aa 1696 }
e075ae69 1697 else
e5cb57e8 1698 {
e075ae69 1699 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
e5cb57e8 1700 }
e5cb57e8 1701
e075ae69 1702 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
2a2ab3f9 1703
e075ae69
RH
1704 if (! TARGET_DEEP_BRANCH_PREDICTION)
1705 emit_insn (gen_popsi1 (pic_offset_table_rtx));
79325812 1706
e075ae69 1707 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
e9a25f70 1708}
8dfe5673 1709
e075ae69 1710/* Generate an SImode "push" pattern for input ARG. */
e9a25f70 1711
e075ae69
RH
1712static rtx
1713gen_push (arg)
1714 rtx arg;
e9a25f70 1715{
c5c76735
JL
1716 return gen_rtx_SET (VOIDmode,
1717 gen_rtx_MEM (SImode,
1718 gen_rtx_PRE_DEC (SImode,
1719 stack_pointer_rtx)),
1720 arg);
e9a25f70
JL
1721}
1722
0903fcab
JH
1723/* Return number of registers to be saved on the stack. */
1724
1725static int
1726ix86_nsaved_regs ()
1727{
1728 int nregs = 0;
1729 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1730 || current_function_uses_const_pool);
1731 int limit = (frame_pointer_needed
1732 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
1733 int regno;
1734
1735 for (regno = limit - 1; regno >= 0; regno--)
1736 if ((regs_ever_live[regno] && ! call_used_regs[regno])
1737 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1738 {
1739 nregs ++;
1740 }
1741 return nregs;
1742}
1743
1744/* Return the offset between two registers, one to be eliminated, and the other
1745 its replacement, at the start of a routine. */
1746
1747HOST_WIDE_INT
1748ix86_initial_elimination_offset (from, to)
1749 int from;
1750 int to;
1751{
564d80f4
JH
1752 int padding1;
1753 int nregs;
1754
1755 /* Stack grows downward:
1756
1757 [arguments]
1758 <- ARG_POINTER
1759 saved pc
1760
1761 saved frame pointer if frame_pointer_needed
1762 <- HARD_FRAME_POINTER
1c71e60e 1763 [saved regs]
564d80f4
JH
1764
1765 [padding1] \
1766 | <- FRAME_POINTER
1767 [frame] > tsize
1768 |
1769 [padding2] /
564d80f4
JH
1770 */
1771
1772 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
1773 /* Skip saved PC and previous frame pointer.
1774 Executed only when frame_pointer_needed. */
1775 return 8;
1776 else if (from == FRAME_POINTER_REGNUM
1777 && to == HARD_FRAME_POINTER_REGNUM)
1778 {
1779 ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *)0);
1c71e60e 1780 padding1 += nregs * UNITS_PER_WORD;
564d80f4
JH
1781 return -padding1;
1782 }
0903fcab
JH
1783 else
1784 {
564d80f4
JH
1785 /* ARG_POINTER or FRAME_POINTER to STACK_POINTER elimination. */
1786 int frame_size = frame_pointer_needed ? 8 : 4;
0903fcab 1787 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (),
564d80f4 1788 &nregs, &padding1, (int *)0);
0903fcab 1789
0903fcab 1790
564d80f4
JH
1791 if (to != STACK_POINTER_REGNUM)
1792 abort ();
1793 else if (from == ARG_POINTER_REGNUM)
1794 return tsize + nregs * UNITS_PER_WORD + frame_size;
1795 else if (from != FRAME_POINTER_REGNUM)
1796 abort ();
0903fcab 1797 else
1c71e60e 1798 return tsize - padding1;
0903fcab
JH
1799 }
1800}
1801
65954bd8
JL
1802/* Compute the size of local storage taking into consideration the
1803 desired stack alignment which is to be maintained. Also determine
564d80f4
JH
1804 the number of registers saved below the local storage.
1805
1806 PADDING1 returns padding before stack frame and PADDING2 returns
1807 padding after stack frame;
1808 */
1809
1810static HOST_WIDE_INT
1811ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2)
65954bd8
JL
1812 HOST_WIDE_INT size;
1813 int *nregs_on_stack;
564d80f4
JH
1814 int *rpadding1;
1815 int *rpadding2;
65954bd8 1816{
65954bd8 1817 int nregs;
564d80f4
JH
1818 int padding1 = 0;
1819 int padding2 = 0;
65954bd8 1820 HOST_WIDE_INT total_size;
564d80f4 1821 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
44affdae
JH
1822 int offset;
1823 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
65954bd8 1824
564d80f4 1825 nregs = ix86_nsaved_regs ();
564d80f4 1826 total_size = size;
65954bd8 1827
44affdae 1828 offset = frame_pointer_needed ? 8 : 4;
564d80f4 1829
44affdae
JH
1830 /* Do some sanity checking of stack_alignment_needed and preferred_alignment,
1831 since i386 port is the only using those features that may break easilly. */
564d80f4 1832
44affdae
JH
1833 if (size && !stack_alignment_needed)
1834 abort ();
5f677a9e 1835 if (!size && stack_alignment_needed != STACK_BOUNDARY / BITS_PER_UNIT)
44affdae
JH
1836 abort ();
1837 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
1838 abort ();
1839 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1840 abort ();
1841 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1842 abort ();
564d80f4 1843
44affdae
JH
1844 if (stack_alignment_needed < 4)
1845 stack_alignment_needed = 4;
564d80f4 1846
44affdae 1847 offset += nregs * UNITS_PER_WORD;
65954bd8 1848
f73ad30e
JH
1849 if (ACCUMULATE_OUTGOING_ARGS)
1850 total_size += current_function_outgoing_args_size;
1851
44affdae 1852 total_size += offset;
65954bd8 1853
44affdae
JH
1854 /* Align start of frame for local function. */
1855 padding1 = ((offset + stack_alignment_needed - 1)
1856 & -stack_alignment_needed) - offset;
1857 total_size += padding1;
54ff41b7 1858
44affdae
JH
1859 /* Align stack boundary. */
1860 padding2 = ((total_size + preferred_alignment - 1)
1861 & -preferred_alignment) - total_size;
65954bd8 1862
f73ad30e
JH
1863 if (ACCUMULATE_OUTGOING_ARGS)
1864 padding2 += current_function_outgoing_args_size;
1865
65954bd8
JL
1866 if (nregs_on_stack)
1867 *nregs_on_stack = nregs;
564d80f4
JH
1868 if (rpadding1)
1869 *rpadding1 = padding1;
564d80f4
JH
1870 if (rpadding2)
1871 *rpadding2 = padding2;
1872
1873 return size + padding1 + padding2;
65954bd8
JL
1874}
1875
0903fcab
JH
1876/* Emit code to save registers in the prologue. */
1877
1878static void
1879ix86_emit_save_regs ()
1880{
1881 register int regno;
1882 int limit;
1883 rtx insn;
1884 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1885 || current_function_uses_const_pool);
1886 limit = (frame_pointer_needed
564d80f4 1887 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
0903fcab
JH
1888
1889 for (regno = limit - 1; regno >= 0; regno--)
1890 if ((regs_ever_live[regno] && !call_used_regs[regno])
1891 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1892 {
1893 insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
1894 RTX_FRAME_RELATED_P (insn) = 1;
1895 }
1896}
1897
e075ae69
RH
/* Expand the prologue into a bunch of separate insns.

   Emits, in order: the frame-pointer push and set-up (if needed), the
   callee-saved register pushes, the stack allocation, any subtarget
   prologue, the PIC register load, and a scheduling barrier for
   profiling.  Insns that affect the frame are marked
   RTX_FRAME_RELATED_P for unwind info.  */

void
ix86_expand_prologue ()
{
  /* Total frame size; register count and paddings are not needed here.  */
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *)0, (int *)0,
						 (int *)0);
  rtx insn;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  ix86_emit_save_regs ();

  if (tsize == 0)
    ;
  else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
    {
      /* Ordinary allocation: just drop the stack pointer.  When a frame
	 pointer exists, tie the adjustment to it so the scheduler cannot
	 reorder across it.  */
      if (frame_pointer_needed)
	insn = emit_insn (gen_pro_epilogue_adjust_stack
			  (stack_pointer_rtx, stack_pointer_rtx,
			   GEN_INT (-tsize), hard_frame_pointer_rtx));
      else
	insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				      GEN_INT (-tsize)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* Large frame with stack probing: allocate by calling _alloca with
	 the size in %eax.  ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (tsize));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx));

      /* Record that the call reads %eax so dataflow keeps the move.  */
      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    load_pic_register ();

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  However, if -fpic, the above call will have
     done that.  */
  if ((profile_flag || profile_block_flag) && ! pic_reg_used)
    emit_insn (gen_blockage ());
}
1967
0903fcab
JH
1968/* Emit code to add TSIZE to esp value. Use POP instruction when
1969 profitable. */
1970
1971static void
1972ix86_emit_epilogue_esp_adjustment (tsize)
1973 int tsize;
1974{
bdeb029c
JH
1975 /* If a frame pointer is present, we must be sure to tie the sp
1976 to the fp so that we don't mis-schedule. */
1977 if (frame_pointer_needed)
1978 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
1979 stack_pointer_rtx,
1980 GEN_INT (tsize),
1981 hard_frame_pointer_rtx));
0903fcab 1982 else
bdeb029c
JH
1983 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
1984 GEN_INT (tsize)));
0903fcab
JH
1985}
1986
da2d1d3a
JH
1987/* Emit code to restore saved registers using MOV insns. First register
1988 is restored from POINTER + OFFSET. */
1989static void
1990ix86_emit_restore_regs_using_mov (pointer, offset)
1991 rtx pointer;
1992 int offset;
1993{
1994 int regno;
1995 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1996 || current_function_uses_const_pool);
1997 int limit = (frame_pointer_needed
1998 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
1999
2000 for (regno = 0; regno < limit; regno++)
2001 if ((regs_ever_live[regno] && !call_used_regs[regno])
2002 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2003 {
2004 emit_move_insn (gen_rtx_REG (SImode, regno),
2005 adj_offsettable_operand (gen_rtx_MEM (SImode,
2006 pointer),
2007 offset));
2008 offset += 4;
2009 }
2010}
2011
/* Restore function stack, frame, and registers.

   EMIT_RETURN is false for sibcall epilogues, which must not end in a
   return insn.  Chooses between restoring registers with MOV insns
   (plus leave/lea) and popping them, then emits the appropriate
   return, including the >64K pops-args workaround.  */

void
ix86_expand_epilogue (emit_return)
     int emit_return;
{
  int nregs;
  int regno;

  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  /* Nonzero when esp still points where the prologue left it, so it can
     be used to address the saved registers.  */
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
						 (int *)0, (int *)0);


  /* Calculate start of saved registers relative to ebp.  */
  offset = -nregs * UNITS_PER_WORD;

#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    {
      FUNCTION_BLOCK_PROFILER_EXIT;
    }
#endif

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.

     The default code results in stack adjustment using add/lea
     instruction, while this code results in LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well.  Especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in future.  */
  if ((!sp_valid && nregs <= 1)
      || (frame_pointer_needed && !nregs && tsize)
      || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
	  && nregs == 1))
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly
	 to the end of block of saved registers, where we may simplify
	 addressing mode.  */

      if (!frame_pointer_needed || (sp_valid && !tsize))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx, tsize);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);

      if (!frame_pointer_needed)
	ix86_emit_epilogue_esp_adjustment (tsize + nregs * UNITS_PER_WORD);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size)
	emit_insn (gen_leave ());
      else
	{
	  /* Discrete equivalent of leave: mov %ebp -> %esp, pop %ebp.  */
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx,
						    hard_frame_pointer_rtx));
	  emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  /* sp can only be invalid when a frame pointer exists.  */
	  if (!frame_pointer_needed)
	    abort ();
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset),
						    hard_frame_pointer_rtx));
	}
      else if (tsize)
	ix86_emit_epilogue_esp_adjustment (tsize);

      /* Pop the saved registers in ascending order (reverse of the
	 prologue's descending pushes).  */
      for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
	if ((regs_ever_live[regno] && !call_used_regs[regno])
	    || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
	  emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (! emit_return)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_indirect_jump (ecx);
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
2127\f
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return false if the structure of the address is
   grossly off.

   On success, fills OUT with the base register, index register,
   displacement, and scale factor (defaulting to 1), after applying
   several x86 encoding fix-ups (see the "Special case" comments
   below).  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;			/* index*scale + base */
	  else
	    disp = op1;			/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return FALSE;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.
	 Only shift counts 0..3 map to valid scales 1/2/4/8.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return FALSE;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return FALSE;
      scale = 1 << scale;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return FALSE;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is no
     scaling; swap them into the base slot, where they are encodable.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
	  || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return TRUE;
}
3b3c6a3f 2253
e075ae69
RH
2254/* Determine if a given CONST RTX is a valid memory displacement
2255 in PIC mode. */
2256
59be65f6 2257int
91bb873f
RH
2258legitimate_pic_address_disp_p (disp)
2259 register rtx disp;
2260{
2261 if (GET_CODE (disp) != CONST)
2262 return 0;
2263 disp = XEXP (disp, 0);
2264
2265 if (GET_CODE (disp) == PLUS)
2266 {
2267 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2268 return 0;
2269 disp = XEXP (disp, 0);
2270 }
2271
2272 if (GET_CODE (disp) != UNSPEC
2273 || XVECLEN (disp, 0) != 1)
2274 return 0;
2275
2276 /* Must be @GOT or @GOTOFF. */
2277 if (XINT (disp, 1) != 6
2278 && XINT (disp, 1) != 7)
2279 return 0;
2280
2281 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2282 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2283 return 0;
2284
2285 return 1;
2286}
2287
e075ae69
RH
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.

   Returns TRUE/FALSE; on rejection, the reason (and the offending rtx)
   is printed when TARGET_DEBUG_ADDR via the shared error exit.  */

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (! ix86_decompose_address (addr, &parts))
    {
      reason = "decomposition failed";
      goto error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
	{
	  reason = "base is not a register";
	  goto error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto error;
	}

      /* Hard-reg check in strict mode, pseudo-friendly check otherwise.  */
      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
	{
	  reason = "base is not valid";
	  goto error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
	{
	  reason = "index is not a register";
	  goto error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
	{
	  reason = "index is not valid";
	  goto error;
	}
    }

  /* Validate scale factor: only 1, 2, 4, 8, and only with an index.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (!CONSTANT_ADDRESS_P (disp))
	{
	  reason = "displacement is not constant";
	  goto error;
	}

      if (GET_CODE (disp) == CONST_DOUBLE)
	{
	  reason = "displacement is a const_double";
	  goto error;
	}

      if (flag_pic && SYMBOLIC_CONST (disp))
	{
	  if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto error;
	    }

	  /* Verify that a symbolic pic displacement includes
	     the pic_offset_table_rtx register.  */
	  if (base != pic_offset_table_rtx
	      && (index != pic_offset_table_rtx || scale != 1))
	    {
	      reason = "pic displacement against invalid base";
	      goto error;
	    }
	}
      else if (HALF_PIC_P ())
	{
	  /* Half-pic references must stand alone: no base or index.  */
	  if (! HALF_PIC_ADDRESS_P (disp)
	      || (base != NULL_RTX || index != NULL_RTX))
	    {
	      reason = "displacement is an invalid half-pic reference";
	      goto error;
	    }
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
3b3c6a3f
MM
2462\f
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

  if (GET_CODE (addr) == LABEL_REF
      || (GET_CODE (addr) == SYMBOL_REF
	  && (CONSTANT_POOL_ADDRESS_P (addr)
	      || SYMBOL_REF_FLAG (addr))))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  UNSPEC code 7 marks @GOTOFF.  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 7);
      new = gen_rtx_CONST (VOIDmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      /* This symbol must be referenced via a load from the
	 Global Offset Table (@GOT).  UNSPEC code 6 marks @GOT.  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 6);
      new = gen_rtx_CONST (VOIDmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
      new = gen_rtx_MEM (Pmode, new);
      /* The GOT entry itself is constant for this function.  */
      RTX_UNCHANGING_P (new) = 1;

      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      emit_move_insn (reg, new);
      new = reg;
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == UNSPEC)
	    {
	      /* Check that the unspec is one of the ones we generate?  */
	    }
	  else if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if ((GET_CODE (op0) == LABEL_REF
	       || (GET_CODE (op0) == SYMBOL_REF
		   && (CONSTANT_POOL_ADDRESS_P (op0)
		       || SYMBOL_REF_FLAG (op0))))
	      && GET_CODE (op1) == CONST_INT)
	    {
	      current_function_uses_pic_offset_table = 1;
	      new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, op0), 7);
	      new = gen_rtx_PLUS (VOIDmode, new, op1);
	      new = gen_rtx_CONST (VOIDmode, new);
	      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

	      if (reg != 0)
		{
		  emit_move_insn (reg, new);
		  new = reg;
		}
	    }
	  else
	    {
	      /* Legitimize both operands recursively, then recombine,
		 keeping any constant term outermost.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new = legitimize_pic_address (XEXP (addr, 1),
					    base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}
2584\f
3b3c6a3f
MM
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (x, oldx, mode)
     register rtx x;
     register rtx oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  int changed = 0;		/* Set when any rewrite below fires.  */
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  /* The integer constant may be either the outer operand or
	     buried inside the inner PLUS; find it and the other term.  */
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Force remaining multiplications into registers.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Last resort: force the non-register operand into a register.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
2a2ab3f9
JVA
2769\f
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.

   Recurses through CONST, PLUS, MINUS, and UNSPEC wrappers, appending
   @GOT/@GOTOFF/@PLT suffixes for the corresponding unspec codes.  */

static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      /* "." (current location) is only meaningful in PIC output.  */
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      /* 'P' requests a PLT reference for non-static symbols.  */
      if (code == 'P' && ! SYMBOL_REF_FLAG (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      /* Grouping differs between the ATT and BSD assembler dialects.  */
      putc (ASSEMBLER_DIALECT ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      /* Codes 6/7/8 correspond to @GOT/@GOTOFF/@PLT relocations.  */
      switch (XINT (x, 1))
	{
	case 6:
	  fputs ("@GOT", file);
	  break;
	case 7:
	  fputs ("@GOTOFF", file);
	  break;
	case 8:
	  fputs ("@PLT", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
1865dbb5
JM
2884
2885/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
2886 We need to handle our special PIC relocations. */
2887
2888void
2889i386_dwarf_output_addr_const (file, x)
2890 FILE *file;
2891 rtx x;
2892{
2893 fprintf (file, "\t%s\t", INT_ASM_OP);
2894 if (flag_pic)
2895 output_pic_addr_const (file, x, '\0');
2896 else
2897 output_addr_const (file, x);
2898 fputc ('\n', file);
2899}
2900
2901/* In the name of slightly smaller debug output, and to cater to
2902 general assembler losage, recognize PIC+GOTOFF and turn it back
2903 into a direct symbol reference. */
2904
2905rtx
2906i386_simplify_dwarf_addr (orig_x)
2907 rtx orig_x;
2908{
2909 rtx x = orig_x;
2910
2911 if (GET_CODE (x) != PLUS
2912 || GET_CODE (XEXP (x, 0)) != REG
2913 || GET_CODE (XEXP (x, 1)) != CONST)
2914 return orig_x;
2915
2916 x = XEXP (XEXP (x, 1), 0);
2917 if (GET_CODE (x) == UNSPEC
2918 && XINT (x, 1) == 7)
2919 return XVECEXP (x, 0, 0);
2920
2921 if (GET_CODE (x) == PLUS
2922 && GET_CODE (XEXP (x, 0)) == UNSPEC
2923 && GET_CODE (XEXP (x, 1)) == CONST_INT
2924 && XINT (XEXP (x, 0), 1) == 7)
2925 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
2926
2927 return orig_x;
2928}
2a2ab3f9 2929\f
a269a03c 2930static void
e075ae69 2931put_condition_code (code, mode, reverse, fp, file)
a269a03c 2932 enum rtx_code code;
e075ae69
RH
2933 enum machine_mode mode;
2934 int reverse, fp;
a269a03c
JC
2935 FILE *file;
2936{
a269a03c
JC
2937 const char *suffix;
2938
a269a03c
JC
2939 if (reverse)
2940 code = reverse_condition (code);
e075ae69 2941
a269a03c
JC
2942 switch (code)
2943 {
2944 case EQ:
2945 suffix = "e";
2946 break;
a269a03c
JC
2947 case NE:
2948 suffix = "ne";
2949 break;
a269a03c 2950 case GT:
e075ae69
RH
2951 if (mode == CCNOmode)
2952 abort ();
2953 suffix = "g";
a269a03c 2954 break;
a269a03c 2955 case GTU:
e075ae69
RH
2956 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
2957 Those same assemblers have the same but opposite losage on cmov. */
2958 suffix = fp ? "nbe" : "a";
a269a03c 2959 break;
a269a03c 2960 case LT:
e075ae69 2961 if (mode == CCNOmode)
a269a03c
JC
2962 suffix = "s";
2963 else
e075ae69 2964 suffix = "l";
a269a03c 2965 break;
a269a03c
JC
2966 case LTU:
2967 suffix = "b";
2968 break;
a269a03c 2969 case GE:
e075ae69 2970 if (mode == CCNOmode)
a269a03c
JC
2971 suffix = "ns";
2972 else
e075ae69 2973 suffix = "ge";
a269a03c 2974 break;
a269a03c 2975 case GEU:
e075ae69
RH
2976 /* ??? As above. */
2977 suffix = fp ? "nb" : "ae";
a269a03c 2978 break;
a269a03c 2979 case LE:
e075ae69
RH
2980 if (mode == CCNOmode)
2981 abort ();
2982 suffix = "le";
a269a03c 2983 break;
a269a03c
JC
2984 case LEU:
2985 suffix = "be";
2986 break;
3a3677ff
RH
2987 case UNORDERED:
2988 suffix = "p";
2989 break;
2990 case ORDERED:
2991 suffix = "np";
2992 break;
a269a03c
JC
2993 default:
2994 abort ();
2995 }
2996 fputs (suffix, file);
2997}
2998
e075ae69
RH
2999void
3000print_reg (x, code, file)
3001 rtx x;
3002 int code;
3003 FILE *file;
e5cb57e8 3004{
e075ae69 3005 if (REGNO (x) == ARG_POINTER_REGNUM
564d80f4 3006 || REGNO (x) == FRAME_POINTER_REGNUM
e075ae69
RH
3007 || REGNO (x) == FLAGS_REG
3008 || REGNO (x) == FPSR_REG)
3009 abort ();
e9a25f70 3010
e075ae69
RH
3011 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3012 putc ('%', file);
3013
3014 if (code == 'w')
3015 code = 2;
3016 else if (code == 'b')
3017 code = 1;
3018 else if (code == 'k')
3019 code = 4;
3020 else if (code == 'y')
3021 code = 3;
3022 else if (code == 'h')
3023 code = 0;
3024 else
3025 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 3026
e075ae69
RH
3027 switch (code)
3028 {
3029 case 3:
3030 if (STACK_TOP_P (x))
3031 {
3032 fputs ("st(0)", file);
3033 break;
3034 }
3035 /* FALLTHRU */
3036 case 4:
3037 case 8:
3038 case 12:
3039 if (! FP_REG_P (x))
3040 putc ('e', file);
3041 /* FALLTHRU */
3042 case 2:
3043 fputs (hi_reg_name[REGNO (x)], file);
3044 break;
3045 case 1:
3046 fputs (qi_reg_name[REGNO (x)], file);
3047 break;
3048 case 0:
3049 fputs (qi_high_reg_name[REGNO (x)], file);
3050 break;
3051 default:
3052 abort ();
fe25fea3 3053 }
e5cb57e8
SC
3054}
3055
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w --  likewise, print the HImode name of the register.
   k --  likewise, print the SImode name of the register.
   h --  print the QImode name for a "high" register, either ah, bh, ch or dh.
   y --  print "st(0)" instead of "st" as a register.  */

void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  if (code)
    {
      switch (code)
	{
	case '*':
	  /* AT&T syntax prefixes indirect jump/call targets with `*'.  */
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('*', file);
	  return;

	case 'L':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('b', file);
	  return;

	case 'Q':
	  /* Note: 'Q' deliberately prints `l', not `q'.  */
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('t', file);
	  return;

	case 'z':
	  /* 387 opcodes don't get size suffixes if the operands are
	     registers.  */

	  if (STACK_REG_P (x))
	    return;

	  /* Intel syntax has no truck with instruction suffixes.  */
	  if (ASSEMBLER_DIALECT != 0)
	    return;

	  /* this is the size of op from size of operand */
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 2:
#ifdef HAVE_GAS_FILDS_FISTS
	      /* Only gas versions with filds/fists accept a 16-bit
		 suffix; otherwise emit no suffix at all.  */
	      putc ('s', file);
#endif
	      return;

	    case 4:
	      if (GET_MODE (x) == SFmode)
		{
		  putc ('s', file);
		  return;
		}
	      else
		putc ('l', file);
	      return;

	    case 12:
	      /* 80-bit extended real.  */
	      putc ('t', file);
	      return;

	    case 8:
	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
		{
#ifdef GAS_MNEMONICS
		  putc ('q', file);
#else
		  /* Assemblers without `q' spell the 64-bit integer
		     suffix as `ll'.  */
		  putc ('l', file);
		  putc ('l', file);
#endif
		}
	      else
		putc ('l', file);
	      return;

	    default:
	      abort ();
	    }

	case 'b':
	case 'w':
	case 'k':
	case 'h':
	case 'y':
	case 'X':
	case 'P':
	  /* Handled below by PRINT_REG / the MEM and constant paths.  */
	  break;

	case 's':
	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      PRINT_OPERAND (file, x, 0);
	      putc (',', file);
	    }
	  return;

	case 'C':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
	  return;
	case 'F':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
	  return;

	  /* Like above, but reverse condition */
	case 'c':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
	  return;
	case 'f':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
	  return;

	default:
	  {
	    char str[50];
	    sprintf (str, "invalid operand code `%c'", code);
	    output_operand_lossage (str);
	  }
	  /* Falls out of the switch and prints the operand anyway.  */
	}
    }

  if (GET_CODE (x) == REG)
    {
      PRINT_REG (x, code, file);
    }

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
	{
	  const char * size;
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "XWORD"; break;
	    default:
	      abort ();
	    }
	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      if (flag_pic && CONSTANT_ADDRESS_P (x))
	output_pic_addr_const (file, x, code);
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      /* SFmode immediates are emitted as their raw 32-bit bit
	 pattern in hex.  */
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == 0)
	putc ('$', file);
      fprintf (file, "0x%lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }
  else
    {
      /* Remaining cases: immediates and symbolic constants.  'P'
	 suppresses the immediate marker (used for call targets).  */
      if (code != 'P')
	{
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
3302\f
3303/* Print a memory operand whose address is ADDR. */
3304
3305void
3306print_operand_address (file, addr)
3307 FILE *file;
3308 register rtx addr;
3309{
e075ae69
RH
3310 struct ix86_address parts;
3311 rtx base, index, disp;
3312 int scale;
e9a25f70 3313
e075ae69
RH
3314 if (! ix86_decompose_address (addr, &parts))
3315 abort ();
e9a25f70 3316
e075ae69
RH
3317 base = parts.base;
3318 index = parts.index;
3319 disp = parts.disp;
3320 scale = parts.scale;
e9a25f70 3321
e075ae69
RH
3322 if (!base && !index)
3323 {
3324 /* Displacement only requires special attention. */
e9a25f70 3325
e075ae69 3326 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 3327 {
e075ae69
RH
3328 if (ASSEMBLER_DIALECT != 0)
3329 fputs ("ds:", file);
3330 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
2a2ab3f9 3331 }
e075ae69
RH
3332 else if (flag_pic)
3333 output_pic_addr_const (file, addr, 0);
3334 else
3335 output_addr_const (file, addr);
3336 }
3337 else
3338 {
3339 if (ASSEMBLER_DIALECT == 0)
2a2ab3f9 3340 {
e075ae69 3341 if (disp)
2a2ab3f9 3342 {
c399861d 3343 if (flag_pic)
e075ae69
RH
3344 output_pic_addr_const (file, disp, 0);
3345 else if (GET_CODE (disp) == LABEL_REF)
3346 output_asm_label (disp);
2a2ab3f9 3347 else
e075ae69 3348 output_addr_const (file, disp);
2a2ab3f9
JVA
3349 }
3350
e075ae69
RH
3351 putc ('(', file);
3352 if (base)
3353 PRINT_REG (base, 0, file);
3354 if (index)
2a2ab3f9 3355 {
e075ae69
RH
3356 putc (',', file);
3357 PRINT_REG (index, 0, file);
3358 if (scale != 1)
3359 fprintf (file, ",%d", scale);
2a2ab3f9 3360 }
e075ae69 3361 putc (')', file);
2a2ab3f9 3362 }
2a2ab3f9
JVA
3363 else
3364 {
e075ae69 3365 rtx offset = NULL_RTX;
e9a25f70 3366
e075ae69
RH
3367 if (disp)
3368 {
3369 /* Pull out the offset of a symbol; print any symbol itself. */
3370 if (GET_CODE (disp) == CONST
3371 && GET_CODE (XEXP (disp, 0)) == PLUS
3372 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3373 {
3374 offset = XEXP (XEXP (disp, 0), 1);
3375 disp = gen_rtx_CONST (VOIDmode,
3376 XEXP (XEXP (disp, 0), 0));
3377 }
ce193852 3378
e075ae69
RH
3379 if (flag_pic)
3380 output_pic_addr_const (file, disp, 0);
3381 else if (GET_CODE (disp) == LABEL_REF)
3382 output_asm_label (disp);
3383 else if (GET_CODE (disp) == CONST_INT)
3384 offset = disp;
3385 else
3386 output_addr_const (file, disp);
3387 }
e9a25f70 3388
e075ae69
RH
3389 putc ('[', file);
3390 if (base)
a8620236 3391 {
e075ae69
RH
3392 PRINT_REG (base, 0, file);
3393 if (offset)
3394 {
3395 if (INTVAL (offset) >= 0)
3396 putc ('+', file);
3397 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3398 }
a8620236 3399 }
e075ae69
RH
3400 else if (offset)
3401 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 3402 else
e075ae69 3403 putc ('0', file);
e9a25f70 3404
e075ae69
RH
3405 if (index)
3406 {
3407 putc ('+', file);
3408 PRINT_REG (index, 0, file);
3409 if (scale != 1)
3410 fprintf (file, "*%d", scale);
3411 }
3412 putc (']', file);
3413 }
2a2ab3f9
JVA
3414 }
3415}
3416\f
3417/* Split one or more DImode RTL references into pairs of SImode
3418 references. The RTL can be REG, offsettable MEM, integer constant, or
3419 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3420 split and "num" is its length. lo_half and hi_half are output arrays
3421 that parallel "operands". */
3422
3423void
3424split_di (operands, num, lo_half, hi_half)
3425 rtx operands[];
3426 int num;
3427 rtx lo_half[], hi_half[];
3428{
3429 while (num--)
3430 {
57dbca5e 3431 rtx op = operands[num];
e075ae69
RH
3432 if (CONSTANT_P (op))
3433 split_double (op, &lo_half[num], &hi_half[num]);
3434 else if (! reload_completed)
a269a03c
JC
3435 {
3436 lo_half[num] = gen_lowpart (SImode, op);
3437 hi_half[num] = gen_highpart (SImode, op);
3438 }
3439 else if (GET_CODE (op) == REG)
2a2ab3f9 3440 {
57dbca5e
BS
3441 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3442 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
2a2ab3f9 3443 }
57dbca5e 3444 else if (offsettable_memref_p (op))
2a2ab3f9 3445 {
57dbca5e
BS
3446 rtx lo_addr = XEXP (op, 0);
3447 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3448 lo_half[num] = change_address (op, SImode, lo_addr);
3449 hi_half[num] = change_address (op, SImode, hi_addr);
2a2ab3f9
JVA
3450 }
3451 else
564d80f4 3452 abort ();
2a2ab3f9
JVA
3453 }
3454}
3455\f
2a2ab3f9
JVA
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (insn, operands)
     rtx insn;
     rtx *operands;
{
  /* The returned template is built in static storage: mnemonic stem
     first (strcpy), then the operand/modifier tail (strcat).  */
  static char buf[30];
  const char *p;

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else
    abort ();
#endif

  /* Select the mnemonic stem; the "fi" forms take an integer memory
     operand.  */
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      break;

    default:
      abort ();
    }

  strcpy (buf, p);

  /* Now choose the operand tail.  For the commutative operations the
     operands may first be swapped so that operands[0] == operands[1].  */
  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative: a memory operand on the left needs the
	 reversed ("r") instruction form.  */
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}
e075ae69 3664
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.

   operands[2] is the memory slot used to save/modify the FPU control
   word; operands[3]/[4] are scratch operands — NOTE(review): their
   exact roles are fixed by the insn patterns in i386.md, not visible
   here.  */

const char *
output_fix_trunc (insn, operands)
     rtx insn;
     rtx *operands;
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  rtx xops[4];

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (! STACK_TOP_P (operands[1]))
    abort ();

  /* xops[1] addresses the high byte of the saved control word; the
     constant 12 (0x0c) written there sets the rounding-control bits
     to round-toward-zero, as C float->int conversion requires.  */
  xops[0] = GEN_INT (12);
  xops[1] = adj_offsettable_operand (operands[2], 1);
  xops[1] = change_address (xops[1], QImode, NULL_RTX);

  /* Store directly to a MEM destination; otherwise go through the
     scratch memory operand[3] and move to the register afterwards.  */
  xops[2] = operands[0];
  if (GET_CODE (operands[0]) != MEM)
    xops[2] = operands[3];

  /* Save the control word, patch in truncation mode, reload it.  */
  output_asm_insn ("fnstcw\t%2", operands);
  output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
  output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
  output_asm_insn ("fldcw\t%2", operands);
  output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);

  /* fistp is mandatory for DImode (no non-popping 64-bit store).  */
  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z2\t%2", xops);
  else
    output_asm_insn ("fist%z2\t%2", xops);

  /* Restore the original control word.  */
  output_asm_insn ("fldcw\t%2", operands);

  if (GET_CODE (operands[0]) != MEM)
    {
      /* Copy the result from the scratch slot into the destination
	 register (pair).  */
      if (dimode_p)
	{
	  split_di (operands+0, 1, xops+0, xops+1);
	  split_di (operands+3, 1, xops+2, xops+3);
	  output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
	  output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
	}
      else if (GET_MODE (operands[0]) == SImode)
	output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
      else
	output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
    }

  return "";
}
cda749b1 3725
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.  */

const char *
output_fp_compare (insn, operands, eflags_p, unordered_p)
     rtx insn;
     rtx *operands;
     int eflags_p, unordered_p;
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];

  /* In the fnstsw case operands[0] is the status-word destination, so
     the actual comparison operands shift up by one.  */
  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }

  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p == 1)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return "fstp\t%y0";
	}
      else
	{
	  if (eflags_p == 2)
	    {
	      if (unordered_p)
		return "fucompp\n\tfnstsw\t%0";
	      else
		return "fcompp\n\tfnstsw\t%0";
	    }
	  else
	    {
	      if (unordered_p)
		return "fucompp";
	      else
		return "fcompp";
	    }
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[24] =
      {
	"fcom%z1\t%y1",
	"fcomp%z1\t%y1",
	"fucom%z1\t%y1",
	"fucomp%z1\t%y1",

	"ficom%z1\t%y1",
	"ficomp%z1\t%y1",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL,

	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      /* Build the 5-bit index described above; the NULL entries are
	 combinations that cannot occur (e.g. unordered integer
	 compares, fcomi with an integer operand).  */
      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 24)
	abort ();
      ret = alt[mask];
      if (ret == NULL)
	abort ();

      return ret;
    }
}
2a2ab3f9 3842
e075ae69 3843/* Output assembler code to FILE to initialize basic-block profiling.
2a2ab3f9 3844
e075ae69 3845 If profile_block_flag == 2
2a2ab3f9 3846
e075ae69
RH
3847 Output code to call the subroutine `__bb_init_trace_func'
3848 and pass two parameters to it. The first parameter is
3849 the address of a block allocated in the object module.
3850 The second parameter is the number of the first basic block
3851 of the function.
2a2ab3f9 3852
e075ae69
RH
3853 The name of the block is a local symbol made with this statement:
3854
3855 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
2a2ab3f9 3856
e075ae69
RH
3857 Of course, since you are writing the definition of
3858 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3859 can take a short cut in the definition of this macro and use the
3860 name that you know will result.
2a2ab3f9 3861
e075ae69
RH
3862 The number of the first basic block of the function is
3863 passed to the macro in BLOCK_OR_LABEL.
2a2ab3f9 3864
e075ae69
RH
3865 If described in a virtual assembler language the code to be
3866 output looks like:
2a2ab3f9 3867
e075ae69
RH
3868 parameter1 <- LPBX0
3869 parameter2 <- BLOCK_OR_LABEL
3870 call __bb_init_trace_func
2a2ab3f9 3871
e075ae69 3872 else if profile_block_flag != 0
e74389ff 3873
e075ae69
RH
3874 Output code to call the subroutine `__bb_init_func'
3875 and pass one single parameter to it, which is the same
3876 as the first parameter to `__bb_init_trace_func'.
e74389ff 3877
e075ae69
RH
3878 The first word of this parameter is a flag which will be nonzero if
3879 the object module has already been initialized. So test this word
3880 first, and do not call `__bb_init_func' if the flag is nonzero.
3881 Note: When profile_block_flag == 2 the test need not be done
3882 but `__bb_init_trace_func' *must* be called.
e74389ff 3883
e075ae69
RH
3884 BLOCK_OR_LABEL may be used to generate a label number as a
3885 branch destination in case `__bb_init_func' will not be called.
e74389ff 3886
e075ae69
RH
3887 If described in a virtual assembler language the code to be
3888 output looks like:
2a2ab3f9 3889
e075ae69
RH
3890 cmp (LPBX0),0
3891 jne local_label
3892 parameter1 <- LPBX0
3893 call __bb_init_func
3894 local_label:
3895*/
c572e5ba 3896
e075ae69
RH
3897void
3898ix86_output_function_block_profiler (file, block_or_label)
3899 FILE *file;
3900 int block_or_label;
c572e5ba 3901{
e075ae69
RH
3902 static int num_func = 0;
3903 rtx xops[8];
3904 char block_table[80], false_label[80];
c572e5ba 3905
e075ae69 3906 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
e9a25f70 3907
e075ae69
RH
3908 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
3909 xops[5] = stack_pointer_rtx;
3910 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
2a2ab3f9 3911
e075ae69 3912 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
c572e5ba 3913
e075ae69 3914 switch (profile_block_flag)
c572e5ba 3915 {
e075ae69
RH
3916 case 2:
3917 xops[2] = GEN_INT (block_or_label);
3918 xops[3] = gen_rtx_MEM (Pmode,
3919 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
3920 xops[6] = GEN_INT (8);
e9a25f70 3921
e075ae69
RH
3922 output_asm_insn ("push{l}\t%2", xops);
3923 if (!flag_pic)
3924 output_asm_insn ("push{l}\t%1", xops);
e9a25f70 3925 else
870a0c2c 3926 {
e075ae69
RH
3927 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
3928 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 3929 }
e075ae69
RH
3930 output_asm_insn ("call\t%P3", xops);
3931 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
3932 break;
c572e5ba 3933
e075ae69
RH
3934 default:
3935 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
c572e5ba 3936
e075ae69
RH
3937 xops[0] = const0_rtx;
3938 xops[2] = gen_rtx_MEM (Pmode,
3939 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
3940 xops[3] = gen_rtx_MEM (Pmode,
3941 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
3942 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
3943 xops[6] = GEN_INT (4);
a14003ee 3944
e075ae69 3945 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
446ba526 3946
e075ae69
RH
3947 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
3948 output_asm_insn ("jne\t%2", xops);
870a0c2c 3949
e075ae69
RH
3950 if (!flag_pic)
3951 output_asm_insn ("push{l}\t%1", xops);
3952 else
3953 {
3954 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
3955 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 3956 }
e075ae69
RH
3957 output_asm_insn ("call\t%P3", xops);
3958 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
3959 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
3960 num_func++;
3961 break;
c572e5ba 3962 }
2a2ab3f9 3963}
305f097e 3964
e075ae69
RH
3965/* Output assembler code to FILE to increment a counter associated
3966 with basic block number BLOCKNO.
305f097e 3967
e075ae69 3968 If profile_block_flag == 2
ecbc4695 3969
e075ae69
RH
3970 Output code to initialize the global structure `__bb' and
3971 call the function `__bb_trace_func' which will increment the
3972 counter.
ecbc4695 3973
e075ae69
RH
3974 `__bb' consists of two words. In the first word the number
3975 of the basic block has to be stored. In the second word
3976 the address of a block allocated in the object module
3977 has to be stored.
ecbc4695 3978
e075ae69 3979 The basic block number is given by BLOCKNO.
ecbc4695 3980
e075ae69 3981 The address of the block is given by the label created with
305f097e 3982
e075ae69 3983 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
305f097e 3984
e075ae69 3985 by FUNCTION_BLOCK_PROFILER.
ecbc4695 3986
e075ae69
RH
3987 Of course, since you are writing the definition of
3988 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3989 can take a short cut in the definition of this macro and use the
3990 name that you know will result.
305f097e 3991
e075ae69
RH
3992 If described in a virtual assembler language the code to be
3993 output looks like:
305f097e 3994
e075ae69
RH
3995 move BLOCKNO -> (__bb)
3996 move LPBX0 -> (__bb+4)
3997 call __bb_trace_func
305f097e 3998
e075ae69
RH
3999 Note that function `__bb_trace_func' must not change the
4000 machine state, especially the flag register. To grant
4001 this, you must output code to save and restore registers
4002 either in this macro or in the macros MACHINE_STATE_SAVE
4003 and MACHINE_STATE_RESTORE. The last two macros will be
4004 used in the function `__bb_trace_func', so you must make
4005 sure that the function prologue does not change any
4006 register prior to saving it with MACHINE_STATE_SAVE.
305f097e 4007
e075ae69 4008 else if profile_block_flag != 0
305f097e 4009
e075ae69
RH
4010 Output code to increment the counter directly.
4011 Basic blocks are numbered separately from zero within each
4012 compiled object module. The count associated with block number
4013 BLOCKNO is at index BLOCKNO in an array of words; the name of
4014 this array is a local symbol made with this statement:
32b5b1aa 4015
e075ae69 4016 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
32b5b1aa 4017
e075ae69
RH
4018 Of course, since you are writing the definition of
4019 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4020 can take a short cut in the definition of this macro and use the
4021 name that you know will result.
32b5b1aa 4022
e075ae69
RH
4023 If described in a virtual assembler language the code to be
4024 output looks like:
32b5b1aa 4025
e075ae69
RH
4026 inc (LPBX2+4*BLOCKNO)
4027*/
32b5b1aa 4028
e075ae69
RH
4029void
4030ix86_output_block_profiler (file, blockno)
4031 FILE *file ATTRIBUTE_UNUSED;
4032 int blockno;
4033{
4034 rtx xops[8], cnt_rtx;
4035 char counts[80];
4036 char *block_table = counts;
4037
4038 switch (profile_block_flag)
4039 {
4040 case 2:
4041 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
32b5b1aa 4042
e075ae69
RH
4043 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4044 xops[2] = GEN_INT (blockno);
4045 xops[3] = gen_rtx_MEM (Pmode,
4046 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
4047 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
4048 xops[5] = plus_constant (xops[4], 4);
4049 xops[0] = gen_rtx_MEM (SImode, xops[4]);
4050 xops[6] = gen_rtx_MEM (SImode, xops[5]);
79325812 4051
e075ae69 4052 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
32b5b1aa 4053
e075ae69
RH
4054 output_asm_insn ("pushf", xops);
4055 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4056 if (flag_pic)
32b5b1aa 4057 {
e075ae69
RH
4058 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4059 output_asm_insn ("push{l}\t%7", xops);
4060 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4061 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
4062 output_asm_insn ("pop{l}\t%7", xops);
4063 }
4064 else
4065 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
4066 output_asm_insn ("call\t%P3", xops);
4067 output_asm_insn ("popf", xops);
32b5b1aa 4068
e075ae69 4069 break;
32b5b1aa 4070
e075ae69
RH
4071 default:
4072 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
4073 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
4074 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
32b5b1aa 4075
e075ae69
RH
4076 if (blockno)
4077 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
32b5b1aa 4078
e075ae69
RH
4079 if (flag_pic)
4080 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
32b5b1aa 4081
e075ae69
RH
4082 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
4083 output_asm_insn ("inc{l}\t%0", xops);
32b5b1aa 4084
e075ae69 4085 break;
32b5b1aa 4086 }
32b5b1aa 4087}
32b5b1aa 4088\f
79325812 4089void
e075ae69
RH
4090ix86_expand_move (mode, operands)
4091 enum machine_mode mode;
4092 rtx operands[];
32b5b1aa 4093{
e075ae69 4094 int strict = (reload_in_progress || reload_completed);
e075ae69 4095 rtx insn;
e9a25f70 4096
e075ae69 4097 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
32b5b1aa 4098 {
e075ae69 4099 /* Emit insns to move operands[1] into operands[0]. */
e9a25f70 4100
e075ae69
RH
4101 if (GET_CODE (operands[0]) == MEM)
4102 operands[1] = force_reg (Pmode, operands[1]);
4103 else
32b5b1aa 4104 {
e075ae69
RH
4105 rtx temp = operands[0];
4106 if (GET_CODE (temp) != REG)
4107 temp = gen_reg_rtx (Pmode);
4108 temp = legitimize_pic_address (operands[1], temp);
4109 if (temp == operands[0])
4110 return;
4111 operands[1] = temp;
32b5b1aa 4112 }
e075ae69
RH
4113 }
4114 else
4115 {
d7a29404
JH
4116 if (GET_CODE (operands[0]) == MEM
4117 && (GET_MODE (operands[0]) == QImode
4118 || !push_operand (operands[0], mode))
4119 && GET_CODE (operands[1]) == MEM)
e075ae69 4120 operands[1] = force_reg (mode, operands[1]);
e9a25f70 4121
2c5a510c
RH
4122 if (push_operand (operands[0], mode)
4123 && ! general_no_elim_operand (operands[1], mode))
4124 operands[1] = copy_to_mode_reg (mode, operands[1]);
4125
e075ae69 4126 if (FLOAT_MODE_P (mode))
32b5b1aa 4127 {
d7a29404
JH
4128 /* If we are loading a floating point constant to a register,
4129 force the value to memory now, since we'll get better code
4130 out the back end. */
e075ae69
RH
4131
4132 if (strict)
4133 ;
e075ae69 4134 else if (GET_CODE (operands[1]) == CONST_DOUBLE
d7a29404 4135 && register_operand (operands[0], mode))
e075ae69 4136 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
32b5b1aa 4137 }
32b5b1aa 4138 }
e9a25f70 4139
e075ae69 4140 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
e9a25f70 4141
e075ae69
RH
4142 emit_insn (insn);
4143}
e9a25f70 4144
e075ae69
RH
4145/* Attempt to expand a binary operator. Make the expansion closer to the
4146 actual machine, then just general_operand, which will allow 3 separate
9d81fc27 4147 memory references (one output, two input) in a single insn. */
e9a25f70 4148
e075ae69
RH
4149void
4150ix86_expand_binary_operator (code, mode, operands)
4151 enum rtx_code code;
4152 enum machine_mode mode;
4153 rtx operands[];
4154{
4155 int matching_memory;
4156 rtx src1, src2, dst, op, clob;
4157
4158 dst = operands[0];
4159 src1 = operands[1];
4160 src2 = operands[2];
4161
4162 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
4163 if (GET_RTX_CLASS (code) == 'c'
4164 && (rtx_equal_p (dst, src2)
4165 || immediate_operand (src1, mode)))
4166 {
4167 rtx temp = src1;
4168 src1 = src2;
4169 src2 = temp;
32b5b1aa 4170 }
e9a25f70 4171
e075ae69
RH
4172 /* If the destination is memory, and we do not have matching source
4173 operands, do things in registers. */
4174 matching_memory = 0;
4175 if (GET_CODE (dst) == MEM)
32b5b1aa 4176 {
e075ae69
RH
4177 if (rtx_equal_p (dst, src1))
4178 matching_memory = 1;
4179 else if (GET_RTX_CLASS (code) == 'c'
4180 && rtx_equal_p (dst, src2))
4181 matching_memory = 2;
4182 else
4183 dst = gen_reg_rtx (mode);
4184 }
4185
4186 /* Both source operands cannot be in memory. */
4187 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
4188 {
4189 if (matching_memory != 2)
4190 src2 = force_reg (mode, src2);
4191 else
4192 src1 = force_reg (mode, src1);
32b5b1aa 4193 }
e9a25f70 4194
06a964de
JH
4195 /* If the operation is not commutable, source 1 cannot be a constant
4196 or non-matching memory. */
4197 if ((CONSTANT_P (src1)
4198 || (!matching_memory && GET_CODE (src1) == MEM))
4199 && GET_RTX_CLASS (code) != 'c')
e075ae69
RH
4200 src1 = force_reg (mode, src1);
4201
4202 /* If optimizing, copy to regs to improve CSE */
fe577e58 4203 if (optimize && ! no_new_pseudos)
32b5b1aa 4204 {
e075ae69
RH
4205 if (GET_CODE (dst) == MEM)
4206 dst = gen_reg_rtx (mode);
4207 if (GET_CODE (src1) == MEM)
4208 src1 = force_reg (mode, src1);
4209 if (GET_CODE (src2) == MEM)
4210 src2 = force_reg (mode, src2);
32b5b1aa 4211 }
e9a25f70 4212
e075ae69
RH
4213 /* Emit the instruction. */
4214
4215 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
4216 if (reload_in_progress)
4217 {
4218 /* Reload doesn't know about the flags register, and doesn't know that
4219 it doesn't want to clobber it. We can only do this with PLUS. */
4220 if (code != PLUS)
4221 abort ();
4222 emit_insn (op);
4223 }
4224 else
32b5b1aa 4225 {
e075ae69
RH
4226 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4227 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 4228 }
e9a25f70 4229
e075ae69
RH
4230 /* Fix up the destination if needed. */
4231 if (dst != operands[0])
4232 emit_move_insn (operands[0], dst);
4233}
4234
4235/* Return TRUE or FALSE depending on whether the binary operator meets the
4236 appropriate constraints. */
4237
4238int
4239ix86_binary_operator_ok (code, mode, operands)
4240 enum rtx_code code;
4241 enum machine_mode mode ATTRIBUTE_UNUSED;
4242 rtx operands[3];
4243{
4244 /* Both source operands cannot be in memory. */
4245 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4246 return 0;
4247 /* If the operation is not commutable, source 1 cannot be a constant. */
4248 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4249 return 0;
4250 /* If the destination is memory, we must have a matching source operand. */
4251 if (GET_CODE (operands[0]) == MEM
4252 && ! (rtx_equal_p (operands[0], operands[1])
4253 || (GET_RTX_CLASS (code) == 'c'
4254 && rtx_equal_p (operands[0], operands[2]))))
4255 return 0;
06a964de
JH
4256 /* If the operation is not commutable and the source 1 is memory, we must
4257 have a matching destionation. */
4258 if (GET_CODE (operands[1]) == MEM
4259 && GET_RTX_CLASS (code) != 'c'
4260 && ! rtx_equal_p (operands[0], operands[1]))
4261 return 0;
e075ae69
RH
4262 return 1;
4263}
4264
4265/* Attempt to expand a unary operator. Make the expansion closer to the
4266 actual machine, then just general_operand, which will allow 2 separate
9d81fc27 4267 memory references (one output, one input) in a single insn. */
e075ae69 4268
9d81fc27 4269void
e075ae69
RH
4270ix86_expand_unary_operator (code, mode, operands)
4271 enum rtx_code code;
4272 enum machine_mode mode;
4273 rtx operands[];
4274{
06a964de
JH
4275 int matching_memory;
4276 rtx src, dst, op, clob;
4277
4278 dst = operands[0];
4279 src = operands[1];
e075ae69 4280
06a964de
JH
4281 /* If the destination is memory, and we do not have matching source
4282 operands, do things in registers. */
4283 matching_memory = 0;
4284 if (GET_CODE (dst) == MEM)
32b5b1aa 4285 {
06a964de
JH
4286 if (rtx_equal_p (dst, src))
4287 matching_memory = 1;
e075ae69 4288 else
06a964de 4289 dst = gen_reg_rtx (mode);
32b5b1aa 4290 }
e9a25f70 4291
06a964de
JH
4292 /* When source operand is memory, destination must match. */
4293 if (!matching_memory && GET_CODE (src) == MEM)
4294 src = force_reg (mode, src);
4295
4296 /* If optimizing, copy to regs to improve CSE */
fe577e58 4297 if (optimize && ! no_new_pseudos)
06a964de
JH
4298 {
4299 if (GET_CODE (dst) == MEM)
4300 dst = gen_reg_rtx (mode);
4301 if (GET_CODE (src) == MEM)
4302 src = force_reg (mode, src);
4303 }
4304
4305 /* Emit the instruction. */
4306
4307 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
4308 if (reload_in_progress || code == NOT)
4309 {
4310 /* Reload doesn't know about the flags register, and doesn't know that
4311 it doesn't want to clobber it. */
4312 if (code != NOT)
4313 abort ();
4314 emit_insn (op);
4315 }
4316 else
4317 {
4318 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4319 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4320 }
4321
4322 /* Fix up the destination if needed. */
4323 if (dst != operands[0])
4324 emit_move_insn (operands[0], dst);
e075ae69
RH
4325}
4326
4327/* Return TRUE or FALSE depending on whether the unary operator meets the
4328 appropriate constraints. */
4329
4330int
4331ix86_unary_operator_ok (code, mode, operands)
4332 enum rtx_code code ATTRIBUTE_UNUSED;
4333 enum machine_mode mode ATTRIBUTE_UNUSED;
4334 rtx operands[2] ATTRIBUTE_UNUSED;
4335{
06a964de
JH
4336 /* If one of operands is memory, source and destination must match. */
4337 if ((GET_CODE (operands[0]) == MEM
4338 || GET_CODE (operands[1]) == MEM)
4339 && ! rtx_equal_p (operands[0], operands[1]))
4340 return FALSE;
e075ae69
RH
4341 return TRUE;
4342}
4343
16189740
RH
4344/* Return TRUE or FALSE depending on whether the first SET in INSN
4345 has source and destination with matching CC modes, and that the
4346 CC mode is at least as constrained as REQ_MODE. */
4347
4348int
4349ix86_match_ccmode (insn, req_mode)
4350 rtx insn;
4351 enum machine_mode req_mode;
4352{
4353 rtx set;
4354 enum machine_mode set_mode;
4355
4356 set = PATTERN (insn);
4357 if (GET_CODE (set) == PARALLEL)
4358 set = XVECEXP (set, 0, 0);
4359 if (GET_CODE (set) != SET)
4360 abort ();
4361
4362 set_mode = GET_MODE (SET_DEST (set));
4363 switch (set_mode)
4364 {
4365 case CCmode:
4366 if (req_mode == CCNOmode)
4367 return 0;
4368 /* FALLTHRU */
4369 case CCNOmode:
4370 if (req_mode == CCZmode)
4371 return 0;
4372 /* FALLTHRU */
4373 case CCZmode:
4374 break;
4375
4376 default:
4377 abort ();
4378 }
4379
4380 return (GET_MODE (SET_SRC (set)) == set_mode);
4381}
4382
e075ae69
RH
4383/* Produce an unsigned comparison for a given signed comparison. */
4384
4385static enum rtx_code
4386unsigned_comparison (code)
4387 enum rtx_code code;
4388{
4389 switch (code)
32b5b1aa 4390 {
e075ae69
RH
4391 case GT:
4392 code = GTU;
4393 break;
4394 case LT:
4395 code = LTU;
4396 break;
4397 case GE:
4398 code = GEU;
4399 break;
4400 case LE:
4401 code = LEU;
4402 break;
4403 case EQ:
4404 case NE:
4405 case LEU:
4406 case LTU:
4407 case GEU:
4408 case GTU:
3a3677ff
RH
4409 case UNORDERED:
4410 case ORDERED:
e075ae69
RH
4411 break;
4412 default:
4413 abort ();
4414 }
4415 return code;
4416}
4417
4418/* Generate insn patterns to do an integer compare of OPERANDS. */
4419
4420static rtx
4421ix86_expand_int_compare (code, op0, op1)
4422 enum rtx_code code;
4423 rtx op0, op1;
4424{
4425 enum machine_mode cmpmode;
4426 rtx tmp, flags;
4427
4428 cmpmode = SELECT_CC_MODE (code, op0, op1);
4429 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
4430
4431 /* This is very simple, but making the interface the same as in the
4432 FP case makes the rest of the code easier. */
4433 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
4434 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
4435
4436 /* Return the test that should be put into the flags user, i.e.
4437 the bcc, scc, or cmov instruction. */
4438 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
4439}
4440
3a3677ff
RH
4441/* Figure out whether to use ordered or unordered fp comparisons.
4442 Return the appropriate mode to use. */
e075ae69 4443
3a3677ff
RH
4444static enum machine_mode
4445ix86_fp_compare_mode (code)
e075ae69 4446 enum rtx_code code;
e075ae69 4447{
3a3677ff 4448 int unordered;
e075ae69 4449
3a3677ff
RH
4450 switch (code)
4451 {
4452 case NE: case EQ:
4453 /* When not doing IEEE compliant compares, fault on NaNs. */
4454 unordered = (TARGET_IEEE_FP != 0);
4455 break;
4456
4457 case LT: case LE: case GT: case GE:
4458 unordered = 0;
4459 break;
4460
4461 case UNORDERED: case ORDERED:
4462 case UNEQ: case UNGE: case UNGT: case UNLE: case UNLT: case LTGT:
4463 unordered = 1;
4464 break;
4465
4466 default:
4467 abort ();
4468 }
e075ae69
RH
4469
4470 /* ??? If we knew whether invalid-operand exceptions were masked,
4471 we could rely on fcom to raise an exception and take care of
3a3677ff 4472 NaNs. But we don't. We could know this from c99 math pragmas. */
e075ae69
RH
4473 if (TARGET_IEEE_FP)
4474 unordered = 1;
4475
3a3677ff
RH
4476 return unordered ? CCFPUmode : CCFPmode;
4477}
4478
4479/* Return true if we should use an FCOMI instruction for this fp comparison. */
4480
4481static int
4482ix86_use_fcomi_compare (code)
4483 enum rtx_code code;
4484{
4485 return (TARGET_CMOVE
4486 && (code == ORDERED || code == UNORDERED
4487 /* All other unordered compares require checking
4488 multiple sets of bits. */
4489 || ix86_fp_compare_mode (code) == CCFPmode));
4490}
4491
4492/* Swap, force into registers, or otherwise massage the two operands
4493 to a fp comparison. The operands are updated in place; the new
4494 comparsion code is returned. */
4495
4496static enum rtx_code
4497ix86_prepare_fp_compare_args (code, pop0, pop1)
4498 enum rtx_code code;
4499 rtx *pop0, *pop1;
4500{
4501 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
4502 rtx op0 = *pop0, op1 = *pop1;
4503 enum machine_mode op_mode = GET_MODE (op0);
4504
e075ae69 4505 /* All of the unordered compare instructions only work on registers.
3a3677ff
RH
4506 The same is true of the XFmode compare instructions. The same is
4507 true of the fcomi compare instructions. */
4508
4509 if (fpcmp_mode == CCFPUmode
4510 || op_mode == XFmode
4511 || ix86_use_fcomi_compare (code))
e075ae69 4512 {
3a3677ff
RH
4513 op0 = force_reg (op_mode, op0);
4514 op1 = force_reg (op_mode, op1);
e075ae69
RH
4515 }
4516 else
4517 {
4518 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
4519 things around if they appear profitable, otherwise force op0
4520 into a register. */
4521
4522 if (standard_80387_constant_p (op0) == 0
4523 || (GET_CODE (op0) == MEM
4524 && ! (standard_80387_constant_p (op1) == 0
4525 || GET_CODE (op1) == MEM)))
32b5b1aa 4526 {
e075ae69
RH
4527 rtx tmp;
4528 tmp = op0, op0 = op1, op1 = tmp;
4529 code = swap_condition (code);
4530 }
4531
4532 if (GET_CODE (op0) != REG)
3a3677ff 4533 op0 = force_reg (op_mode, op0);
e075ae69
RH
4534
4535 if (CONSTANT_P (op1))
4536 {
4537 if (standard_80387_constant_p (op1))
3a3677ff 4538 op1 = force_reg (op_mode, op1);
e075ae69 4539 else
3a3677ff 4540 op1 = validize_mem (force_const_mem (op_mode, op1));
32b5b1aa
SC
4541 }
4542 }
e9a25f70 4543
3a3677ff
RH
4544 *pop0 = op0;
4545 *pop1 = op1;
4546 return code;
4547}
4548
4549/* Generate insn patterns to do a floating point compare of OPERANDS. */
4550
4551rtx
4552ix86_expand_fp_compare (code, op0, op1, scratch)
4553 enum rtx_code code;
4554 rtx op0, op1, scratch;
4555{
4556 enum machine_mode fpcmp_mode, intcmp_mode;
4557 rtx tmp;
4558
4559 fpcmp_mode = ix86_fp_compare_mode (code);
4560 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
4561
e075ae69
RH
4562 /* %%% fcomi is probably always faster, even when dealing with memory,
4563 since compare-and-branch would be three insns instead of four. */
3a3677ff 4564 if (ix86_use_fcomi_compare (code))
32b5b1aa 4565 {
e075ae69
RH
4566 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4567 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
4568 emit_insn (tmp);
4569
4570 /* The FP codes work out to act like unsigned. */
4571 code = unsigned_comparison (code);
3a3677ff 4572 intcmp_mode = CCmode;
e075ae69
RH
4573 }
4574 else
4575 {
4576 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e9a25f70 4577
e075ae69
RH
4578 rtx tmp2;
4579 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4580 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
3a3677ff 4581 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 4582
3a3677ff
RH
4583 if (fpcmp_mode == CCFPmode
4584 || code == ORDERED
4585 || code == UNORDERED)
32b5b1aa 4586 {
e075ae69
RH
4587 /* We have two options here -- use sahf, or testing bits of ah
4588 directly. On PPRO, they are equivalent, sahf being one byte
4589 smaller. On Pentium, sahf is non-pairable while test is UV
4590 pairable. */
4591
4592 if (TARGET_USE_SAHF || optimize_size)
32b5b1aa 4593 {
e075ae69 4594 do_sahf:
3a3677ff 4595 emit_insn (gen_x86_sahf_1 (scratch));
e9a25f70 4596
e075ae69
RH
4597 /* The FP codes work out to act like unsigned. */
4598 code = unsigned_comparison (code);
e075ae69 4599 intcmp_mode = CCmode;
32b5b1aa
SC
4600 }
4601 else
4602 {
e075ae69
RH
4603 /*
4604 * The numbers below correspond to the bits of the FPSW in AH.
d22ce03d 4605 * C3, C2, and C0 are in bits 0x40, 0x4, and 0x01 respectively.
e075ae69
RH
4606 *
4607 * cmp C3 C2 C0
4608 * > 0 0 0
4609 * < 0 0 1
4610 * = 1 0 0
4611 * un 1 1 1
4612 */
4613
4614 int mask;
4615
4616 switch (code)
32b5b1aa 4617 {
e075ae69 4618 case GT:
d22ce03d 4619 mask = 0x41;
e075ae69
RH
4620 code = EQ;
4621 break;
4622 case LT:
4623 mask = 0x01;
4624 code = NE;
4625 break;
4626 case GE:
4627 /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
4628 faster in all cases to just fall back on sahf. */
4629 goto do_sahf;
4630 case LE:
4631 mask = 0x41;
4632 code = NE;
4633 break;
4634 case EQ:
4635 mask = 0x40;
4636 code = NE;
4637 break;
4638 case NE:
4639 mask = 0x40;
4640 code = EQ;
4641 break;
3a3677ff
RH
4642 case UNORDERED:
4643 mask = 0x04;
4644 code = NE;
4645 break;
4646 case ORDERED:
4647 mask = 0x04;
4648 code = EQ;
4649 break;
4650
e075ae69
RH
4651 default:
4652 abort ();
32b5b1aa 4653 }
e075ae69 4654
3a3677ff 4655 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (mask)));
e075ae69 4656 intcmp_mode = CCNOmode;
32b5b1aa
SC
4657 }
4658 }
4659 else
4660 {
e075ae69
RH
4661 /* In the unordered case, we have to check C2 for NaN's, which
4662 doesn't happen to work out to anything nice combination-wise.
4663 So do some bit twiddling on the value we've got in AH to come
4664 up with an appropriate set of condition codes. */
4665
4666 intcmp_mode = CCNOmode;
4667 switch (code)
32b5b1aa 4668 {
e075ae69 4669 case GT:
3a3677ff 4670 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69
RH
4671 code = EQ;
4672 break;
4673 case LT:
3a3677ff
RH
4674 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4675 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
4676 intcmp_mode = CCmode;
4677 code = EQ;
4678 break;
4679 case GE:
3a3677ff 4680 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69
RH
4681 code = EQ;
4682 break;
4683 case LE:
3a3677ff
RH
4684 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4685 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
4686 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
4687 intcmp_mode = CCmode;
4688 code = LTU;
4689 break;
4690 case EQ:
3a3677ff
RH
4691 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4692 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
4693 intcmp_mode = CCmode;
4694 code = EQ;
4695 break;
4696 case NE:
3a3677ff
RH
4697 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4698 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x40)));
4699 code = NE;
4700 break;
4701
4702 case UNORDERED:
4703 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
4704 code = NE;
4705 break;
4706 case ORDERED:
4707 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
4708 code = EQ;
4709 break;
4710 case UNEQ:
4711 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
4712 code = NE;
4713 break;
4714 case UNGE:
4715 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4716 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x01)));
4717 code = NE;
4718 break;
4719 case UNGT:
4720 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4721 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
4722 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
4723 code = GEU;
4724 break;
4725 case UNLE:
4726 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69
RH
4727 code = NE;
4728 break;
3a3677ff
RH
4729 case UNLT:
4730 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
4731 code = NE;
4732 break;
4733 case LTGT:
4734 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
4735 code = EQ;
4736 break;
4737
e075ae69
RH
4738 default:
4739 abort ();
32b5b1aa
SC
4740 }
4741 }
32b5b1aa 4742 }
e075ae69
RH
4743
4744 /* Return the test that should be put into the flags user, i.e.
4745 the bcc, scc, or cmov instruction. */
4746 return gen_rtx_fmt_ee (code, VOIDmode,
4747 gen_rtx_REG (intcmp_mode, FLAGS_REG),
4748 const0_rtx);
4749}
4750
4751static rtx
3a3677ff 4752ix86_expand_compare (code)
e075ae69 4753 enum rtx_code code;
e075ae69
RH
4754{
4755 rtx op0, op1, ret;
4756 op0 = ix86_compare_op0;
4757 op1 = ix86_compare_op1;
4758
4759 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
3a3677ff 4760 ret = ix86_expand_fp_compare (code, op0, op1, gen_reg_rtx (HImode));
32b5b1aa 4761 else
e075ae69
RH
4762 ret = ix86_expand_int_compare (code, op0, op1);
4763
4764 return ret;
4765}
4766
4767void
3a3677ff 4768ix86_expand_branch (code, label)
e075ae69 4769 enum rtx_code code;
e075ae69
RH
4770 rtx label;
4771{
3a3677ff 4772 rtx tmp;
e075ae69 4773
3a3677ff 4774 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 4775 {
3a3677ff
RH
4776 case QImode:
4777 case HImode:
4778 case SImode:
4779 tmp = ix86_expand_compare (code);
e075ae69
RH
4780 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4781 gen_rtx_LABEL_REF (VOIDmode, label),
4782 pc_rtx);
4783 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 4784 return;
e075ae69 4785
3a3677ff
RH
4786 case SFmode:
4787 case DFmode:
4788 case XFmode:
4789 /* Don't expand the comparison early, so that we get better code
4790 when jump or whoever decides to reverse the comparison. */
4791 {
4792 rtvec vec;
4793 int use_fcomi;
4794
4795 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
4796 &ix86_compare_op1);
4797
4798 tmp = gen_rtx_fmt_ee (code, ix86_fp_compare_mode (code),
4799 ix86_compare_op0, ix86_compare_op1);
4800 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4801 gen_rtx_LABEL_REF (VOIDmode, label),
4802 pc_rtx);
4803 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
4804
4805 use_fcomi = ix86_use_fcomi_compare (code);
4806 vec = rtvec_alloc (3 + !use_fcomi);
4807 RTVEC_ELT (vec, 0) = tmp;
4808 RTVEC_ELT (vec, 1)
4809 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
4810 RTVEC_ELT (vec, 2)
4811 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
4812 if (! use_fcomi)
4813 RTVEC_ELT (vec, 3)
4814 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
4815
4816 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
4817 return;
4818 }
32b5b1aa 4819
3a3677ff
RH
4820 case DImode:
4821 /* Expand DImode branch into multiple compare+branch. */
4822 {
4823 rtx lo[2], hi[2], label2;
4824 enum rtx_code code1, code2, code3;
32b5b1aa 4825
3a3677ff
RH
4826 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
4827 {
4828 tmp = ix86_compare_op0;
4829 ix86_compare_op0 = ix86_compare_op1;
4830 ix86_compare_op1 = tmp;
4831 code = swap_condition (code);
4832 }
4833 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
4834 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 4835
3a3677ff
RH
4836 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
4837 avoid two branches. This costs one extra insn, so disable when
4838 optimizing for size. */
32b5b1aa 4839
3a3677ff
RH
4840 if ((code == EQ || code == NE)
4841 && (!optimize_size
4842 || hi[1] == const0_rtx || lo[1] == const0_rtx))
4843 {
4844 rtx xor0, xor1;
32b5b1aa 4845
3a3677ff
RH
4846 xor1 = hi[0];
4847 if (hi[1] != const0_rtx)
4848 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
4849 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 4850
3a3677ff
RH
4851 xor0 = lo[0];
4852 if (lo[1] != const0_rtx)
4853 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
4854 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 4855
3a3677ff
RH
4856 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
4857 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 4858
3a3677ff
RH
4859 ix86_compare_op0 = tmp;
4860 ix86_compare_op1 = const0_rtx;
4861 ix86_expand_branch (code, label);
4862 return;
4863 }
e075ae69 4864
3a3677ff
RH
4865 /* Otherwise, if we are doing less-than, op1 is a constant and the
4866 low word is zero, then we can just examine the high word. */
32b5b1aa 4867
3a3677ff
RH
4868 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx
4869 && (code == LT || code == LTU))
4870 {
4871 ix86_compare_op0 = hi[0];
4872 ix86_compare_op1 = hi[1];
4873 ix86_expand_branch (code, label);
4874 return;
4875 }
e075ae69 4876
3a3677ff 4877 /* Otherwise, we need two or three jumps. */
e075ae69 4878
3a3677ff 4879 label2 = gen_label_rtx ();
e075ae69 4880
3a3677ff
RH
4881 code1 = code;
4882 code2 = swap_condition (code);
4883 code3 = unsigned_condition (code);
e075ae69 4884
3a3677ff
RH
4885 switch (code)
4886 {
4887 case LT: case GT: case LTU: case GTU:
4888 break;
e075ae69 4889
3a3677ff
RH
4890 case LE: code1 = LT; code2 = GT; break;
4891 case GE: code1 = GT; code2 = LT; break;
4892 case LEU: code1 = LTU; code2 = GTU; break;
4893 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 4894
3a3677ff
RH
4895 case EQ: code1 = NIL; code2 = NE; break;
4896 case NE: code2 = NIL; break;
e075ae69 4897
3a3677ff
RH
4898 default:
4899 abort ();
4900 }
e075ae69 4901
3a3677ff
RH
4902 /*
4903 * a < b =>
4904 * if (hi(a) < hi(b)) goto true;
4905 * if (hi(a) > hi(b)) goto false;
4906 * if (lo(a) < lo(b)) goto true;
4907 * false:
4908 */
4909
4910 ix86_compare_op0 = hi[0];
4911 ix86_compare_op1 = hi[1];
4912
4913 if (code1 != NIL)
4914 ix86_expand_branch (code1, label);
4915 if (code2 != NIL)
4916 ix86_expand_branch (code2, label2);
4917
4918 ix86_compare_op0 = lo[0];
4919 ix86_compare_op1 = lo[1];
4920 ix86_expand_branch (code3, label);
4921
4922 if (code2 != NIL)
4923 emit_label (label2);
4924 return;
4925 }
e075ae69 4926
3a3677ff
RH
4927 default:
4928 abort ();
4929 }
32b5b1aa 4930}
e075ae69 4931
32b5b1aa 4932int
3a3677ff 4933ix86_expand_setcc (code, dest)
e075ae69 4934 enum rtx_code code;
e075ae69 4935 rtx dest;
32b5b1aa 4936{
e075ae69
RH
4937 rtx ret, tmp;
4938 int type;
4939
4940 if (GET_MODE (ix86_compare_op0) == DImode)
4941 return 0; /* FAIL */
4942
4943 /* Three modes of generation:
4944 0 -- destination does not overlap compare sources:
4945 clear dest first, emit strict_low_part setcc.
4946 1 -- destination does overlap compare sources:
4947 emit subreg setcc, zero extend.
4948 2 -- destination is in QImode:
4949 emit setcc only.
4950 */
4951
4952 type = 0;
e075ae69
RH
4953
4954 if (GET_MODE (dest) == QImode)
4955 type = 2;
4956 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
a500c31b 4957 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
e075ae69
RH
4958 type = 1;
4959
4960 if (type == 0)
4961 emit_move_insn (dest, const0_rtx);
4962
3a3677ff 4963 ret = ix86_expand_compare (code);
e075ae69
RH
4964 PUT_MODE (ret, QImode);
4965
4966 tmp = dest;
4967 if (type == 0)
32b5b1aa 4968 {
e075ae69
RH
4969 tmp = gen_lowpart (QImode, dest);
4970 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
4971 }
4972 else if (type == 1)
4973 {
4974 if (!cse_not_expected)
4975 tmp = gen_reg_rtx (QImode);
4976 else
4977 tmp = gen_lowpart (QImode, dest);
4978 }
32b5b1aa 4979
e075ae69
RH
4980 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
4981
4982 if (type == 1)
4983 {
4984 rtx clob;
4985
4986 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
4987 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
4988 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4989 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
4990 emit_insn (tmp);
32b5b1aa 4991 }
e075ae69
RH
4992
4993 return 1; /* DONE */
32b5b1aa 4994}
e075ae69 4995
32b5b1aa 4996int
e075ae69
RH
4997ix86_expand_int_movcc (operands)
4998 rtx operands[];
32b5b1aa 4999{
e075ae69
RH
5000 enum rtx_code code = GET_CODE (operands[1]), compare_code;
5001 rtx compare_seq, compare_op;
32b5b1aa 5002
36583fea
JH
5003 /* When the compare code is not LTU or GEU, we can not use sbbl case.
5004 In case comparsion is done with immediate, we can convert it to LTU or
5005 GEU by altering the integer. */
5006
5007 if ((code == LEU || code == GTU)
5008 && GET_CODE (ix86_compare_op1) == CONST_INT
5009 && GET_MODE (operands[0]) != HImode
5010 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
5011 && GET_CODE (operands[2]) == CONST_INT
5012 && GET_CODE (operands[3]) == CONST_INT)
5013 {
5014 if (code == LEU)
5015 code = LTU;
5016 else
5017 code = GEU;
5018 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
5019 }
3a3677ff 5020
e075ae69 5021 start_sequence ();
3a3677ff 5022 compare_op = ix86_expand_compare (code);
e075ae69
RH
5023 compare_seq = gen_sequence ();
5024 end_sequence ();
5025
5026 compare_code = GET_CODE (compare_op);
5027
5028 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
5029 HImode insns, we'd be swallowed in word prefix ops. */
5030
5031 if (GET_MODE (operands[0]) != HImode
5032 && GET_CODE (operands[2]) == CONST_INT
5033 && GET_CODE (operands[3]) == CONST_INT)
5034 {
5035 rtx out = operands[0];
5036 HOST_WIDE_INT ct = INTVAL (operands[2]);
5037 HOST_WIDE_INT cf = INTVAL (operands[3]);
5038 HOST_WIDE_INT diff;
5039
36583fea 5040 if (compare_code == LTU || compare_code == GEU)
e075ae69 5041 {
e075ae69
RH
5042
5043 /* Detect overlap between destination and compare sources. */
5044 rtx tmp = out;
5045
36583fea
JH
5046 /* To simplify rest of code, restrict to the GEU case. */
5047 if (compare_code == LTU)
5048 {
5049 int tmp = ct;
5050 ct = cf;
5051 cf = tmp;
5052 compare_code = reverse_condition (compare_code);
5053 code = reverse_condition (code);
5054 }
5055 diff = ct - cf;
5056
e075ae69 5057 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
a500c31b 5058 || reg_overlap_mentioned_p (out, ix86_compare_op1))
e075ae69
RH
5059 tmp = gen_reg_rtx (SImode);
5060
5061 emit_insn (compare_seq);
5062 emit_insn (gen_x86_movsicc_0_m1 (tmp));
5063
36583fea
JH
5064 if (diff == 1)
5065 {
5066 /*
5067 * cmpl op0,op1
5068 * sbbl dest,dest
5069 * [addl dest, ct]
5070 *
5071 * Size 5 - 8.
5072 */
5073 if (ct)
5074 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5075 }
5076 else if (cf == -1)
5077 {
5078 /*
5079 * cmpl op0,op1
5080 * sbbl dest,dest
5081 * orl $ct, dest
5082 *
5083 * Size 8.
5084 */
5085 emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
5086 }
5087 else if (diff == -1 && ct)
5088 {
5089 /*
5090 * cmpl op0,op1
5091 * sbbl dest,dest
5092 * xorl $-1, dest
5093 * [addl dest, cf]
5094 *
5095 * Size 8 - 11.
5096 */
5097 emit_insn (gen_one_cmplsi2 (tmp, tmp));
5098 if (cf)
5099 emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
5100 }
5101 else
5102 {
5103 /*
5104 * cmpl op0,op1
5105 * sbbl dest,dest
5106 * andl cf - ct, dest
5107 * [addl dest, ct]
5108 *
5109 * Size 8 - 11.
5110 */
5111 emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
5112 if (ct)
5113 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5114 }
e075ae69
RH
5115
5116 if (tmp != out)
5117 emit_move_insn (out, tmp);
5118
5119 return 1; /* DONE */
5120 }
5121
5122 diff = ct - cf;
5123 if (diff < 0)
5124 {
5125 HOST_WIDE_INT tmp;
5126 tmp = ct, ct = cf, cf = tmp;
5127 diff = -diff;
5128 compare_code = reverse_condition (compare_code);
5129 code = reverse_condition (code);
5130 }
5131 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
5132 || diff == 3 || diff == 5 || diff == 9)
5133 {
5134 /*
5135 * xorl dest,dest
5136 * cmpl op1,op2
5137 * setcc dest
5138 * lea cf(dest*(ct-cf)),dest
5139 *
5140 * Size 14.
5141 *
5142 * This also catches the degenerate setcc-only case.
5143 */
5144
5145 rtx tmp;
5146 int nops;
5147
5148 out = emit_store_flag (out, code, ix86_compare_op0,
5149 ix86_compare_op1, VOIDmode, 0, 1);
5150
5151 nops = 0;
5152 if (diff == 1)
5153 tmp = out;
5154 else
5155 {
5156 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
5157 nops++;
5158 if (diff & 1)
5159 {
5160 tmp = gen_rtx_PLUS (SImode, tmp, out);
5161 nops++;
5162 }
5163 }
5164 if (cf != 0)
5165 {
5166 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
5167 nops++;
5168 }
5169 if (tmp != out)
5170 {
5171 if (nops == 0)
5172 emit_move_insn (out, tmp);
5173 else if (nops == 1)
5174 {
5175 rtx clob;
5176
5177 clob = gen_rtx_REG (CCmode, FLAGS_REG);
5178 clob = gen_rtx_CLOBBER (VOIDmode, clob);
5179
5180 tmp = gen_rtx_SET (VOIDmode, out, tmp);
5181 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5182 emit_insn (tmp);
5183 }
5184 else
5185 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
5186 }
5187 if (out != operands[0])
5188 emit_move_insn (operands[0], out);
5189
5190 return 1; /* DONE */
5191 }
5192
5193 /*
5194 * General case: Jumpful:
5195 * xorl dest,dest cmpl op1, op2
5196 * cmpl op1, op2 movl ct, dest
5197 * setcc dest jcc 1f
5198 * decl dest movl cf, dest
5199 * andl (cf-ct),dest 1:
5200 * addl ct,dest
5201 *
5202 * Size 20. Size 14.
5203 *
5204 * This is reasonably steep, but branch mispredict costs are
5205 * high on modern cpus, so consider failing only if optimizing
5206 * for space.
5207 *
5208 * %%% Parameterize branch_cost on the tuning architecture, then
5209 * use that. The 80386 couldn't care less about mispredicts.
5210 */
5211
5212 if (!optimize_size && !TARGET_CMOVE)
5213 {
5214 if (ct == 0)
5215 {
5216 ct = cf;
5217 cf = 0;
5218 compare_code = reverse_condition (compare_code);
5219 code = reverse_condition (code);
5220 }
5221
5222 out = emit_store_flag (out, code, ix86_compare_op0,
5223 ix86_compare_op1, VOIDmode, 0, 1);
5224
5225 emit_insn (gen_addsi3 (out, out, constm1_rtx));
5226 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
5227 if (ct != 0)
5228 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5229 if (out != operands[0])
5230 emit_move_insn (operands[0], out);
5231
5232 return 1; /* DONE */
5233 }
5234 }
5235
5236 if (!TARGET_CMOVE)
5237 {
5238 /* Try a few things more with specific constants and a variable. */
5239
78a0d70c 5240 optab op;
e075ae69
RH
5241 rtx var, orig_out, out, tmp;
5242
5243 if (optimize_size)
5244 return 0; /* FAIL */
5245
5246 /* If one of the two operands is an interesting constant, load a
5247 constant with the above and mask it in with a logical operation. */
5248
5249 if (GET_CODE (operands[2]) == CONST_INT)
5250 {
5251 var = operands[3];
5252 if (INTVAL (operands[2]) == 0)
5253 operands[3] = constm1_rtx, op = and_optab;
5254 else if (INTVAL (operands[2]) == -1)
5255 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
5256 else
5257 return 0; /* FAIL */
e075ae69
RH
5258 }
5259 else if (GET_CODE (operands[3]) == CONST_INT)
5260 {
5261 var = operands[2];
5262 if (INTVAL (operands[3]) == 0)
5263 operands[2] = constm1_rtx, op = and_optab;
5264 else if (INTVAL (operands[3]) == -1)
5265 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
5266 else
5267 return 0; /* FAIL */
e075ae69 5268 }
78a0d70c 5269 else
e075ae69
RH
5270 return 0; /* FAIL */
5271
5272 orig_out = operands[0];
5273 tmp = gen_reg_rtx (GET_MODE (orig_out));
5274 operands[0] = tmp;
5275
5276 /* Recurse to get the constant loaded. */
5277 if (ix86_expand_int_movcc (operands) == 0)
5278 return 0; /* FAIL */
5279
5280 /* Mask in the interesting variable. */
5281 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
5282 OPTAB_WIDEN);
5283 if (out != orig_out)
5284 emit_move_insn (orig_out, out);
5285
5286 return 1; /* DONE */
5287 }
5288
5289 /*
5290 * For comparison with above,
5291 *
5292 * movl cf,dest
5293 * movl ct,tmp
5294 * cmpl op1,op2
5295 * cmovcc tmp,dest
5296 *
5297 * Size 15.
5298 */
5299
5300 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
5301 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
5302 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
5303 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
5304
5305 emit_insn (compare_seq);
5306 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5307 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5308 compare_op, operands[2],
5309 operands[3])));
5310
5311 return 1; /* DONE */
e9a25f70 5312}
e075ae69 5313
32b5b1aa 5314int
e075ae69
RH
5315ix86_expand_fp_movcc (operands)
5316 rtx operands[];
32b5b1aa 5317{
e075ae69
RH
5318 enum rtx_code code;
5319 enum machine_mode mode;
5320 rtx tmp;
32b5b1aa 5321
e075ae69
RH
5322 /* The floating point conditional move instructions don't directly
5323 support conditions resulting from a signed integer comparison. */
32b5b1aa 5324
e075ae69
RH
5325 code = GET_CODE (operands[1]);
5326 switch (code)
5327 {
5328 case LT:
5329 case LE:
5330 case GE:
5331 case GT:
5332 tmp = gen_reg_rtx (QImode);
3a3677ff 5333 ix86_expand_setcc (code, tmp);
e075ae69
RH
5334 code = NE;
5335 ix86_compare_op0 = tmp;
5336 ix86_compare_op1 = const0_rtx;
5337 break;
5338
5339 default:
5340 break;
5341 }
e9a25f70 5342
e075ae69
RH
5343 mode = SELECT_CC_MODE (code, ix86_compare_op0, ix86_compare_op1);
5344 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (mode, FLAGS_REG),
5345 gen_rtx_COMPARE (mode,
5346 ix86_compare_op0,
5347 ix86_compare_op1)));
5348 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5349 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5350 gen_rtx_fmt_ee (code, VOIDmode,
5351 gen_rtx_REG (mode, FLAGS_REG),
5352 const0_rtx),
5353 operands[2],
5354 operands[3])));
32b5b1aa 5355
e075ae69 5356 return 1;
32b5b1aa
SC
5357}
5358
2450a057
JH
5359/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
5360 works for floating pointer parameters and nonoffsetable memories.
5361 For pushes, it returns just stack offsets; the values will be saved
5362 in the right order. Maximally three parts are generated. */
5363
5364static void
5365ix86_split_to_parts (operand, parts, mode)
5366 rtx operand;
5367 rtx *parts;
5368 enum machine_mode mode;
32b5b1aa 5369{
2450a057
JH
5370 int size = GET_MODE_SIZE (mode) / 4;
5371
5372 if (size < 2 || size > 3)
5373 abort ();
5374
d7a29404
JH
5375 /* Optimize constant pool reference to immediates. This is used by fp moves,
5376 that force all constants to memory to allow combining. */
5377
5378 if (GET_CODE (operand) == MEM
5379 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
5380 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
5381 operand = get_pool_constant (XEXP (operand, 0));
5382
2450a057 5383 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
e075ae69 5384 {
2450a057
JH
5385 /* The only non-offsetable memories we handle are pushes. */
5386 if (! push_operand (operand, VOIDmode))
5387 abort ();
5388
5389 PUT_MODE (operand, SImode);
5390 parts[0] = parts[1] = parts[2] = operand;
5391 }
5392 else
5393 {
5394 if (mode == DImode)
5395 split_di (&operand, 1, &parts[0], &parts[1]);
5396 else
e075ae69 5397 {
2450a057
JH
5398 if (REG_P (operand))
5399 {
5400 if (!reload_completed)
5401 abort ();
5402 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
5403 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
5404 if (size == 3)
5405 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
5406 }
5407 else if (offsettable_memref_p (operand))
5408 {
5409 PUT_MODE (operand, SImode);
5410 parts[0] = operand;
5411 parts[1] = adj_offsettable_operand (operand, 4);
5412 if (size == 3)
5413 parts[2] = adj_offsettable_operand (operand, 8);
5414 }
5415 else if (GET_CODE (operand) == CONST_DOUBLE)
5416 {
5417 REAL_VALUE_TYPE r;
5418 long l[3];
5419
5420 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
5421 switch (mode)
5422 {
5423 case XFmode:
5424 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
5425 parts[2] = GEN_INT (l[2]);
5426 break;
5427 case DFmode:
5428 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
5429 break;
5430 default:
5431 abort ();
5432 }
5433 parts[1] = GEN_INT (l[1]);
5434 parts[0] = GEN_INT (l[0]);
5435 }
5436 else
5437 abort ();
e075ae69 5438 }
2450a057
JH
5439 }
5440
5441 return;
5442}
5443
5444/* Emit insns to perform a move or push of DI, DF, and XF values.
5445 Return false when normal moves are needed; true when all required
5446 insns have been emitted. Operands 2-4 contain the input values
5447 int the correct order; operands 5-7 contain the output values. */
5448
5449int
5450ix86_split_long_move (operands1)
5451 rtx operands1[];
5452{
5453 rtx part[2][3];
5454 rtx operands[2];
5455 int size = GET_MODE_SIZE (GET_MODE (operands1[0])) / 4;
5456 int push = 0;
5457 int collisions = 0;
5458
5459 /* Make our own copy to avoid clobbering the operands. */
5460 operands[0] = copy_rtx (operands1[0]);
5461 operands[1] = copy_rtx (operands1[1]);
5462
5463 if (size < 2 || size > 3)
5464 abort ();
5465
5466 /* The only non-offsettable memory we handle is push. */
5467 if (push_operand (operands[0], VOIDmode))
5468 push = 1;
5469 else if (GET_CODE (operands[0]) == MEM
5470 && ! offsettable_memref_p (operands[0]))
5471 abort ();
5472
5473 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
5474 ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
5475
5476 /* When emitting push, take care for source operands on the stack. */
5477 if (push && GET_CODE (operands[1]) == MEM
5478 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
5479 {
5480 if (size == 3)
5481 part[1][1] = part[1][2];
5482 part[1][0] = part[1][1];
5483 }
5484
5485 /* We need to do copy in the right order in case an address register
5486 of the source overlaps the destination. */
5487 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
5488 {
5489 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
5490 collisions++;
5491 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
5492 collisions++;
5493 if (size == 3
5494 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
5495 collisions++;
5496
5497 /* Collision in the middle part can be handled by reordering. */
5498 if (collisions == 1 && size == 3
5499 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
e075ae69 5500 {
2450a057
JH
5501 rtx tmp;
5502 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
5503 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
5504 }
e075ae69 5505
2450a057
JH
5506 /* If there are more collisions, we can't handle it by reordering.
5507 Do an lea to the last part and use only one colliding move. */
5508 else if (collisions > 1)
5509 {
5510 collisions = 1;
5511 emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
5512 XEXP (part[1][0], 0)));
5513 part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
5514 part[1][1] = adj_offsettable_operand (part[1][0], 4);
5515 if (size == 3)
5516 part[1][2] = adj_offsettable_operand (part[1][0], 8);
5517 }
5518 }
5519
5520 if (push)
5521 {
5522 if (size == 3)
5523 emit_insn (gen_push (part[1][2]));
5524 emit_insn (gen_push (part[1][1]));
5525 emit_insn (gen_push (part[1][0]));
5526 return 1;
5527 }
5528
5529 /* Choose correct order to not overwrite the source before it is copied. */
5530 if ((REG_P (part[0][0])
5531 && REG_P (part[1][1])
5532 && (REGNO (part[0][0]) == REGNO (part[1][1])
5533 || (size == 3
5534 && REGNO (part[0][0]) == REGNO (part[1][2]))))
5535 || (collisions > 0
5536 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
5537 {
5538 if (size == 3)
5539 {
5540 operands1[2] = part[0][2];
5541 operands1[3] = part[0][1];
5542 operands1[4] = part[0][0];
5543 operands1[5] = part[1][2];
5544 operands1[6] = part[1][1];
5545 operands1[7] = part[1][0];
5546 }
5547 else
5548 {
5549 operands1[2] = part[0][1];
5550 operands1[3] = part[0][0];
5551 operands1[5] = part[1][1];
5552 operands1[6] = part[1][0];
5553 }
5554 }
5555 else
5556 {
5557 if (size == 3)
5558 {
5559 operands1[2] = part[0][0];
5560 operands1[3] = part[0][1];
5561 operands1[4] = part[0][2];
5562 operands1[5] = part[1][0];
5563 operands1[6] = part[1][1];
5564 operands1[7] = part[1][2];
5565 }
5566 else
5567 {
5568 operands1[2] = part[0][0];
5569 operands1[3] = part[0][1];
5570 operands1[5] = part[1][0];
5571 operands1[6] = part[1][1];
e075ae69
RH
5572 }
5573 }
32b5b1aa 5574
e9a25f70 5575 return 0;
32b5b1aa 5576}
32b5b1aa 5577
e075ae69
RH
5578void
5579ix86_split_ashldi (operands, scratch)
5580 rtx *operands, scratch;
32b5b1aa 5581{
e075ae69
RH
5582 rtx low[2], high[2];
5583 int count;
b985a30f 5584
e075ae69
RH
5585 if (GET_CODE (operands[2]) == CONST_INT)
5586 {
5587 split_di (operands, 2, low, high);
5588 count = INTVAL (operands[2]) & 63;
32b5b1aa 5589
e075ae69
RH
5590 if (count >= 32)
5591 {
5592 emit_move_insn (high[0], low[1]);
5593 emit_move_insn (low[0], const0_rtx);
b985a30f 5594
e075ae69
RH
5595 if (count > 32)
5596 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
5597 }
5598 else
5599 {
5600 if (!rtx_equal_p (operands[0], operands[1]))
5601 emit_move_insn (operands[0], operands[1]);
5602 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
5603 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
5604 }
5605 }
5606 else
5607 {
5608 if (!rtx_equal_p (operands[0], operands[1]))
5609 emit_move_insn (operands[0], operands[1]);
b985a30f 5610
e075ae69 5611 split_di (operands, 1, low, high);
b985a30f 5612
e075ae69
RH
5613 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
5614 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 5615
fe577e58 5616 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 5617 {
fe577e58 5618 if (! no_new_pseudos)
e075ae69
RH
5619 scratch = force_reg (SImode, const0_rtx);
5620 else
5621 emit_move_insn (scratch, const0_rtx);
5622
5623 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
5624 scratch));
5625 }
5626 else
5627 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
5628 }
e9a25f70 5629}
32b5b1aa 5630
e075ae69
RH
5631void
5632ix86_split_ashrdi (operands, scratch)
5633 rtx *operands, scratch;
32b5b1aa 5634{
e075ae69
RH
5635 rtx low[2], high[2];
5636 int count;
32b5b1aa 5637
e075ae69
RH
5638 if (GET_CODE (operands[2]) == CONST_INT)
5639 {
5640 split_di (operands, 2, low, high);
5641 count = INTVAL (operands[2]) & 63;
32b5b1aa 5642
e075ae69
RH
5643 if (count >= 32)
5644 {
5645 emit_move_insn (low[0], high[1]);
32b5b1aa 5646
e075ae69
RH
5647 if (! reload_completed)
5648 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
5649 else
5650 {
5651 emit_move_insn (high[0], low[0]);
5652 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
5653 }
5654
5655 if (count > 32)
5656 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
5657 }
5658 else
5659 {
5660 if (!rtx_equal_p (operands[0], operands[1]))
5661 emit_move_insn (operands[0], operands[1]);
5662 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5663 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
5664 }
5665 }
5666 else
32b5b1aa 5667 {
e075ae69
RH
5668 if (!rtx_equal_p (operands[0], operands[1]))
5669 emit_move_insn (operands[0], operands[1]);
5670
5671 split_di (operands, 1, low, high);
5672
5673 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5674 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
5675
fe577e58 5676 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 5677 {
fe577e58 5678 if (! no_new_pseudos)
e075ae69
RH
5679 scratch = gen_reg_rtx (SImode);
5680 emit_move_insn (scratch, high[0]);
5681 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
5682 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5683 scratch));
5684 }
5685 else
5686 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 5687 }
e075ae69 5688}
32b5b1aa 5689
e075ae69
RH
5690void
5691ix86_split_lshrdi (operands, scratch)
5692 rtx *operands, scratch;
5693{
5694 rtx low[2], high[2];
5695 int count;
32b5b1aa 5696
e075ae69 5697 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 5698 {
e075ae69
RH
5699 split_di (operands, 2, low, high);
5700 count = INTVAL (operands[2]) & 63;
5701
5702 if (count >= 32)
c7271385 5703 {
e075ae69
RH
5704 emit_move_insn (low[0], high[1]);
5705 emit_move_insn (high[0], const0_rtx);
32b5b1aa 5706
e075ae69
RH
5707 if (count > 32)
5708 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
5709 }
5710 else
5711 {
5712 if (!rtx_equal_p (operands[0], operands[1]))
5713 emit_move_insn (operands[0], operands[1]);
5714 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5715 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
5716 }
32b5b1aa 5717 }
e075ae69
RH
5718 else
5719 {
5720 if (!rtx_equal_p (operands[0], operands[1]))
5721 emit_move_insn (operands[0], operands[1]);
32b5b1aa 5722
e075ae69
RH
5723 split_di (operands, 1, low, high);
5724
5725 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5726 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
5727
5728 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 5729 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 5730 {
fe577e58 5731 if (! no_new_pseudos)
e075ae69
RH
5732 scratch = force_reg (SImode, const0_rtx);
5733 else
5734 emit_move_insn (scratch, const0_rtx);
5735
5736 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5737 scratch));
5738 }
5739 else
5740 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
5741 }
32b5b1aa 5742}
3f803cd9 5743
e075ae69
RH
5744/* Expand the appropriate insns for doing strlen if not just doing
5745 repnz; scasb
5746
5747 out = result, initialized with the start address
5748 align_rtx = alignment of the address.
5749 scratch = scratch register, initialized with the startaddress when
5750 not aligned, otherwise undefined
3f803cd9
SC
5751
5752 This is just the body. It needs the initialisations mentioned above and
5753 some address computing at the end. These things are done in i386.md. */
5754
e075ae69
RH
5755void
5756ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
5757 rtx out, align_rtx, scratch;
3f803cd9 5758{
e075ae69
RH
5759 int align;
5760 rtx tmp;
5761 rtx align_2_label = NULL_RTX;
5762 rtx align_3_label = NULL_RTX;
5763 rtx align_4_label = gen_label_rtx ();
5764 rtx end_0_label = gen_label_rtx ();
e075ae69 5765 rtx mem;
16189740
RH
5766 rtx no_flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
5767 rtx z_flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
e2e52e1b 5768 rtx tmpreg = gen_reg_rtx (SImode);
e075ae69
RH
5769
5770 align = 0;
5771 if (GET_CODE (align_rtx) == CONST_INT)
5772 align = INTVAL (align_rtx);
3f803cd9 5773
e9a25f70 5774 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 5775
e9a25f70 5776 /* Is there a known alignment and is it less than 4? */
e075ae69 5777 if (align < 4)
3f803cd9 5778 {
e9a25f70 5779 /* Is there a known alignment and is it not 2? */
e075ae69 5780 if (align != 2)
3f803cd9 5781 {
e075ae69
RH
5782 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
5783 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
5784
5785 /* Leave just the 3 lower bits. */
5786 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
5787 NULL_RTX, 0, OPTAB_WIDEN);
5788
16189740 5789 emit_insn (gen_cmpsi_ccz_1 (align_rtx, const0_rtx));
e075ae69 5790
16189740 5791 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
e075ae69
RH
5792 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5793 gen_rtx_LABEL_REF (VOIDmode,
5794 align_4_label),
5795 pc_rtx);
5796 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5797
16189740 5798 emit_insn (gen_cmpsi_ccno_1 (align_rtx, GEN_INT (2)));
e075ae69 5799
16189740 5800 tmp = gen_rtx_EQ (VOIDmode, no_flags, const0_rtx);
e075ae69
RH
5801 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5802 gen_rtx_LABEL_REF (VOIDmode,
5803 align_2_label),
5804 pc_rtx);
5805 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5806
16189740 5807 tmp = gen_rtx_GTU (VOIDmode, no_flags, const0_rtx);
e075ae69
RH
5808 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5809 gen_rtx_LABEL_REF (VOIDmode,
5810 align_3_label),
5811 pc_rtx);
5812 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
3f803cd9
SC
5813 }
5814 else
5815 {
e9a25f70
JL
5816 /* Since the alignment is 2, we have to check 2 or 0 bytes;
5817 check if is aligned to 4 - byte. */
e9a25f70 5818
e075ae69
RH
5819 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
5820 NULL_RTX, 0, OPTAB_WIDEN);
5821
16189740 5822 emit_insn (gen_cmpsi_ccz_1 (align_rtx, const0_rtx));
e075ae69 5823
16189740 5824 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
e075ae69
RH
5825 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5826 gen_rtx_LABEL_REF (VOIDmode,
5827 align_4_label),
5828 pc_rtx);
5829 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
3f803cd9
SC
5830 }
5831
e075ae69 5832 mem = gen_rtx_MEM (QImode, out);
e9a25f70 5833
e075ae69 5834 /* Now compare the bytes. */
e9a25f70 5835
e075ae69 5836 /* Compare the first n unaligned byte on a byte per byte basis. */
16189740 5837 emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));
e9a25f70 5838
16189740 5839 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
e075ae69
RH
5840 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5841 gen_rtx_LABEL_REF (VOIDmode, end_0_label),
5842 pc_rtx);
5843 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
3f803cd9 5844
e075ae69
RH
5845 /* Increment the address. */
5846 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 5847
e075ae69
RH
5848 /* Not needed with an alignment of 2 */
5849 if (align != 2)
5850 {
5851 emit_label (align_2_label);
3f803cd9 5852
16189740 5853 emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));
3f803cd9 5854
16189740 5855 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
e075ae69
RH
5856 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5857 gen_rtx_LABEL_REF (VOIDmode,
5858 end_0_label),
5859 pc_rtx);
5860 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5861
5862 emit_insn (gen_addsi3 (out, out, const1_rtx));
5863
5864 emit_label (align_3_label);
5865 }
5866
16189740 5867 emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));
e9a25f70 5868
16189740 5869 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
e075ae69
RH
5870 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5871 gen_rtx_LABEL_REF (VOIDmode, end_0_label),
5872 pc_rtx);
5873 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5874
5875 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
5876 }
5877
e075ae69
RH
5878 /* Generate loop to check 4 bytes at a time. It is not a good idea to
5879 align this loop. It gives only huge programs, but does not help to
5880 speed up. */
5881 emit_label (align_4_label);
3f803cd9 5882
e075ae69
RH
5883 mem = gen_rtx_MEM (SImode, out);
5884 emit_move_insn (scratch, mem);
e075ae69 5885 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 5886
e2e52e1b
JH
5887 /* This formula yields a nonzero result iff one of the bytes is zero.
5888 This saves three branches inside loop and many cycles. */
5889
5890 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
5891 emit_insn (gen_one_cmplsi2 (scratch, scratch));
5892 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
5893 emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
5894 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 0, align_4_label);
5895
5896 if (TARGET_CMOVE)
5897 {
5898 rtx reg = gen_reg_rtx (SImode);
5899 emit_move_insn (reg, tmpreg);
5900 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
5901
5902 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 5903 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
5904 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
5905 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
5906 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
5907 gen_rtx_IF_THEN_ELSE (SImode, tmp,
5908 reg,
5909 tmpreg)));
5910 /* Emit lea manually to avoid clobbering of flags. */
5911 emit_insn (gen_rtx_SET (SImode, reg,
5912 gen_rtx_PLUS (SImode, out, GEN_INT (2))));
5913
5914 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
5915 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
5916 emit_insn (gen_rtx_SET (VOIDmode, out,
5917 gen_rtx_IF_THEN_ELSE (SImode, tmp,
5918 reg,
5919 out)));
5920
5921 }
5922 else
5923 {
5924 rtx end_2_label = gen_label_rtx ();
5925 /* Is zero in the first two bytes? */
5926
16189740 5927 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
5928 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
5929 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
5930 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5931 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
5932 pc_rtx);
5933 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5934 JUMP_LABEL (tmp) = end_2_label;
5935
5936 /* Not in the first two. Move two bytes forward. */
5937 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
5938 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
5939
5940 emit_label (end_2_label);
5941
5942 }
5943
5944 /* Avoid branch in fixing the byte. */
5945 tmpreg = gen_lowpart (QImode, tmpreg);
5946 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
5947 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
e075ae69
RH
5948
5949 emit_label (end_0_label);
5950}
5951\f
e075ae69
RH
5952/* Clear stack slot assignments remembered from previous functions.
5953 This is called from INIT_EXPANDERS once before RTL is emitted for each
5954 function. */
5955
36edd3cc
BS
5956static void
5957ix86_init_machine_status (p)
1526a060 5958 struct function *p;
e075ae69
RH
5959{
5960 enum machine_mode mode;
5961 int n;
36edd3cc
BS
5962 p->machine
5963 = (struct machine_function *) xmalloc (sizeof (struct machine_function));
e075ae69
RH
5964
5965 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
5966 mode = (enum machine_mode) ((int) mode + 1))
5967 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
5968 ix86_stack_locals[(int) mode][n] = NULL_RTX;
e075ae69
RH
5969}
5970
1526a060
BS
5971/* Mark machine specific bits of P for GC. */
5972static void
5973ix86_mark_machine_status (p)
5974 struct function *p;
5975{
5976 enum machine_mode mode;
5977 int n;
5978
5979 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
5980 mode = (enum machine_mode) ((int) mode + 1))
5981 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
5982 ggc_mark_rtx (p->machine->stack_locals[(int) mode][n]);
5983}
5984
e075ae69
RH
5985/* Return a MEM corresponding to a stack slot with mode MODE.
5986 Allocate a new slot if necessary.
5987
5988 The RTL for a function can have several slots available: N is
5989 which slot to use. */
5990
5991rtx
5992assign_386_stack_local (mode, n)
5993 enum machine_mode mode;
5994 int n;
5995{
5996 if (n < 0 || n >= MAX_386_STACK_LOCALS)
5997 abort ();
5998
5999 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
6000 ix86_stack_locals[(int) mode][n]
6001 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
6002
6003 return ix86_stack_locals[(int) mode][n];
6004}
6005\f
6006/* Calculate the length of the memory address in the instruction
6007 encoding. Does not include the one-byte modrm, opcode, or prefix. */
6008
6009static int
6010memory_address_length (addr)
6011 rtx addr;
6012{
6013 struct ix86_address parts;
6014 rtx base, index, disp;
6015 int len;
6016
6017 if (GET_CODE (addr) == PRE_DEC
6018 || GET_CODE (addr) == POST_INC)
6019 return 0;
3f803cd9 6020
e075ae69
RH
6021 if (! ix86_decompose_address (addr, &parts))
6022 abort ();
3f803cd9 6023
e075ae69
RH
6024 base = parts.base;
6025 index = parts.index;
6026 disp = parts.disp;
6027 len = 0;
3f803cd9 6028
e075ae69
RH
6029 /* Register Indirect. */
6030 if (base && !index && !disp)
6031 {
6032 /* Special cases: ebp and esp need the two-byte modrm form. */
6033 if (addr == stack_pointer_rtx
6034 || addr == arg_pointer_rtx
564d80f4
JH
6035 || addr == frame_pointer_rtx
6036 || addr == hard_frame_pointer_rtx)
e075ae69 6037 len = 1;
3f803cd9 6038 }
e9a25f70 6039
e075ae69
RH
6040 /* Direct Addressing. */
6041 else if (disp && !base && !index)
6042 len = 4;
6043
3f803cd9
SC
6044 else
6045 {
e075ae69
RH
6046 /* Find the length of the displacement constant. */
6047 if (disp)
6048 {
6049 if (GET_CODE (disp) == CONST_INT
6050 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
6051 len = 1;
6052 else
6053 len = 4;
6054 }
3f803cd9 6055
e075ae69
RH
6056 /* An index requires the two-byte modrm form. */
6057 if (index)
6058 len += 1;
3f803cd9
SC
6059 }
6060
e075ae69
RH
6061 return len;
6062}
79325812 6063
e075ae69
RH
/* Compute the default value of the "length" insn attribute: the total
   byte length of INSN's encoding, built from immediate-operand sizes,
   the memory-address bytes, and the generated opcode/prefix lengths.  */

int
ix86_attr_length_default (insn)
     rtx insn;
{
  enum attr_type type;
  int len = 0, i;

  type = get_attr_type (insn);
  extract_insn (insn);
  switch (type)
    {
    /* These types carry no immediates; only address, opcode and prefix
       bytes (added below) contribute.  */
    case TYPE_INCDEC:
    case TYPE_SETCC:
    case TYPE_ICMOV:
    case TYPE_FMOV:
    case TYPE_FOP:
    case TYPE_FCMP:
    case TYPE_FOP1:
    case TYPE_FMUL:
    case TYPE_FDIV:
    case TYPE_FSGN:
    case TYPE_FPSPC:
    case TYPE_FCMOV:
    case TYPE_IBR:
      break;

    case TYPE_STR:
    case TYPE_CLD:
      len = 0;
      /* NOTE(review): there is no break here, so string/cld insns fall
         through into the immediate-operand scan below.  Presumably
         harmless since such insns have no constant operands, but
         confirm the fallthrough is intentional.  */

    /* ALU-style insns: add the size of any immediate operand -- one
       byte if it fits a signed 8-bit field ('K' constraint), else the
       full width of the destination mode (operand 0).  */
    case TYPE_ALU1:
    case TYPE_NEGNOT:
    case TYPE_ALU:
    case TYPE_ICMP:
    case TYPE_IMOVX:
    case TYPE_ISHIFT:
    case TYPE_IMUL:
    case TYPE_IDIV:
    case TYPE_PUSH:
    case TYPE_POP:
      for (i = recog_data.n_operands - 1; i >= 0; --i)
        if (CONSTANT_P (recog_data.operand[i]))
          {
            if (GET_CODE (recog_data.operand[i]) == CONST_INT
                && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
              len += 1;
            else
              len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
          }
      break;

    case TYPE_IMOV:
      /* A move immediate is always the full width of the destination.  */
      if (CONSTANT_P (recog_data.operand[1]))
        len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
      break;

    case TYPE_CALL:
      /* Direct call: one opcode byte plus a 32-bit displacement.  */
      if (constant_call_address_operand (recog_data.operand[0],
                                         GET_MODE (recog_data.operand[0])))
        return 5;
      break;

    case TYPE_CALLV:
      /* Call with a value: target address is operand 1.  */
      if (constant_call_address_operand (recog_data.operand[1],
                                         GET_MODE (recog_data.operand[1])))
        return 5;
      break;

    case TYPE_LEA:
      {
        /* Irritatingly, single_set doesn't work with REG_UNUSED present,
           as we'll get from running life_analysis during reg-stack when
           not optimizing.  Not that it matters anyway, now that
           pro_epilogue_adjust_stack uses lea, and is by design not
           single_set.  */
        rtx set = PATTERN (insn);
        if (GET_CODE (set) == SET)
          ;
        else if (GET_CODE (set) == PARALLEL
                 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
          set = XVECEXP (set, 0, 0);
        else
          abort ();

        /* lea encodes its source as a memory address; skip the generic
           memory-operand scan below.  */
        len += memory_address_length (SET_SRC (set));
        goto just_opcode;
      }

    case TYPE_OTHER:
    case TYPE_MULTI:
      /* Unknown or multi-insn patterns: assume the worst case.  */
      return 15;

    case TYPE_FXCH:
      /* fxch is two bytes, plus one extra byte when the non-top operand
         is not %st(1).  */
      if (STACK_TOP_P (recog_data.operand[0]))
        return 2 + (REGNO (recog_data.operand[1]) != FIRST_STACK_REG + 1);
      else
        return 2 + (REGNO (recog_data.operand[0]) != FIRST_STACK_REG + 1);

    default:
      abort ();
    }

  /* Add the address length of the first memory operand, if any.  */
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      {
        len += memory_address_length (XEXP (recog_data.operand[i], 0));
        break;
      }

just_opcode:
  len += get_attr_length_opcode (insn);
  len += get_attr_length_prefix (insn);

  return len;
}
e075ae69
RH
6178\f
6179/* Return the maximum number of instructions a cpu can issue. */
b657fc39 6180
e075ae69
RH
6181int
6182ix86_issue_rate ()
b657fc39 6183{
e075ae69 6184 switch (ix86_cpu)
b657fc39 6185 {
e075ae69
RH
6186 case PROCESSOR_PENTIUM:
6187 case PROCESSOR_K6:
6188 return 2;
79325812 6189
e075ae69
RH
6190 case PROCESSOR_PENTIUMPRO:
6191 return 3;
b657fc39 6192
b657fc39 6193 default:
e075ae69 6194 return 1;
b657fc39 6195 }
b657fc39
L
6196}
6197
e075ae69
RH
6198/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
6199 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 6200
e075ae69
RH
6201static int
6202ix86_flags_dependant (insn, dep_insn, insn_type)
6203 rtx insn, dep_insn;
6204 enum attr_type insn_type;
6205{
6206 rtx set, set2;
b657fc39 6207
e075ae69
RH
6208 /* Simplify the test for uninteresting insns. */
6209 if (insn_type != TYPE_SETCC
6210 && insn_type != TYPE_ICMOV
6211 && insn_type != TYPE_FCMOV
6212 && insn_type != TYPE_IBR)
6213 return 0;
b657fc39 6214
e075ae69
RH
6215 if ((set = single_set (dep_insn)) != 0)
6216 {
6217 set = SET_DEST (set);
6218 set2 = NULL_RTX;
6219 }
6220 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
6221 && XVECLEN (PATTERN (dep_insn), 0) == 2
6222 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
6223 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
6224 {
6225 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6226 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6227 }
78a0d70c
ZW
6228 else
6229 return 0;
b657fc39 6230
78a0d70c
ZW
6231 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
6232 return 0;
b657fc39 6233
78a0d70c
ZW
6234 /* This test is true if the dependant insn reads the flags but
6235 not any other potentially set register. */
6236 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
6237 return 0;
6238
6239 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
6240 return 0;
6241
6242 return 1;
e075ae69 6243}
b657fc39 6244
e075ae69
RH
6245/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
6246 address with operands set by DEP_INSN. */
6247
6248static int
6249ix86_agi_dependant (insn, dep_insn, insn_type)
6250 rtx insn, dep_insn;
6251 enum attr_type insn_type;
6252{
6253 rtx addr;
6254
6255 if (insn_type == TYPE_LEA)
5fbdde42
RH
6256 {
6257 addr = PATTERN (insn);
6258 if (GET_CODE (addr) == SET)
6259 ;
6260 else if (GET_CODE (addr) == PARALLEL
6261 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
6262 addr = XVECEXP (addr, 0, 0);
6263 else
6264 abort ();
6265 addr = SET_SRC (addr);
6266 }
e075ae69
RH
6267 else
6268 {
6269 int i;
6270 extract_insn (insn);
1ccbefce
RH
6271 for (i = recog_data.n_operands - 1; i >= 0; --i)
6272 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 6273 {
1ccbefce 6274 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
6275 goto found;
6276 }
6277 return 0;
6278 found:;
b657fc39
L
6279 }
6280
e075ae69 6281 return modified_in_p (addr, dep_insn);
b657fc39 6282}
a269a03c
JC
6283
6284int
e075ae69 6285ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
6286 rtx insn, link, dep_insn;
6287 int cost;
6288{
e075ae69
RH
6289 enum attr_type insn_type, dep_insn_type;
6290 rtx set, set2;
9b00189f 6291 int dep_insn_code_number;
a269a03c 6292
309ada50 6293 /* Anti and output depenancies have zero cost on all CPUs. */
e075ae69 6294 if (REG_NOTE_KIND (link) != 0)
309ada50 6295 return 0;
a269a03c 6296
9b00189f
JH
6297 dep_insn_code_number = recog_memoized (dep_insn);
6298
e075ae69 6299 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 6300 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 6301 return cost;
a269a03c 6302
1c71e60e
JH
6303 insn_type = get_attr_type (insn);
6304 dep_insn_type = get_attr_type (dep_insn);
9b00189f 6305
1c71e60e
JH
6306 /* Prologue and epilogue allocators can have a false dependency on ebp.
6307 This results in one cycle extra stall on Pentium prologue scheduling,
6308 so handle this important case manually. */
6309 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
6310 && dep_insn_type == TYPE_ALU
9b00189f
JH
6311 && !reg_mentioned_p (stack_pointer_rtx, insn))
6312 return 0;
6313
a269a03c
JC
6314 switch (ix86_cpu)
6315 {
6316 case PROCESSOR_PENTIUM:
e075ae69
RH
6317 /* Address Generation Interlock adds a cycle of latency. */
6318 if (ix86_agi_dependant (insn, dep_insn, insn_type))
6319 cost += 1;
6320
6321 /* ??? Compares pair with jump/setcc. */
6322 if (ix86_flags_dependant (insn, dep_insn, insn_type))
6323 cost = 0;
6324
6325 /* Floating point stores require value to be ready one cycle ealier. */
6326 if (insn_type == TYPE_FMOV
6327 && get_attr_memory (insn) == MEMORY_STORE
6328 && !ix86_agi_dependant (insn, dep_insn, insn_type))
6329 cost += 1;
6330 break;
a269a03c 6331
e075ae69
RH
6332 case PROCESSOR_PENTIUMPRO:
6333 /* Since we can't represent delayed latencies of load+operation,
6334 increase the cost here for non-imov insns. */
6335 if (dep_insn_type != TYPE_IMOV
6336 && dep_insn_type != TYPE_FMOV
6337 && get_attr_memory (dep_insn) == MEMORY_LOAD)
6338 cost += 1;
6339
6340 /* INT->FP conversion is expensive. */
6341 if (get_attr_fp_int_src (dep_insn))
6342 cost += 5;
6343
6344 /* There is one cycle extra latency between an FP op and a store. */
6345 if (insn_type == TYPE_FMOV
6346 && (set = single_set (dep_insn)) != NULL_RTX
6347 && (set2 = single_set (insn)) != NULL_RTX
6348 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
6349 && GET_CODE (SET_DEST (set2)) == MEM)
6350 cost += 1;
6351 break;
a269a03c 6352
e075ae69
RH
6353 case PROCESSOR_K6:
6354 /* The esp dependency is resolved before the instruction is really
6355 finished. */
6356 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
6357 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
6358 return 1;
a269a03c 6359
e075ae69
RH
6360 /* Since we can't represent delayed latencies of load+operation,
6361 increase the cost here for non-imov insns. */
6362 if (get_attr_memory (dep_insn) == MEMORY_LOAD)
6363 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
6364
6365 /* INT->FP conversion is expensive. */
6366 if (get_attr_fp_int_src (dep_insn))
6367 cost += 5;
a14003ee 6368 break;
e075ae69 6369
309ada50
JH
6370 case PROCESSOR_ATHLON:
6371 /* Address Generation Interlock cause problems on the Athlon CPU because
6372 the loads and stores are done in order so once one load or store has
6373 to wait, others must too, so penalize the AGIs slightly by one cycle.
6374 We might experiment with this value later. */
6375 if (ix86_agi_dependant (insn, dep_insn, insn_type))
6376 cost += 1;
6377
6378 /* Since we can't represent delayed latencies of load+operation,
6379 increase the cost here for non-imov insns. */
6380 if (dep_insn_type != TYPE_IMOV
6381 && dep_insn_type != TYPE_FMOV
6382 && get_attr_memory (dep_insn) == MEMORY_LOAD)
6383 cost += 2;
a269a03c 6384 default:
a269a03c
JC
6385 break;
6386 }
6387
6388 return cost;
6389}
0a726ef1 6390
e075ae69
RH
/* Per-CPU scheduling state, reset at the start of each block by
   ix86_sched_init.  Only the PPro variant carries data at present.  */
static union
{
  struct ppro_sched_data
  {
    rtx decode[3];		/* insns assigned to the three decoders */
    int issued_this_cycle;	/* insns issued so far this clock cycle */
  } ppro;
} ix86_sched_data;
0a726ef1 6399
e075ae69
RH
6400static int
6401ix86_safe_length (insn)
6402 rtx insn;
6403{
6404 if (recog_memoized (insn) >= 0)
6405 return get_attr_length(insn);
6406 else
6407 return 128;
6408}
0a726ef1 6409
e075ae69
RH
6410static int
6411ix86_safe_length_prefix (insn)
6412 rtx insn;
6413{
6414 if (recog_memoized (insn) >= 0)
6415 return get_attr_length(insn);
6416 else
6417 return 0;
6418}
6419
6420static enum attr_memory
6421ix86_safe_memory (insn)
6422 rtx insn;
6423{
6424 if (recog_memoized (insn) >= 0)
6425 return get_attr_memory(insn);
6426 else
6427 return MEMORY_UNKNOWN;
6428}
0a726ef1 6429
e075ae69
RH
6430static enum attr_pent_pair
6431ix86_safe_pent_pair (insn)
6432 rtx insn;
6433{
6434 if (recog_memoized (insn) >= 0)
6435 return get_attr_pent_pair(insn);
6436 else
6437 return PENT_PAIR_NP;
6438}
0a726ef1 6439
e075ae69
RH
6440static enum attr_ppro_uops
6441ix86_safe_ppro_uops (insn)
6442 rtx insn;
6443{
6444 if (recog_memoized (insn) >= 0)
6445 return get_attr_ppro_uops (insn);
6446 else
6447 return PPRO_UOPS_MANY;
6448}
0a726ef1 6449
e075ae69
RH
6450static void
6451ix86_dump_ppro_packet (dump)
6452 FILE *dump;
0a726ef1 6453{
e075ae69 6454 if (ix86_sched_data.ppro.decode[0])
0a726ef1 6455 {
e075ae69
RH
6456 fprintf (dump, "PPRO packet: %d",
6457 INSN_UID (ix86_sched_data.ppro.decode[0]));
6458 if (ix86_sched_data.ppro.decode[1])
6459 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
6460 if (ix86_sched_data.ppro.decode[2])
6461 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
6462 fputc ('\n', dump);
6463 }
6464}
0a726ef1 6465
e075ae69 6466/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 6467
e075ae69
RH
6468void
6469ix86_sched_init (dump, sched_verbose)
6470 FILE *dump ATTRIBUTE_UNUSED;
6471 int sched_verbose ATTRIBUTE_UNUSED;
6472{
6473 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
6474}
6475
6476/* Shift INSN to SLOT, and shift everything else down. */
6477
6478static void
6479ix86_reorder_insn (insnp, slot)
6480 rtx *insnp, *slot;
6481{
6482 if (insnp != slot)
6483 {
6484 rtx insn = *insnp;
6485 do
6486 insnp[0] = insnp[1];
6487 while (++insnp != slot);
6488 *insnp = insn;
0a726ef1 6489 }
e075ae69
RH
6490}
6491
/* Find an instruction with given pairability and minimal amount of cycles
   lost by the fact that the CPU waits for both pipelines to finish before
   reading next instructions.  Also take care that both instructions together
   can not exceed 7 bytes.

   E_READY points at the last entry of the ready queue, READY at the
   first; FIRST is the insn we are trying to pair with.  Returns a
   pointer into the queue for the best candidate, or NULL.  */

static rtx *
ix86_pent_find_pair (e_ready, ready, type, first)
     rtx *e_ready;
     rtx *ready;
     enum attr_pent_pair type;
     rtx first;
{
  int mincycles, cycles;
  enum attr_pent_pair tmp;
  enum attr_memory memory;
  rtx *insnp, *bestinsnp = NULL;

  /* FIRST itself must fit in 7 bytes (beyond its prefixes) to pair.  */
  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
    return NULL;

  memory = ix86_safe_memory (first);
  cycles = result_ready_cost (first);
  mincycles = INT_MAX;

  /* Scan back-to-front for an insn of pairability TYPE that also fits
     in 7 bytes, keeping the candidate that wastes the fewest cycles.
     The loop stops early once a zero-cost pairing is found
     (mincycles == 0).  */
  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
      {
	enum attr_memory second_memory;
	int secondcycles, currentcycles;

	second_memory = ix86_safe_memory (*insnp);
	secondcycles = result_ready_cost (*insnp);
	/* Base cost: the pipes idle for the latency difference.  */
	currentcycles = abs (cycles - secondcycles);

	if (secondcycles >= 1 && cycles >= 1)
	  {
	    /* Two read/modify/write instructions together takes two
	       cycles longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
	      currentcycles += 2;

	    /* Read modify/write instruction followed by read/modify
	       takes one cycle longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
		&& tmp != PENT_PAIR_UV
		&& ix86_safe_pent_pair (first) != PENT_PAIR_UV)
	      currentcycles += 1;
	  }
	if (currentcycles < mincycles)
	  bestinsnp = insnp, mincycles = currentcycles;
      }

  return bestinsnp;
}
6547
78a0d70c 6548/* Subroutines of ix86_sched_reorder. */
e075ae69 6549
c6991660 6550static void
78a0d70c 6551ix86_sched_reorder_pentium (ready, e_ready)
e075ae69 6552 rtx *ready;
78a0d70c 6553 rtx *e_ready;
e075ae69 6554{
78a0d70c 6555 enum attr_pent_pair pair1, pair2;
e075ae69 6556 rtx *insnp;
e075ae69 6557
78a0d70c
ZW
6558 /* This wouldn't be necessary if Haifa knew that static insn ordering
6559 is important to which pipe an insn is issued to. So we have to make
6560 some minor rearrangements. */
e075ae69 6561
78a0d70c
ZW
6562 pair1 = ix86_safe_pent_pair (*e_ready);
6563
6564 /* If the first insn is non-pairable, let it be. */
6565 if (pair1 == PENT_PAIR_NP)
6566 return;
6567
6568 pair2 = PENT_PAIR_NP;
6569 insnp = 0;
6570
6571 /* If the first insn is UV or PV pairable, search for a PU
6572 insn to go with. */
6573 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
e075ae69 6574 {
78a0d70c
ZW
6575 insnp = ix86_pent_find_pair (e_ready-1, ready,
6576 PENT_PAIR_PU, *e_ready);
6577 if (insnp)
6578 pair2 = PENT_PAIR_PU;
6579 }
e075ae69 6580
78a0d70c
ZW
6581 /* If the first insn is PU or UV pairable, search for a PV
6582 insn to go with. */
6583 if (pair2 == PENT_PAIR_NP
6584 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
6585 {
6586 insnp = ix86_pent_find_pair (e_ready-1, ready,
6587 PENT_PAIR_PV, *e_ready);
6588 if (insnp)
6589 pair2 = PENT_PAIR_PV;
6590 }
e075ae69 6591
78a0d70c
ZW
6592 /* If the first insn is pairable, search for a UV
6593 insn to go with. */
6594 if (pair2 == PENT_PAIR_NP)
6595 {
6596 insnp = ix86_pent_find_pair (e_ready-1, ready,
6597 PENT_PAIR_UV, *e_ready);
6598 if (insnp)
6599 pair2 = PENT_PAIR_UV;
6600 }
e075ae69 6601
78a0d70c
ZW
6602 if (pair2 == PENT_PAIR_NP)
6603 return;
e075ae69 6604
78a0d70c
ZW
6605 /* Found something! Decide if we need to swap the order. */
6606 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
6607 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
6608 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
6609 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
6610 ix86_reorder_insn (insnp, e_ready);
6611 else
6612 ix86_reorder_insn (insnp, e_ready - 1);
6613}
e075ae69 6614
/* Reorder the ready queue (READY..E_READY, E_READY is the head) for the
   PPro decoders: pick one multi-uop insn for the first decoder slot and
   up to two single-uop insns for the remaining slots, recording the
   number issued in ix86_sched_data.ppro.issued_this_cycle.  */

static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof(decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready >= e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  /* Always report at least one issue so the scheduler makes progress.  */
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}
fb693d44 6699
78a0d70c
ZW
6700
6701/* We are about to being issuing insns for this clock cycle.
6702 Override the default sort algorithm to better slot instructions. */
6703int
6704ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
6705 FILE *dump ATTRIBUTE_UNUSED;
6706 int sched_verbose ATTRIBUTE_UNUSED;
6707 rtx *ready;
6708 int n_ready;
6709 int clock_var ATTRIBUTE_UNUSED;
6710{
6711 rtx *e_ready = ready + n_ready - 1;
fb693d44 6712
78a0d70c
ZW
6713 if (n_ready < 2)
6714 goto out;
e075ae69 6715
78a0d70c
ZW
6716 switch (ix86_cpu)
6717 {
6718 default:
6719 break;
e075ae69 6720
78a0d70c
ZW
6721 case PROCESSOR_PENTIUM:
6722 ix86_sched_reorder_pentium (ready, e_ready);
6723 break;
e075ae69 6724
78a0d70c
ZW
6725 case PROCESSOR_PENTIUMPRO:
6726 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 6727 break;
fb693d44
RH
6728 }
6729
e075ae69
RH
6730out:
6731 return ix86_issue_rate ();
6732}
fb693d44 6733
e075ae69
RH
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      /* Most CPUs simply consume one issue slot per insn.  */
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    /* A many-uop insn occupies an entire decode group by
	       itself.  NOTE(review): the packet is dumped twice here --
	       once for the group being closed, once for the group
	       holding only INSN; confirm the double dump is intended.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    /* A few-uop insn closes the previous group and starts a new
	       one with itself in the first decoder slot.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    /* A single-uop insn fills the first free decoder slot.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		/* All three decoders are now occupied: dump and close
		   out the group.  */
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}
This page took 1.767226 seconds and 5 git commands to generate.