/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000
   Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
2a2ab3f9 21
0b6b2900 22#include <setjmp.h>
2a2ab3f9 23#include "config.h"
bb5177ac 24#include "system.h"
2a2ab3f9 25#include "rtl.h"
6baf1cc8
BS
26#include "tree.h"
27#include "tm_p.h"
2a2ab3f9
JVA
28#include "regs.h"
29#include "hard-reg-set.h"
30#include "real.h"
31#include "insn-config.h"
32#include "conditions.h"
33#include "insn-flags.h"
34#include "output.h"
35#include "insn-attr.h"
2a2ab3f9 36#include "flags.h"
a8ffcc81 37#include "except.h"
ecbc4695 38#include "function.h"
00c79232 39#include "recog.h"
ced8dd8c 40#include "expr.h"
f103890b 41#include "toplev.h"
e075ae69 42#include "basic-block.h"
1526a060 43#include "ggc.h"
2a2ab3f9 44
#ifdef EXTRA_CONSTRAINT
/* If EXTRA_CONSTRAINT is defined, then the 'S'
   constraint in REG_CLASS_FROM_LETTER will no longer work, and various
   asm statements that need 'S' for class SIREG will break.  */
 error EXTRA_CONSTRAINT conflicts with S constraint letter
/* The previous line used to be #error, but some compilers barf
   even if the conditional was untrue.  (The bare `error' token still
   produces a compile failure when the #ifdef is taken.)  */
#endif

/* Default when the target configuration does not define a stack-probe
   limit of its own; -1 is the "no limit" sentinel used elsewhere.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif
57
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,		/* cost of an add instruction */
  1,		/* cost of a lea instruction */
  3,		/* variable shift costs */
  2,		/* constant shift costs */
  6,		/* cost of starting a multiply */
  1,		/* cost of multiply per each bit set */
  23,		/* cost of a divide/mod */
  15,		/* "large" insn */
  3,		/* MOVE_RATIO */
  4,		/* cost for loading QImode using movzbl */
  {2, 4, 2},	/* cost of loading integer registers
		   in QImode, HImode and SImode.
		   Relative to reg-reg move (2). */
  {2, 4, 2},	/* cost of storing integer registers */
  2,		/* cost of reg,reg fld/fst */
  {8, 8, 8},	/* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {8, 8, 8}	/* original comment said "cost of loading integer
		   registers", but given the field order (int load, int
		   store, fld/fst, fp load) this last entry presumably is
		   the fp *store* cost -- TODO confirm against struct
		   processor_costs in i386.h */
};
79
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,		/* cost of an add instruction */
  1,		/* cost of a lea instruction */
  3,		/* variable shift costs */
  2,		/* constant shift costs */
  12,		/* cost of starting a multiply */
  1,		/* cost of multiply per each bit set */
  40,		/* cost of a divide/mod */
  15,		/* "large" insn */
  3,		/* MOVE_RATIO */
  4,		/* cost for loading QImode using movzbl */
  {2, 4, 2},	/* cost of loading integer registers
		   in QImode, HImode and SImode.
		   Relative to reg-reg move (2). */
  {2, 4, 2},	/* cost of storing integer registers */
  2,		/* cost of reg,reg fld/fst */
  {8, 8, 8},	/* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {8, 8, 8}	/* cost of loading integer registers
		   (see note on the same field of i386_cost) */
};
100
struct processor_costs pentium_cost = {	/* Pentium specific costs */
  1,		/* cost of an add instruction */
  1,		/* cost of a lea instruction */
  4,		/* variable shift costs */
  1,		/* constant shift costs */
  11,		/* cost of starting a multiply */
  0,		/* cost of multiply per each bit set */
  25,		/* cost of a divide/mod */
  8,		/* "large" insn */
  6,		/* MOVE_RATIO */
  6,		/* cost for loading QImode using movzbl */
  {2, 4, 2},	/* cost of loading integer registers
		   in QImode, HImode and SImode.
		   Relative to reg-reg move (2). */
  {2, 4, 2},	/* cost of storing integer registers */
  2,		/* cost of reg,reg fld/fst */
  {2, 2, 6},	/* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {4, 4, 6}	/* cost of loading integer registers
		   (see note on the same field of i386_cost) */
};
121
struct processor_costs pentiumpro_cost = {	/* PPro/PII/PIII costs */
  1,		/* cost of an add instruction */
  1,		/* cost of a lea instruction */
  1,		/* variable shift costs */
  1,		/* constant shift costs */
  4,		/* cost of starting a multiply */
  0,		/* cost of multiply per each bit set */
  17,		/* cost of a divide/mod */
  8,		/* "large" insn */
  6,		/* MOVE_RATIO */
  2,		/* cost for loading QImode using movzbl */
  {4, 4, 4},	/* cost of loading integer registers
		   in QImode, HImode and SImode.
		   Relative to reg-reg move (2). */
  {2, 2, 2},	/* cost of storing integer registers */
  2,		/* cost of reg,reg fld/fst */
  {2, 2, 6},	/* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {4, 4, 6}	/* cost of loading integer registers
		   (see note on the same field of i386_cost) */
};
142
struct processor_costs k6_cost = {	/* AMD K6 specific costs */
  1,		/* cost of an add instruction */
  2,		/* cost of a lea instruction */
  1,		/* variable shift costs */
  1,		/* constant shift costs */
  3,		/* cost of starting a multiply */
  0,		/* cost of multiply per each bit set */
  18,		/* cost of a divide/mod */
  8,		/* "large" insn */
  4,		/* MOVE_RATIO */
  3,		/* cost for loading QImode using movzbl */
  {4, 5, 4},	/* cost of loading integer registers
		   in QImode, HImode and SImode.
		   Relative to reg-reg move (2). */
  {2, 3, 2},	/* cost of storing integer registers */
  4,		/* cost of reg,reg fld/fst */
  {6, 6, 6},	/* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {4, 4, 4}	/* cost of loading integer registers
		   (see note on the same field of i386_cost) */
};
163
struct processor_costs athlon_cost = {	/* AMD Athlon specific costs */
  1,		/* cost of an add instruction */
  2,		/* cost of a lea instruction */
  1,		/* variable shift costs */
  1,		/* constant shift costs */
  5,		/* cost of starting a multiply */
  0,		/* cost of multiply per each bit set */
  42,		/* cost of a divide/mod */
  8,		/* "large" insn */
  9,		/* MOVE_RATIO */
  4,		/* cost for loading QImode using movzbl */
  {4, 5, 4},	/* cost of loading integer registers
		   in QImode, HImode and SImode.
		   Relative to reg-reg move (2). */
  {2, 3, 2},	/* cost of storing integer registers */
  4,		/* cost of reg,reg fld/fst */
  {6, 6, 20},	/* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {4, 4, 16}	/* cost of loading integer registers
		   (see note on the same field of i386_cost) */
};
184
/* Cost table actually consulted by the backend; reassigned from the
   -mcpu selection in override_options.  */
struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)

/* Per-CPU tuning flags.  Each constant is a mask of the m_* bits above
   naming the processors on which the corresponding transformation is
   (believed) profitable; ~X means "everything except X".  */
const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
const int x86_use_any_reg = m_486;
const int x86_cmove = m_PPRO | m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
const int x86_add_esp_4 = m_ATHLON | m_K6;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
const int x86_integer_DFmode_moves = ~m_ATHLON;
const int x86_partial_reg_dependency = m_ATHLON;
const int x86_memory_mismatch_stall = m_ATHLON;
/* Convenience: a MEM rtx of the given MODE addressed through the hard
   frame pointer.  */
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))

/* Printable register-name tables; the initializer macros come from
   i386.h.  */
const char * const hi_reg_name[] = HI_REGISTER_NAMES;
const char * const qi_reg_name[] = QI_REGISTER_NAMES;
const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* eight SSE registers, then eight MMX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS
};
/* The "default" register map: gcc regno -> debugger regno; -1 means
   "no debugger number".  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1,			/* arg, flags, fpsr, dir */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1,			/* arg, flags, fpsr, dir */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;

/* Number of per-function stack slots cached in machine_function.  */
#define MAX_386_STACK_LOCALS 2

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
};

/* Shorthand for the current function's cached stack slots.  */
#define ix86_stack_locals (cfun->machine->stack_locals)

/* Which cpu are we scheduling for.  */
enum processor_type ix86_cpu;

/* Which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */

/* Register allocation order (user string form); regs_allocated marks
   which hard regs the string mentioned, for validation and for
   order_regs_for_local_alloc.  */
const char *ix86_reg_alloc_order;
static char regs_allocated[FIRST_PSEUDO_REGISTER];

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* ix86_regparm_string as a number.  */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops (string form).  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps (string form).  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes (string form).  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c.  */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
int ix86_align_funcs;
const char *ix86_align_funcs_string;

/* Power of two alignment for loops (numeric, set in override_options).  */
int ix86_align_loops;

/* Power of two alignment for non-loop jumps (numeric).  */
int ix86_align_jumps;
/* Forward declarations of the static helpers defined later in this
   file.  PARAMS keeps the prototypes acceptable to pre-ANSI hosts.  */
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static enum rtx_code unsigned_comparison PARAMS ((enum rtx_code code));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum machine_mode ix86_fp_compare_mode PARAMS ((enum rtx_code));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx ix86_expand_compare PARAMS ((enum rtx_code));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					  rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static HOST_WIDE_INT ix86_compute_frame_size PARAMS((HOST_WIDE_INT,
						     int *, int *, int *));
static int ix86_nsaved_regs PARAMS((void));
static void ix86_emit_save_regs PARAMS((void));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));

/* A decomposed x86 effective address: BASE + INDEX*SCALE + DISP.  */
struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
438\f
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  /* Per-processor tuning defaults, indexed by enum processor_type.
     Negative alignments request "align only if skipping few bytes"
     semantics (interpreted by the alignment users).  */
  static struct ptt
    {
      struct processor_costs *cost;	/* Processor costs */
      int target_enable;		/* Target flags to enable.  */
      int target_disable;		/* Target flags to disable.  */
      int align_loop;			/* Default alignments.  */
      int align_jump;
      int align_func;
      int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 2, 2, 2, 1},
      {&i486_cost, 0, 0, 4, 4, 4, 1},
      {&pentium_cost, 0, 0, -4, -4, -4, 1},
      {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
      {&k6_cost, 0, 0, -5, -5, 4, 1},
      {&athlon_cost, 0, 0, 4, -4, 4, 1}
    };

  /* Mapping from -march=/-mcpu= names to processor enum values.  */
  static struct pta
    {
      const char *name;		/* processor name or nickname.  */
      enum processor_type processor;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386},
      {"i486", PROCESSOR_I486},
      {"i586", PROCESSOR_PENTIUM},
      {"pentium", PROCESSOR_PENTIUM},
      {"i686", PROCESSOR_PENTIUMPRO},
      {"pentiumpro", PROCESSOR_PENTIUMPRO},
      {"k6", PROCESSOR_K6},
      {"athlon", PROCESSOR_ATHLON},
    };

  int const pta_size = sizeof(processor_alias_table)/sizeof(struct pta);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* Architecture defaults to i386; tuning cpu to the configured
     default, possibly overridden below by -march=/-mcpu=.  */
  ix86_arch = PROCESSOR_I386;
  ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;

  if (ix86_arch_string != 0)
    {
      int i;
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
	  {
	    ix86_arch = processor_alias_table[i].processor;
	    /* Default cpu tuning to the architecture.  */
	    ix86_cpu = ix86_arch;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -march= switch", ix86_arch_string);
    }

  /* -mcpu= overrides the tuning cpu (but not the architecture).  */
  if (ix86_cpu_string != 0)
    {
      int i;
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
	  {
	    ix86_cpu = processor_alias_table[i].processor;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
    }

  /* Install the tuning defaults for the selected cpu.  */
  ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;

  /* Validate registers in register allocation order.  */
  if (ix86_reg_alloc_order)
    {
      int i, ch;
      for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a': regno = 0; break;
	    case 'd': regno = 1; break;
	    case 'c': regno = 2; break;
	    case 'b': regno = 3; break;
	    case 'S': regno = 4; break;
	    case 'D': regno = 5; break;
	    case 'B': regno = 6; break;

	    default: fatal ("Register '%c' is unknown", ch);
	    }

	  if (regs_allocated[regno])
	    fatal ("Register '%c' already specified in allocation order", ch);

	  regs_allocated[regno] = 1;
	}
    }

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      ix86_regparm = atoi (ix86_regparm_string);
      if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
	fatal ("-mregparm=%d is not between 0 and %d",
	       ix86_regparm, REGPARM_MAX);
    }

  /* Validate -malign-loops= value, or provide default.  */
  ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
  if (ix86_align_loops_string)
    {
      ix86_align_loops = atoi (ix86_align_loops_string);
      if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
	fatal ("-malign-loops=%d is not between 0 and %d",
	       ix86_align_loops, MAX_CODE_ALIGN);
    }

  /* Validate -malign-jumps= value, or provide default.  */
  ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
  if (ix86_align_jumps_string)
    {
      ix86_align_jumps = atoi (ix86_align_jumps_string);
      if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
	fatal ("-malign-jumps=%d is not between 0 and %d",
	       ix86_align_jumps, MAX_CODE_ALIGN);
    }

  /* Validate -malign-functions= value, or provide default.  */
  ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
  if (ix86_align_funcs_string)
    {
      ix86_align_funcs = atoi (ix86_align_funcs_string);
      if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
	fatal ("-malign-functions=%d is not between 0 and %d",
	       ix86_align_funcs, MAX_CODE_ALIGN);
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128.  */
  ix86_preferred_stack_boundary = 128;
  if (ix86_preferred_stack_boundary_string)
    {
      int i = atoi (ix86_preferred_stack_boundary_string);
      if (i < 2 || i > 31)
	fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
      /* NOTE(review): with i == 31 the expression `1 << i` overflows a
	 32-bit signed int (undefined behavior) before the multiply by
	 BITS_PER_UNIT; the useful range is far smaller -- consider
	 tightening the accepted upper bound.  */
      ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      ix86_branch_cost = atoi (ix86_branch_cost_string);
      if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
	fatal ("-mbranch-cost=%d is not between 0 and 5",
	       ix86_branch_cost);
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_fast_math)
    target_flags &= ~MASK_IEEE_FP;

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    target_flags |= MASK_MMX;
}
638\f
639/* A C statement (sans semicolon) to choose the order in which to
640 allocate hard registers for pseudo-registers local to a basic
641 block.
642
643 Store the desired register order in the array `reg_alloc_order'.
644 Element 0 should be the register to allocate first; element 1, the
645 next register; and so on.
646
647 The macro body should not assume anything about the contents of
648 `reg_alloc_order' before execution of the macro.
649
650 On most machines, it is not necessary to define this macro. */
651
652void
653order_regs_for_local_alloc ()
654{
00c79232 655 int i, ch, order;
f5316dfe 656
e9a25f70
JL
657 /* User specified the register allocation order. */
658
e075ae69 659 if (ix86_reg_alloc_order)
f5316dfe 660 {
e075ae69 661 for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
f5316dfe 662 {
00c79232 663 int regno = 0;
79325812 664
f5316dfe
MM
665 switch (ch)
666 {
667 case 'a': regno = 0; break;
668 case 'd': regno = 1; break;
669 case 'c': regno = 2; break;
670 case 'b': regno = 3; break;
671 case 'S': regno = 4; break;
672 case 'D': regno = 5; break;
673 case 'B': regno = 6; break;
674 }
675
676 reg_alloc_order[order++] = regno;
677 }
678
679 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
680 {
e9a25f70 681 if (! regs_allocated[i])
f5316dfe
MM
682 reg_alloc_order[order++] = i;
683 }
684 }
685
e9a25f70 686 /* If user did not specify a register allocation order, use natural order. */
f5316dfe
MM
687 else
688 {
689 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
690 reg_alloc_order[i] = i;
f5316dfe
MM
691 }
692}
32b5b1aa
SC
693\f
694void
c6aded7c 695optimization_options (level, size)
32b5b1aa 696 int level;
bb5177ac 697 int size ATTRIBUTE_UNUSED;
32b5b1aa 698{
e9a25f70
JL
699 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
700 make the problem with not enough registers even worse. */
32b5b1aa
SC
701#ifdef INSN_SCHEDULING
702 if (level > 1)
703 flag_schedule_insns = 0;
704#endif
705}
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for DECL.  The attributes in ATTRIBUTES have previously been
   assigned to DECL.  */

int
ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
     tree decl ATTRIBUTE_UNUSED;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier ATTRIBUTE_UNUSED;
     tree args ATTRIBUTE_UNUSED;
{
  /* This target recognizes no decl attributes; its attributes are all
     handled as type attributes (see ix86_valid_type_attribute_p).  */
  return 0;
}
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for TYPE.  The attributes in ATTRIBUTES have previously been
   assigned to TYPE.  */

int
ix86_valid_type_attribute_p (type, attributes, identifier, args)
     tree type;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier;
     tree args;
{
  /* NOTE(review): FIELD_DECL and TYPE_DECL are decl codes, yet TYPE is
     documented as a type -- presumably decls can reach here too; confirm
     against the callers of VALID_MACHINE_TYPE_ATTRIBUTE.  */
  if (TREE_CODE (type) != FUNCTION_TYPE
      && TREE_CODE (type) != METHOD_TYPE
      && TREE_CODE (type) != FIELD_DECL
      && TREE_CODE (type) != TYPE_DECL)
    return 0;

  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  It takes no arguments.  */
  if (is_attribute_p ("stdcall", identifier))
    return (args == NULL_TREE);

  /* Cdecl attribute says the callee is a normal C declaration.  It takes
     no arguments.  */
  if (is_attribute_p ("cdecl", identifier))
    return (args == NULL_TREE);

  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  It requires exactly one integer argument in
     the range [0, REGPARM_MAX].  */
  if (is_attribute_p ("regparm", identifier))
    {
      tree cst;

      /* Must be a single-element list with a non-null value.  */
      if (! args || TREE_CODE (args) != TREE_LIST
	  || TREE_CHAIN (args) != NULL_TREE
	  || TREE_VALUE (args) == NULL_TREE)
	return 0;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	return 0;

      /* Reject counts above the number of argument registers.  */
      if (compare_tree_int (cst, REGPARM_MAX) > 0)
	return 0;

      return 1;
    }

  return 0;
}
770
771/* Return 0 if the attributes for two types are incompatible, 1 if they
772 are compatible, and 2 if they are nearly compatible (which causes a
773 warning to be generated). */
774
775int
e075ae69 776ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
777 tree type1;
778 tree type2;
b08de47e 779{
afcfe58c 780 /* Check for mismatch of non-default calling convention. */
69ddee61 781 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
782
783 if (TREE_CODE (type1) != FUNCTION_TYPE)
784 return 1;
785
786 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
787 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
788 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 789 return 0;
b08de47e
MM
790 return 1;
791}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  /* -mrtd applies, except to library calls (identifier-node FUNDECL).  */
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall functions will pop the stack if not variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    /* Callee pops everything only when the argument list is fixed
       (terminated by void rather than being varargs/unprototyped).  */
    if (rtd
	&& (TYPE_ARG_TYPES (funtype) == NULL_TREE
	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		== void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument: the callee pops the
     hidden return-slot pointer.  */
  if (aggregate_value_p (TREE_TYPE (funtype)))
    return GET_MODE_SIZE (Pmode);

  return 0;
}
b08de47e
MM
/* Argument support functions.  */

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.
     The global default can be overridden per function by the
     "regparm" attribute.  */
  cum->nregs = ix86_regparm;
  if (fntype)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_mode' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  /* A final entry that is not void_type_node means "...":
	     disable register passing entirely.  */
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    cum->nregs = 0;
	}
    }

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
901
/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* whether or not the argument was named */
{
  /* BLKmode sizes come from the type; other modes know their own size.  */
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

  /* Consume WORDS argument registers; once they run out, clamp the
     counters so subsequent args go on the stack.  */
  cum->words += words;
  cum->nregs -= words;
  cum->regno += words;

  if (cum->nregs <= 0)
    {
      cum->nregs = 0;
      cum->regno = 0;
    }

  return;
}
934
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

struct rtx_def *
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* != 0 for normal args, == 0 for ... args */
{
  rtx ret   = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  switch (mode)
    {
      /* For now, pass fp/complex values on the stack.  */
    default:
      break;

    case BLKmode:
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      /* Use a register only if the whole argument fits in the
	 registers that remain.  */
      if (words <= cum->nregs)
	ret = gen_rtx_REG (mode, cum->regno);
      break;
    }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
e075ae69 992\f
8bad7136
JL
993
994/* Return nonzero if OP is (const_int 1), else return zero. */
995
996int
997const_int_1_operand (op, mode)
998 rtx op;
999 enum machine_mode mode ATTRIBUTE_UNUSED;
1000{
1001 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
1002}
1003
e075ae69
RH
1004/* Returns 1 if OP is either a symbol reference or a sum of a symbol
1005 reference and a constant. */
b08de47e
MM
1006
1007int
e075ae69
RH
1008symbolic_operand (op, mode)
1009 register rtx op;
1010 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1011{
e075ae69 1012 switch (GET_CODE (op))
2a2ab3f9 1013 {
e075ae69
RH
1014 case SYMBOL_REF:
1015 case LABEL_REF:
1016 return 1;
1017
1018 case CONST:
1019 op = XEXP (op, 0);
1020 if (GET_CODE (op) == SYMBOL_REF
1021 || GET_CODE (op) == LABEL_REF
1022 || (GET_CODE (op) == UNSPEC
1023 && XINT (op, 1) >= 6
1024 && XINT (op, 1) <= 7))
1025 return 1;
1026 if (GET_CODE (op) != PLUS
1027 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1028 return 0;
1029
1030 op = XEXP (op, 0);
1031 if (GET_CODE (op) == SYMBOL_REF
1032 || GET_CODE (op) == LABEL_REF)
1033 return 1;
1034 /* Only @GOTOFF gets offsets. */
1035 if (GET_CODE (op) != UNSPEC
1036 || XINT (op, 1) != 7)
1037 return 0;
1038
1039 op = XVECEXP (op, 0, 0);
1040 if (GET_CODE (op) == SYMBOL_REF
1041 || GET_CODE (op) == LABEL_REF)
1042 return 1;
1043 return 0;
1044
1045 default:
1046 return 0;
2a2ab3f9
JVA
1047 }
1048}
2a2ab3f9 1049
e075ae69 1050/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 1051
e075ae69
RH
1052int
1053pic_symbolic_operand (op, mode)
1054 register rtx op;
1055 enum machine_mode mode ATTRIBUTE_UNUSED;
1056{
1057 if (GET_CODE (op) == CONST)
2a2ab3f9 1058 {
e075ae69
RH
1059 op = XEXP (op, 0);
1060 if (GET_CODE (op) == UNSPEC)
1061 return 1;
1062 if (GET_CODE (op) != PLUS
1063 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1064 return 0;
1065 op = XEXP (op, 0);
1066 if (GET_CODE (op) == UNSPEC)
1067 return 1;
2a2ab3f9 1068 }
e075ae69 1069 return 0;
2a2ab3f9 1070}
2a2ab3f9 1071
28d52ffb
RH
1072/* Test for a valid operand for a call instruction. Don't allow the
1073 arg pointer register or virtual regs since they may decay into
1074 reg + const, which the patterns can't handle. */
2a2ab3f9 1075
e075ae69
RH
1076int
1077call_insn_operand (op, mode)
1078 rtx op;
1079 enum machine_mode mode ATTRIBUTE_UNUSED;
1080{
1081 if (GET_CODE (op) != MEM)
1082 return 0;
1083 op = XEXP (op, 0);
2a2ab3f9 1084
e075ae69
RH
1085 /* Disallow indirect through a virtual register. This leads to
1086 compiler aborts when trying to eliminate them. */
1087 if (GET_CODE (op) == REG
1088 && (op == arg_pointer_rtx
564d80f4 1089 || op == frame_pointer_rtx
e075ae69
RH
1090 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
1091 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
1092 return 0;
2a2ab3f9 1093
28d52ffb
RH
1094 /* Disallow `call 1234'. Due to varying assembler lameness this
1095 gets either rejected or translated to `call .+1234'. */
1096 if (GET_CODE (op) == CONST_INT)
1097 return 0;
1098
cbbf65e0
RH
1099 /* Explicitly allow SYMBOL_REF even if pic. */
1100 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 1101 return 1;
2a2ab3f9 1102
cbbf65e0
RH
1103 /* Half-pic doesn't allow anything but registers and constants.
1104 We've just taken care of the later. */
1105 if (HALF_PIC_P ())
1106 return register_operand (op, Pmode);
1107
1108 /* Otherwise we can allow any general_operand in the address. */
1109 return general_operand (op, Pmode);
e075ae69 1110}
79325812 1111
e075ae69
RH
1112int
1113constant_call_address_operand (op, mode)
1114 rtx op;
1115 enum machine_mode mode ATTRIBUTE_UNUSED;
1116{
cbbf65e0
RH
1117 return (GET_CODE (op) == MEM
1118 && CONSTANT_ADDRESS_P (XEXP (op, 0))
1119 && GET_CODE (XEXP (op, 0)) != CONST_INT);
e075ae69 1120}
2a2ab3f9 1121
e075ae69 1122/* Match exactly zero and one. */
e9a25f70 1123
e075ae69
RH
1124int
1125const0_operand (op, mode)
1126 register rtx op;
1127 enum machine_mode mode;
1128{
1129 return op == CONST0_RTX (mode);
1130}
/* Match exactly the integer constant one.  MODE is ignored.  */

int
const1_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return op == const1_rtx;
}
2a2ab3f9 1139
e075ae69 1140/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 1141
e075ae69
RH
1142int
1143const248_operand (op, mode)
1144 register rtx op;
1145 enum machine_mode mode ATTRIBUTE_UNUSED;
1146{
1147 return (GET_CODE (op) == CONST_INT
1148 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1149}
e9a25f70 1150
e075ae69 1151/* True if this is a constant appropriate for an increment or decremenmt. */
81fd0956 1152
e075ae69
RH
1153int
1154incdec_operand (op, mode)
1155 register rtx op;
1156 enum machine_mode mode;
1157{
1158 if (op == const1_rtx || op == constm1_rtx)
1159 return 1;
1160 if (GET_CODE (op) != CONST_INT)
1161 return 0;
1162 if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
1163 return 1;
1164 if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
1165 return 1;
1166 if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
1167 return 1;
1168 return 0;
1169}
2a2ab3f9 1170
e075ae69
RH
1171/* Return false if this is the stack pointer, or any other fake
1172 register eliminable to the stack pointer. Otherwise, this is
1173 a register operand.
2a2ab3f9 1174
e075ae69
RH
1175 This is used to prevent esp from being used as an index reg.
1176 Which would only happen in pathological cases. */
5f1ec3e6 1177
e075ae69
RH
1178int
1179reg_no_sp_operand (op, mode)
1180 register rtx op;
1181 enum machine_mode mode;
1182{
1183 rtx t = op;
1184 if (GET_CODE (t) == SUBREG)
1185 t = SUBREG_REG (t);
564d80f4 1186 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 1187 return 0;
2a2ab3f9 1188
e075ae69 1189 return register_operand (op, mode);
2a2ab3f9 1190}
b840bfb0 1191
2c5a510c
RH
1192/* Return false if this is any eliminable register. Otherwise
1193 general_operand. */
1194
1195int
1196general_no_elim_operand (op, mode)
1197 register rtx op;
1198 enum machine_mode mode;
1199{
1200 rtx t = op;
1201 if (GET_CODE (t) == SUBREG)
1202 t = SUBREG_REG (t);
1203 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1204 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1205 || t == virtual_stack_dynamic_rtx)
1206 return 0;
1207
1208 return general_operand (op, mode);
1209}
1210
/* Return false if this is any eliminable register.  Otherwise
   register_operand or const_int.  */

int
nonmemory_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  /* Look through a SUBREG to the underlying register.  */
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  /* Reject registers that register elimination may rewrite into
     reg + const.  */
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
}
1229
e075ae69 1230/* Return true if op is a Q_REGS class register. */
b840bfb0 1231
e075ae69
RH
1232int
1233q_regs_operand (op, mode)
1234 register rtx op;
1235 enum machine_mode mode;
b840bfb0 1236{
e075ae69
RH
1237 if (mode != VOIDmode && GET_MODE (op) != mode)
1238 return 0;
1239 if (GET_CODE (op) == SUBREG)
1240 op = SUBREG_REG (op);
1241 return QI_REG_P (op);
1242}
b840bfb0 1243
e075ae69 1244/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 1245
e075ae69
RH
1246int
1247non_q_regs_operand (op, mode)
1248 register rtx op;
1249 enum machine_mode mode;
1250{
1251 if (mode != VOIDmode && GET_MODE (op) != mode)
1252 return 0;
1253 if (GET_CODE (op) == SUBREG)
1254 op = SUBREG_REG (op);
1255 return NON_QI_REG_P (op);
1256}
b840bfb0 1257
e075ae69
RH
1258/* Return 1 if OP is a comparison operator that can use the condition code
1259 generated by a logical operation, which characteristicly does not set
1260 overflow or carry. To be used with CCNOmode. */
b840bfb0 1261
e075ae69
RH
1262int
1263no_comparison_operator (op, mode)
1264 register rtx op;
1265 enum machine_mode mode;
1266{
3a3677ff
RH
1267 if (mode != VOIDmode && GET_MODE (op) != mode)
1268 return 0;
1269
1270 switch (GET_CODE (op))
1271 {
1272 case EQ: case NE:
1273 case LT: case GE:
1274 case LEU: case LTU: case GEU: case GTU:
1275 return 1;
1276
1277 default:
1278 return 0;
1279 }
e075ae69 1280}
b840bfb0 1281
e075ae69 1282/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
b840bfb0 1283
e075ae69
RH
1284int
1285fcmov_comparison_operator (op, mode)
1286 register rtx op;
1287 enum machine_mode mode;
1288{
3a3677ff
RH
1289 if (mode != VOIDmode && GET_MODE (op) != mode)
1290 return 0;
1291
1292 switch (GET_CODE (op))
1293 {
1294 case EQ: case NE:
1295 case LEU: case LTU: case GEU: case GTU:
1296 case UNORDERED: case ORDERED:
1297 return 1;
1298
1299 default:
1300 return 0;
1301 }
1302}
1303
/* Return 1 if OP is any normal comparison operator plus {UN}ORDERED.  */

int
uno_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;

  switch (GET_CODE (op))
    {
    case EQ: case NE:
    case LE: case LT: case GE: case GT:
    case LEU: case LTU: case GEU: case GTU:
    case UNORDERED: case ORDERED:
      return 1;

    default:
      return 0;
    }
}
b840bfb0 1326
e9e80858
JH
1327/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1328
1329int
1330promotable_binary_operator (op, mode)
1331 register rtx op;
1332 enum machine_mode mode ATTRIBUTE_UNUSED;
1333{
1334 switch (GET_CODE (op))
1335 {
1336 case MULT:
1337 /* Modern CPUs have same latency for HImode and SImode multiply,
1338 but 386 and 486 do HImode multiply faster. */
1339 return ix86_cpu > PROCESSOR_I486;
1340 case PLUS:
1341 case AND:
1342 case IOR:
1343 case XOR:
1344 case ASHIFT:
1345 return 1;
1346 default:
1347 return 0;
1348 }
1349}
1350
e075ae69
RH
1351/* Nearly general operand, but accept any const_double, since we wish
1352 to be able to drop them into memory rather than have them get pulled
1353 into registers. */
b840bfb0 1354
2a2ab3f9 1355int
e075ae69
RH
1356cmp_fp_expander_operand (op, mode)
1357 register rtx op;
1358 enum machine_mode mode;
2a2ab3f9 1359{
e075ae69 1360 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 1361 return 0;
e075ae69 1362 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 1363 return 1;
e075ae69 1364 return general_operand (op, mode);
2a2ab3f9
JVA
1365}
1366
e075ae69 1367/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
1368
1369int
e075ae69 1370ext_register_operand (op, mode)
2a2ab3f9 1371 register rtx op;
bb5177ac 1372 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1373{
e075ae69
RH
1374 if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
1375 return 0;
1376 return register_operand (op, VOIDmode);
1377}
1378
/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      /* The operation itself must produce a floating-point value.  */
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}
fee2770d 1402
e075ae69
RH
1403int
1404mult_operator(op, mode)
1405 register rtx op;
1406 enum machine_mode mode ATTRIBUTE_UNUSED;
1407{
1408 return GET_CODE (op) == MULT;
1409}
1410
1411int
1412div_operator(op, mode)
1413 register rtx op;
1414 enum machine_mode mode ATTRIBUTE_UNUSED;
1415{
1416 return GET_CODE (op) == DIV;
1417}
0a726ef1
JL
1418
1419int
e075ae69
RH
1420arith_or_logical_operator (op, mode)
1421 rtx op;
1422 enum machine_mode mode;
0a726ef1 1423{
e075ae69
RH
1424 return ((mode == VOIDmode || GET_MODE (op) == mode)
1425 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1426 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
1427}
1428
e075ae69 1429/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
1430
1431int
e075ae69
RH
1432memory_displacement_operand (op, mode)
1433 register rtx op;
1434 enum machine_mode mode;
4f2c8ebb 1435{
e075ae69 1436 struct ix86_address parts;
e9a25f70 1437
e075ae69
RH
1438 if (! memory_operand (op, mode))
1439 return 0;
1440
1441 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1442 abort ();
1443
1444 return parts.disp != NULL_RTX;
4f2c8ebb
RS
1445}
1446
/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (general_operand (op, mode))
    return 1;

  /* Additionally accept (and (zero_extract X 8 8) (const_int C)),
     the shape jump re-emits for a high-byte test.  */
  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}
d784886d 1473
e075ae69
RH
1474/* Returns 1 if OP is memory operand that can not be represented by the
1475 modRM array. */
d784886d
RK
1476
1477int
e075ae69 1478long_memory_operand (op, mode)
d784886d
RK
1479 register rtx op;
1480 enum machine_mode mode;
1481{
e075ae69 1482 if (! memory_operand (op, mode))
d784886d
RK
1483 return 0;
1484
e075ae69 1485 return memory_address_length (op) != 0;
d784886d 1486}
2247f6ed
JH
1487
1488/* Return nonzero if the rtx is known aligned. */
1489
1490int
1491aligned_operand (op, mode)
1492 rtx op;
1493 enum machine_mode mode;
1494{
1495 struct ix86_address parts;
1496
1497 if (!general_operand (op, mode))
1498 return 0;
1499
1500 /* Registers and immediate operands are always "aligned". */
1501 if (GET_CODE (op) != MEM)
1502 return 1;
1503
1504 /* Don't even try to do any aligned optimizations with volatiles. */
1505 if (MEM_VOLATILE_P (op))
1506 return 0;
1507
1508 op = XEXP (op, 0);
1509
1510 /* Pushes and pops are only valid on the stack pointer. */
1511 if (GET_CODE (op) == PRE_DEC
1512 || GET_CODE (op) == POST_INC)
1513 return 1;
1514
1515 /* Decode the address. */
1516 if (! ix86_decompose_address (op, &parts))
1517 abort ();
1518
1519 /* Look for some component that isn't known to be aligned. */
1520 if (parts.index)
1521 {
1522 if (parts.scale < 4
bdb429a5 1523 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
2247f6ed
JH
1524 return 0;
1525 }
1526 if (parts.base)
1527 {
bdb429a5 1528 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
2247f6ed
JH
1529 return 0;
1530 }
1531 if (parts.disp)
1532 {
1533 if (GET_CODE (parts.disp) != CONST_INT
1534 || (INTVAL (parts.disp) & 3) != 0)
1535 return 0;
1536 }
1537
1538 /* Didn't find one -- this must be an aligned address. */
1539 return 1;
1540}
e075ae69
RH
1541\f
1542/* Return true if the constant is something that can be loaded with
1543 a special instruction. Only handle 0.0 and 1.0; others are less
1544 worthwhile. */
57dbca5e
BS
1545
1546int
e075ae69
RH
1547standard_80387_constant_p (x)
1548 rtx x;
57dbca5e 1549{
e075ae69
RH
1550 if (GET_CODE (x) != CONST_DOUBLE)
1551 return -1;
1552
1553#if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
1554 {
1555 REAL_VALUE_TYPE d;
1556 jmp_buf handler;
1557 int is0, is1;
1558
1559 if (setjmp (handler))
1560 return 0;
1561
1562 set_float_handler (handler);
1563 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
1564 is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
1565 is1 = REAL_VALUES_EQUAL (d, dconst1);
1566 set_float_handler (NULL_PTR);
1567
1568 if (is0)
1569 return 1;
1570
1571 if (is1)
1572 return 2;
1573
1574 /* Note that on the 80387, other constants, such as pi,
1575 are much slower to load as standard constants
1576 than to load from doubles in memory! */
1577 /* ??? Not true on K6: all constants are equal cost. */
1578 }
1579#endif
1580
1581 return 0;
57dbca5e
BS
1582}
1583
2a2ab3f9
JVA
1584/* Returns 1 if OP contains a symbol reference */
1585
1586int
1587symbolic_reference_mentioned_p (op)
1588 rtx op;
1589{
6f7d635c 1590 register const char *fmt;
2a2ab3f9
JVA
1591 register int i;
1592
1593 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
1594 return 1;
1595
1596 fmt = GET_RTX_FORMAT (GET_CODE (op));
1597 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
1598 {
1599 if (fmt[i] == 'E')
1600 {
1601 register int j;
1602
1603 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
1604 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
1605 return 1;
1606 }
e9a25f70 1607
2a2ab3f9
JVA
1608 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
1609 return 1;
1610 }
1611
1612 return 0;
1613}
e075ae69
RH
1614
1615/* Return 1 if it is appropriate to emit `ret' instructions in the
1616 body of a function. Do this only if the epilogue is simple, needing a
1617 couple of insns. Prior to reloading, we can't tell how many registers
1618 must be saved, so return 0 then. Return 0 if there is no frame
1619 marker to de-allocate.
1620
1621 If NON_SAVING_SETJMP is defined and true, then it is not possible
1622 for the epilogue to be simple, so return 0. This is a special case
1623 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1624 until final, but jump_optimize may need to know sooner if a
1625 `return' is OK. */
32b5b1aa
SC
1626
1627int
e075ae69 1628ix86_can_use_return_insn_p ()
32b5b1aa 1629{
9a7372d6
RH
1630 HOST_WIDE_INT tsize;
1631 int nregs;
1632
e075ae69
RH
1633#ifdef NON_SAVING_SETJMP
1634 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
1635 return 0;
1636#endif
9a7372d6
RH
1637#ifdef FUNCTION_BLOCK_PROFILER_EXIT
1638 if (profile_block_flag == 2)
1639 return 0;
1640#endif
1641
1642 if (! reload_completed || frame_pointer_needed)
1643 return 0;
32b5b1aa 1644
9a7372d6
RH
1645 /* Don't allow more than 32 pop, since that's all we can do
1646 with one instruction. */
1647 if (current_function_pops_args
1648 && current_function_args_size >= 32768)
e075ae69 1649 return 0;
32b5b1aa 1650
9a7372d6
RH
1651 tsize = ix86_compute_frame_size (get_frame_size (), &nregs, NULL, NULL);
1652 return tsize == 0 && nregs == 0;
e075ae69
RH
1653}
1654\f
21a427cc 1655static char *pic_label_name;
e075ae69 1656static int pic_label_output;
21a427cc 1657static char *global_offset_table_name;
e9a25f70 1658
e075ae69
RH
1659/* This function generates code for -fpic that loads %ebx with
1660 the return address of the caller and then returns. */
1661
1662void
1663asm_output_function_prefix (file, name)
1664 FILE *file;
3cce094d 1665 const char *name ATTRIBUTE_UNUSED;
e075ae69
RH
1666{
1667 rtx xops[2];
1668 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1669 || current_function_uses_const_pool);
1670 xops[0] = pic_offset_table_rtx;
1671 xops[1] = stack_pointer_rtx;
32b5b1aa 1672
e075ae69
RH
1673 /* Deep branch prediction favors having a return for every call. */
1674 if (pic_reg_used && TARGET_DEEP_BRANCH_PREDICTION)
32b5b1aa 1675 {
e075ae69
RH
1676 if (!pic_label_output)
1677 {
1678 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
1679 internal (non-global) label that's being emitted, it didn't make
1680 sense to have .type information for local labels. This caused
1681 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
1682 me debug info for a label that you're declaring non-global?) this
1683 was changed to call ASM_OUTPUT_LABEL() instead. */
32b5b1aa 1684
e075ae69 1685 ASM_OUTPUT_LABEL (file, pic_label_name);
e9a25f70 1686
e075ae69
RH
1687 xops[1] = gen_rtx_MEM (SImode, xops[1]);
1688 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
1689 output_asm_insn ("ret", xops);
0afeb08a 1690
e075ae69 1691 pic_label_output = 1;
32b5b1aa 1692 }
32b5b1aa 1693 }
32b5b1aa 1694}
32b5b1aa 1695
e075ae69
RH
1696void
1697load_pic_register ()
32b5b1aa 1698{
e075ae69 1699 rtx gotsym, pclab;
32b5b1aa 1700
21a427cc
AS
1701 if (global_offset_table_name == NULL)
1702 {
1703 global_offset_table_name =
1704 ggc_alloc_string ("_GLOBAL_OFFSET_TABLE_", 21);
1705 ggc_add_string_root (&global_offset_table_name, 1);
1706 }
1707 gotsym = gen_rtx_SYMBOL_REF (Pmode, global_offset_table_name);
32b5b1aa 1708
e075ae69 1709 if (TARGET_DEEP_BRANCH_PREDICTION)
32b5b1aa 1710 {
21a427cc
AS
1711 if (pic_label_name == NULL)
1712 {
1713 pic_label_name = ggc_alloc_string (NULL, 32);
1714 ggc_add_string_root (&pic_label_name, 1);
1715 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
1716 }
e075ae69 1717 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
32b5b1aa 1718 }
e075ae69 1719 else
e5cb57e8 1720 {
e075ae69 1721 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
e5cb57e8 1722 }
e5cb57e8 1723
e075ae69 1724 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
2a2ab3f9 1725
e075ae69
RH
1726 if (! TARGET_DEEP_BRANCH_PREDICTION)
1727 emit_insn (gen_popsi1 (pic_offset_table_rtx));
79325812 1728
e075ae69 1729 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
e9a25f70 1730}
8dfe5673 1731
e075ae69 1732/* Generate an SImode "push" pattern for input ARG. */
e9a25f70 1733
e075ae69
RH
1734static rtx
1735gen_push (arg)
1736 rtx arg;
e9a25f70 1737{
c5c76735
JL
1738 return gen_rtx_SET (VOIDmode,
1739 gen_rtx_MEM (SImode,
1740 gen_rtx_PRE_DEC (SImode,
1741 stack_pointer_rtx)),
1742 arg);
e9a25f70
JL
1743}
1744
0903fcab
JH
1745/* Return number of registers to be saved on the stack. */
1746
1747static int
1748ix86_nsaved_regs ()
1749{
1750 int nregs = 0;
1751 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1752 || current_function_uses_const_pool);
1753 int limit = (frame_pointer_needed
1754 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
1755 int regno;
1756
1757 for (regno = limit - 1; regno >= 0; regno--)
1758 if ((regs_ever_live[regno] && ! call_used_regs[regno])
1759 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1760 {
1761 nregs ++;
1762 }
1763 return nregs;
1764}
1765
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;			/* register number being eliminated */
     int to;			/* register number replacing it */
{
  int padding1;
  int nregs;

  /* Stack grows downward:

     [arguments]
						<- ARG_POINTER
     saved pc

     saved frame pointer if frame_pointer_needed
						<- HARD_FRAME_POINTER
     [saved regs]

     [padding1]          \
						|  <- FRAME_POINTER
     [frame]             > tsize
						|
     [padding2]          /
    */

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    /* Skip saved PC and previous frame pointer.
       Executed only when frame_pointer_needed.  */
    return 8;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    {
      /* The frame pointer sits below the saved regs and padding1.  */
      ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *)0);
      padding1 += nregs * UNITS_PER_WORD;
      return -padding1;
    }
  else
    {
      /* ARG_POINTER or FRAME_POINTER to STACK_POINTER elimination.  */
      int frame_size = frame_pointer_needed ? 8 : 4;
      HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (),
						     &nregs, &padding1, (int *)0);


      if (to != STACK_POINTER_REGNUM)
	abort ();
      else if (from == ARG_POINTER_REGNUM)
	return tsize + nregs * UNITS_PER_WORD + frame_size;
      else if (from != FRAME_POINTER_REGNUM)
	abort ();
      else
	return tsize - padding1;
    }
}
1823
65954bd8
JL
1824/* Compute the size of local storage taking into consideration the
1825 desired stack alignment which is to be maintained. Also determine
564d80f4
JH
1826 the number of registers saved below the local storage.
1827
1828 PADDING1 returns padding before stack frame and PADDING2 returns
1829 padding after stack frame;
1830 */
1831
1832static HOST_WIDE_INT
1833ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2)
65954bd8
JL
1834 HOST_WIDE_INT size;
1835 int *nregs_on_stack;
564d80f4
JH
1836 int *rpadding1;
1837 int *rpadding2;
65954bd8 1838{
65954bd8 1839 int nregs;
564d80f4
JH
1840 int padding1 = 0;
1841 int padding2 = 0;
65954bd8 1842 HOST_WIDE_INT total_size;
564d80f4 1843 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
44affdae
JH
1844 int offset;
1845 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
65954bd8 1846
564d80f4 1847 nregs = ix86_nsaved_regs ();
564d80f4 1848 total_size = size;
65954bd8 1849
44affdae 1850 offset = frame_pointer_needed ? 8 : 4;
564d80f4 1851
44affdae
JH
1852 /* Do some sanity checking of stack_alignment_needed and preferred_alignment,
1853 since i386 port is the only using those features that may break easilly. */
564d80f4 1854
44affdae
JH
1855 if (size && !stack_alignment_needed)
1856 abort ();
5f677a9e 1857 if (!size && stack_alignment_needed != STACK_BOUNDARY / BITS_PER_UNIT)
44affdae
JH
1858 abort ();
1859 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
1860 abort ();
1861 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1862 abort ();
1863 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1864 abort ();
564d80f4 1865
44affdae
JH
1866 if (stack_alignment_needed < 4)
1867 stack_alignment_needed = 4;
564d80f4 1868
44affdae 1869 offset += nregs * UNITS_PER_WORD;
65954bd8 1870
f73ad30e
JH
1871 if (ACCUMULATE_OUTGOING_ARGS)
1872 total_size += current_function_outgoing_args_size;
1873
44affdae 1874 total_size += offset;
65954bd8 1875
44affdae
JH
1876 /* Align start of frame for local function. */
1877 padding1 = ((offset + stack_alignment_needed - 1)
1878 & -stack_alignment_needed) - offset;
1879 total_size += padding1;
54ff41b7 1880
44affdae
JH
1881 /* Align stack boundary. */
1882 padding2 = ((total_size + preferred_alignment - 1)
1883 & -preferred_alignment) - total_size;
65954bd8 1884
f73ad30e
JH
1885 if (ACCUMULATE_OUTGOING_ARGS)
1886 padding2 += current_function_outgoing_args_size;
1887
65954bd8
JL
1888 if (nregs_on_stack)
1889 *nregs_on_stack = nregs;
564d80f4
JH
1890 if (rpadding1)
1891 *rpadding1 = padding1;
564d80f4
JH
1892 if (rpadding2)
1893 *rpadding2 = padding2;
1894
1895 return size + padding1 + padding2;
65954bd8
JL
1896}
1897
0903fcab
JH
1898/* Emit code to save registers in the prologue. */
1899
1900static void
1901ix86_emit_save_regs ()
1902{
1903 register int regno;
1904 int limit;
1905 rtx insn;
1906 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1907 || current_function_uses_const_pool);
1908 limit = (frame_pointer_needed
564d80f4 1909 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
0903fcab
JH
1910
1911 for (regno = limit - 1; regno >= 0; regno--)
1912 if ((regs_ever_live[regno] && !call_used_regs[regno])
1913 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1914 {
1915 insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
1916 RTX_FRAME_RELATED_P (insn) = 1;
1917 }
1918}
1919
e075ae69
RH
1920/* Expand the prologue into a bunch of separate insns. */
1921
void
ix86_expand_prologue ()
{
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *)0, (int *)0,
						 (int *)0);
  rtx insn;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      /* push %ebp; movl %esp, %ebp -- both marked frame-related for
	 unwind info.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  ix86_emit_save_regs ();

  if (tsize == 0)
    ;
  else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
    {
      /* Allocate the frame with a simple esp adjustment; tie it to ebp
	 when one exists so scheduling cannot reorder across it.  */
      if (frame_pointer_needed)
	insn = emit_insn (gen_pro_epilogue_adjust_stack
			  (stack_pointer_rtx, stack_pointer_rtx,
			   GEN_INT (-tsize), hard_frame_pointer_rtx));
      else
	insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				      GEN_INT (-tsize)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* Large frame with stack probing: call _alloca with the size in
	 %eax so each page is touched.  ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (tsize));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx));

      /* Record that the call uses %eax so dataflow keeps the move.  */
      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    load_pic_register ();

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  However, if -fpic, the above call will have
     done that.  */
  if ((profile_flag || profile_block_flag) && ! pic_reg_used)
    emit_insn (gen_blockage ());
}
1989
0903fcab
JH
1990/* Emit code to add TSIZE to esp value. Use POP instruction when
1991 profitable. */
1992
1993static void
1994ix86_emit_epilogue_esp_adjustment (tsize)
1995 int tsize;
1996{
bdeb029c
JH
1997 /* If a frame pointer is present, we must be sure to tie the sp
1998 to the fp so that we don't mis-schedule. */
1999 if (frame_pointer_needed)
2000 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2001 stack_pointer_rtx,
2002 GEN_INT (tsize),
2003 hard_frame_pointer_rtx));
0903fcab 2004 else
bdeb029c
JH
2005 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2006 GEN_INT (tsize)));
0903fcab
JH
2007}
2008
da2d1d3a
JH
2009/* Emit code to restore saved registers using MOV insns. First register
2010 is restored from POINTER + OFFSET. */
2011static void
2012ix86_emit_restore_regs_using_mov (pointer, offset)
2013 rtx pointer;
2014 int offset;
2015{
2016 int regno;
2017 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2018 || current_function_uses_const_pool);
2019 int limit = (frame_pointer_needed
2020 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
2021
2022 for (regno = 0; regno < limit; regno++)
2023 if ((regs_ever_live[regno] && !call_used_regs[regno])
2024 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2025 {
2026 emit_move_insn (gen_rtx_REG (SImode, regno),
2027 adj_offsettable_operand (gen_rtx_MEM (SImode,
2028 pointer),
2029 offset));
2030 offset += 4;
2031 }
2032}
2033
79325812 2034/* Restore function stack, frame, and registers. */
e9a25f70 2035
void
ix86_expand_epilogue (emit_return)
     int emit_return;	/* false for sibcall epilogues: emit no return insn */
{
  int nregs;
  int regno;

  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  /* sp_valid: esp still points where the prologue left it, so it can
     be used to address the save area directly.  */
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
						 (int *)0, (int *)0);


  /* Calculate start of saved registers relative to ebp.  */
  offset = -nregs * UNITS_PER_WORD;

#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    {
      FUNCTION_BLOCK_PROFILER_EXIT;
    }
#endif

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.

     The default code results in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && nregs <= 1)
      || (frame_pointer_needed && !nregs && tsize)
      || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
	  && nregs == 1))
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !tsize))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx, tsize);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);

      if (!frame_pointer_needed)
	ix86_emit_epilogue_esp_adjustment (tsize + nregs * UNITS_PER_WORD);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size)
	emit_insn (gen_leave ());
      else
	{
	  /* Discrete equivalent of "leave": movl %ebp, %esp; popl %ebp.  */
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx,
						    hard_frame_pointer_rtx));
	  emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  /* esp is unusable; recover it from ebp, which must exist here.  */
	  if (!frame_pointer_needed)
	    abort ();
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset),
						    hard_frame_pointer_rtx));
	}
      else if (tsize)
	ix86_emit_epilogue_esp_adjustment (tsize);

      /* Pop in ascending order -- the reverse of the prologue pushes.  */
      for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
	if ((regs_ever_live[regno] && !call_used_regs[regno])
	    || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
	  emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (! emit_return)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
2149\f
2150/* Extract the parts of an RTL expression that is a valid memory address
2151 for an instruction. Return false if the structure of the address is
2152 grossly off. */
2153
static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;	/* out: base, index, disp, scale parts */
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;

  /* Match the canonical shapes an i386 address can take.  */
  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;			/* index*scale + base */
	  else
	    disp = op1;			/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return FALSE;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return FALSE;
      scale = INTVAL (tmp);
      /* Only shifts by 0..3 map onto the 1/2/4/8 scale encodings.  */
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return FALSE;
      scale = 1 << scale;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return FALSE;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is no scaling;
     swap them into the base slot, where they are encodable.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
	  || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return TRUE;
}
01329426
JH
2275\f
2276/* Return cost of the memory address x.
2277 For i386, it is better to use a complex address than let gcc copy
2278 the address into a reg and make a new pseudo. But not if the address
2279 requires to two regs - that would mean more pseudos with longer
2280 lifetimes. */
2281int
2282ix86_address_cost (x)
2283 rtx x;
2284{
2285 struct ix86_address parts;
2286 int cost = 1;
3b3c6a3f 2287
01329426
JH
2288 if (!ix86_decompose_address (x, &parts))
2289 abort ();
2290
2291 /* More complex memory references are better. */
2292 if (parts.disp && parts.disp != const0_rtx)
2293 cost--;
2294
2295 /* Attempt to minimize number of registers in the address. */
2296 if ((parts.base
2297 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2298 || (parts.index
2299 && (!REG_P (parts.index)
2300 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2301 cost++;
2302
2303 if (parts.base
2304 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2305 && parts.index
2306 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2307 && parts.base != parts.index)
2308 cost++;
2309
2310 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
2311 since it's predecode logic can't detect the length of instructions
2312 and it degenerates to vector decoded. Increase cost of such
2313 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
2314 to split such addresses or even refuse such addresses at all.
2315
2316 Following addressing modes are affected:
2317 [base+scale*index]
2318 [scale*index+disp]
2319 [base+index]
2320
2321 The first and last case may be avoidable by explicitly coding the zero in
2322 memory address, but I don't have AMD-K6 machine handy to check this
2323 theory. */
2324
2325 if (TARGET_K6
2326 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2327 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2328 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2329 cost += 10;
2330
2331 return cost;
2332}
2333\f
e075ae69
RH
2334/* Determine if a given CONST RTX is a valid memory displacement
2335 in PIC mode. */
2336
59be65f6 2337int
91bb873f
RH
2338legitimate_pic_address_disp_p (disp)
2339 register rtx disp;
2340{
2341 if (GET_CODE (disp) != CONST)
2342 return 0;
2343 disp = XEXP (disp, 0);
2344
2345 if (GET_CODE (disp) == PLUS)
2346 {
2347 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2348 return 0;
2349 disp = XEXP (disp, 0);
2350 }
2351
2352 if (GET_CODE (disp) != UNSPEC
2353 || XVECLEN (disp, 0) != 1)
2354 return 0;
2355
2356 /* Must be @GOT or @GOTOFF. */
2357 if (XINT (disp, 1) != 6
2358 && XINT (disp, 1) != 7)
2359 return 0;
2360
2361 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2362 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2363 return 0;
2364
2365 return 1;
2366}
2367
e075ae69
RH
2368/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2369 memory address for an instruction. The MODE argument is the machine mode
2370 for the MEM expression that wants to use this address.
2371
2372 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
2373 convert common non-canonical forms to canonical form so that they will
2374 be recognized. */
2375
3b3c6a3f
MM
int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;	/* nonzero: require hard regs / validated pseudos */
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (! ix86_decompose_address (addr, &parts))
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  Only 1, 2, 4 and 8 are encodable, and a
     scale requires an index register.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (!CONSTANT_ADDRESS_P (disp))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}

      if (GET_CODE (disp) == CONST_DOUBLE)
	{
	  reason = "displacement is a const_double";
	  goto report_error;
	}

      if (flag_pic && SYMBOLIC_CONST (disp))
	{
	  if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (HALF_PIC_P ())
	{
	  if (! HALF_PIC_ADDRESS_P (disp)
	      || (base != NULL_RTX || index != NULL_RTX))
	    {
	      reason = "displacement is an invalid half-pic reference";
	      goto report_error;
	    }
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
3b3c6a3f
MM
2555\f
2556/* Return a legitimate reference for ORIG (an address) using the
2557 register REG. If REG is 0, a new pseudo is generated.
2558
91bb873f 2559 There are two types of references that must be handled:
3b3c6a3f
MM
2560
2561 1. Global data references must load the address from the GOT, via
2562 the PIC reg. An insn is emitted to do this load, and the reg is
2563 returned.
2564
91bb873f
RH
2565 2. Static data references, constant pool addresses, and code labels
2566 compute the address as an offset from the GOT, whose base is in
2567 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2568 differentiate them from global data objects. The returned
2569 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
2570
2571 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 2572 reg also appears in the address. */
3b3c6a3f
MM
2573
rtx
legitimize_pic_address (orig, reg)
     rtx orig;	/* the address to legitimize */
     rtx reg;	/* scratch register, or 0 to allocate a fresh pseudo */
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

  if (GET_CODE (addr) == LABEL_REF
      || (GET_CODE (addr) == SYMBOL_REF
	  && (CONSTANT_POOL_ADDRESS_P (addr)
	      || SYMBOL_REF_FLAG (addr))))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  UNSPEC 7 is @GOTOFF here.  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 7);
      new = gen_rtx_CONST (VOIDmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      /* This symbol must be referenced via a load from the
	 Global Offset Table (@GOT).  UNSPEC 6 is @GOT here.  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 6);
      new = gen_rtx_CONST (VOIDmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
      new = gen_rtx_MEM (Pmode, new);
      /* GOT entries do not change during execution.  */
      RTX_UNCHANGING_P (new) = 1;

      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      emit_move_insn (reg, new);
      new = reg;
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == UNSPEC)
	    {
	      /* Check that the unspec is one of the ones we generate?  */
	    }
	  else if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if ((GET_CODE (op0) == LABEL_REF
	       || (GET_CODE (op0) == SYMBOL_REF
		   && (CONSTANT_POOL_ADDRESS_P (op0)
		       || SYMBOL_REF_FLAG (op0))))
	      && GET_CODE (op1) == CONST_INT)
	    {
	      current_function_uses_pic_offset_table = 1;
	      new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, op0), 7);
	      new = gen_rtx_PLUS (VOIDmode, new, op1);
	      new = gen_rtx_CONST (VOIDmode, new);
	      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

	      if (reg != 0)
		{
		  emit_move_insn (reg, new);
		  new = reg;
		}
	    }
	  else
	    {
	      /* Legitimize both operands recursively, then recombine.
		 Don't pass REG down twice if the first call consumed it.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new  = legitimize_pic_address (XEXP (addr, 1),
					     base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  /* Keep any constant term outermost in the PLUS.  */
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}
2677\f
3b3c6a3f
MM
2678/* Try machine-dependent ways of modifying an illegitimate address
2679 to be legitimate. If we find one, return the new, valid address.
2680 This macro is used in only one place: `memory_address' in explow.c.
2681
2682 OLDX is the address as it was before break_out_memory_refs was called.
2683 In some cases it is useful to look at this to decide what needs to be done.
2684
2685 MODE and WIN are passed so that this macro can use
2686 GO_IF_LEGITIMATE_ADDRESS.
2687
2688 It is always safe for this macro to do nothing. It exists to recognize
2689 opportunities to optimize the output.
2690
2691 For the 80386, we handle X+REG by loading X into a register R and
2692 using R+REG. R will go in a general reg and indexing will be used.
2693 However, if REG is a broken-out memory address or multiplication,
2694 nothing needs to be done because REG can certainly go in a general reg.
2695
2696 When -fpic is used, special handling is needed for symbolic references.
2697 See comments by legitimize_pic_address in i386.c for details. */
2698
rtx
legitimize_address (x, oldx, mode)
     register rtx x;
     register rtx oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply, in either
	 operand of the PLUS.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  /* Find which of the two constant positions holds the
	     CONST_INT; the other operand is kept symbolic.  */
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Force remaining multiplies into registers.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* As a last resort, force one operand into a register.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
2a2ab3f9
JVA
2862\f
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      /* "." (the current location) is only meaningful in PIC output.  */
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      /* Code 'P' requests a PLT-relative reference for symbols not
	 known to be defined locally (SYMBOL_REF_FLAG unset).  */
      assemble_name (file, XSTR (x, 0));
      if (code == 'P' && ! SYMBOL_REF_FLAG (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* VOIDmode CONST_DOUBLE is a wide integer constant.
	     We can use %d if the number is <32 bits and positive;
	     otherwise fall back to a hex rendering of both halves.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      /* Bracket the difference so the assembler groups it; the
	 delimiter differs between the Intel (1) and AT&T (0) dialects.  */
      putc (ASSEMBLER_DIALECT ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT ? ')' : ']', file);
      break;

    case UNSPEC:
      /* PIC relocations are wrapped in single-element UNSPECs; the
	 UNSPEC number selects the relocation suffix.  The numbers
	 (6=GOT, 7=GOTOFF, 8=PLT) must match the i386.md patterns
	 that create these UNSPECs.  */
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case 6:
	  fputs ("@GOT", file);
	  break;
	case 7:
	  fputs ("@GOTOFF", file);
	  break;
	case 8:
	  fputs ("@PLT", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
1865dbb5
JM
2977
2978/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
2979 We need to handle our special PIC relocations. */
2980
2981void
2982i386_dwarf_output_addr_const (file, x)
2983 FILE *file;
2984 rtx x;
2985{
2986 fprintf (file, "\t%s\t", INT_ASM_OP);
2987 if (flag_pic)
2988 output_pic_addr_const (file, x, '\0');
2989 else
2990 output_addr_const (file, x);
2991 fputc ('\n', file);
2992}
2993
2994/* In the name of slightly smaller debug output, and to cater to
2995 general assembler losage, recognize PIC+GOTOFF and turn it back
2996 into a direct symbol reference. */
2997
2998rtx
2999i386_simplify_dwarf_addr (orig_x)
3000 rtx orig_x;
3001{
3002 rtx x = orig_x;
3003
3004 if (GET_CODE (x) != PLUS
3005 || GET_CODE (XEXP (x, 0)) != REG
3006 || GET_CODE (XEXP (x, 1)) != CONST)
3007 return orig_x;
3008
3009 x = XEXP (XEXP (x, 1), 0);
3010 if (GET_CODE (x) == UNSPEC
3011 && XINT (x, 1) == 7)
3012 return XVECEXP (x, 0, 0);
3013
3014 if (GET_CODE (x) == PLUS
3015 && GET_CODE (XEXP (x, 0)) == UNSPEC
3016 && GET_CODE (XEXP (x, 1)) == CONST_INT
3017 && XINT (XEXP (x, 0), 1) == 7)
3018 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
3019
3020 return orig_x;
3021}
2a2ab3f9 3022\f
a269a03c 3023static void
e075ae69 3024put_condition_code (code, mode, reverse, fp, file)
a269a03c 3025 enum rtx_code code;
e075ae69
RH
3026 enum machine_mode mode;
3027 int reverse, fp;
a269a03c
JC
3028 FILE *file;
3029{
a269a03c
JC
3030 const char *suffix;
3031
a269a03c
JC
3032 if (reverse)
3033 code = reverse_condition (code);
e075ae69 3034
a269a03c
JC
3035 switch (code)
3036 {
3037 case EQ:
3038 suffix = "e";
3039 break;
a269a03c
JC
3040 case NE:
3041 suffix = "ne";
3042 break;
a269a03c 3043 case GT:
e075ae69
RH
3044 if (mode == CCNOmode)
3045 abort ();
3046 suffix = "g";
a269a03c 3047 break;
a269a03c 3048 case GTU:
e075ae69
RH
3049 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
3050 Those same assemblers have the same but opposite losage on cmov. */
3051 suffix = fp ? "nbe" : "a";
a269a03c 3052 break;
a269a03c 3053 case LT:
e075ae69 3054 if (mode == CCNOmode)
a269a03c
JC
3055 suffix = "s";
3056 else
e075ae69 3057 suffix = "l";
a269a03c 3058 break;
a269a03c
JC
3059 case LTU:
3060 suffix = "b";
3061 break;
a269a03c 3062 case GE:
e075ae69 3063 if (mode == CCNOmode)
a269a03c
JC
3064 suffix = "ns";
3065 else
e075ae69 3066 suffix = "ge";
a269a03c 3067 break;
a269a03c 3068 case GEU:
e075ae69
RH
3069 /* ??? As above. */
3070 suffix = fp ? "nb" : "ae";
a269a03c 3071 break;
a269a03c 3072 case LE:
e075ae69
RH
3073 if (mode == CCNOmode)
3074 abort ();
3075 suffix = "le";
a269a03c 3076 break;
a269a03c
JC
3077 case LEU:
3078 suffix = "be";
3079 break;
3a3677ff
RH
3080 case UNORDERED:
3081 suffix = "p";
3082 break;
3083 case ORDERED:
3084 suffix = "np";
3085 break;
a269a03c
JC
3086 default:
3087 abort ();
3088 }
3089 fputs (suffix, file);
3090}
3091
e075ae69
RH
/* Print the name of register X to FILE, in the size/flavor selected
   by operand code CODE ('w' word, 'b' low byte, 'h' high byte,
   'k' dword, 'y' "st(0)" form, 'm' MMX name; otherwise the size is
   taken from the mode of X).  */

void
print_reg (x, code, file)
     rtx x;
     int code;
     FILE *file;
{
  /* These internal pseudo-registers never appear in assembler output.  */
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  /* AT&T syntax always prefixes registers with '%'; Intel syntax does
     too when user labels carry no prefix that would disambiguate them.  */
  if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  /* Translate the operand code letter into a byte size (with the
     special values 0 = high byte, 3 = st(0) form, 5 = MMX name).
     The order matters: an explicit letter wins over MMX_REG_P.  */
  if (code == 'w')
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else if (code == 'm' || MMX_REG_P (x))
    code = 5;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  switch (code)
    {
    case 5:
      /* MMX registers share the hi_reg_name table slots.  */
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 4:
    case 8:
    case 12:
      /* 32-bit integer registers get the 'e' prefix ("eax" etc.);
	 FP stack regs do not.  */
      if (! FP_REG_P (x))
	putc ('e', file);
      /* FALLTHRU */
    case 16:
    case 2:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
3154
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   m -- print "st(n)" as an mmx register.  */

void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  if (code)
    {
      switch (code)
	{
	  /* The simple letter codes emit a fixed suffix/character in
	     AT&T syntax and nothing in Intel syntax.  */
	case '*':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('*', file);
	  return;

	case 'L':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('t', file);
	  return;

	case 'z':
	  /* 387 opcodes don't get size suffixes if the operands are
	     registers.  */
	  if (STACK_REG_P (x))
	    return;

	  /* Intel syntax has no truck with instruction suffixes.  */
	  if (ASSEMBLER_DIALECT != 0)
	    return;

	  /* this is the size of op from size of operand */
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 2:
	      /* Only newer gas accepts the 's' suffix on fild/fist.  */
#ifdef HAVE_GAS_FILDS_FISTS
	      putc ('s', file);
#endif
	      return;

	    case 4:
	      if (GET_MODE (x) == SFmode)
		{
		  putc ('s', file);
		  return;
		}
	      else
		putc ('l', file);
	      return;

	    case 12:
	      putc ('t', file);
	      return;

	    case 8:
	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
		{
#ifdef GAS_MNEMONICS
		  putc ('q', file);
#else
		  /* Older assemblers spell the 64-bit integer suffix "ll".  */
		  putc ('l', file);
		  putc ('l', file);
#endif
		}
	      else
		putc ('l', file);
	      return;

	    default:
	      abort ();
	    }

	  /* These codes are handled below, after the operand itself is
	     classified (print_reg consumes them for register operands).  */
	case 'b':
	case 'w':
	case 'k':
	case 'h':
	case 'y':
	case 'm':
	case 'X':
	case 'P':
	  break;

	case 's':
	  /* Shift-double count: printed only when it is a constant or
	     the assembler requires an explicit count operand.  */
	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      PRINT_OPERAND (file, x, 0);
	      putc (',', file);
	    }
	  return;

	case 'C':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
	  return;
	case 'F':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
	  return;

	  /* Like above, but reverse condition */
	case 'c':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
	  return;
	case 'f':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
	  return;

	default:
	  {
	    char str[50];
	    sprintf (str, "invalid operand code `%c'", code);
	    output_operand_lossage (str);
	  }
	}
    }

  if (GET_CODE (x) == REG)
    {
      PRINT_REG (x, code, file);
    }

  else if (GET_CODE (x) == MEM)
    {
      /* Intel syntax wants an explicit operand-size prefix on memory
	 references.  No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
	{
	  const char * size;

	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "XWORD"; break;
	    case 16: size = "XMMWORD"; break;
	    default:
	      abort ();
	    }
	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      if (flag_pic && CONSTANT_ADDRESS_P (x))
	output_pic_addr_const (file, x, code);
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      /* SFmode immediates are emitted as the hex image of the
	 target single-precision bit pattern.  */
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == 0)
	putc ('$', file);
      fprintf (file, "0x%lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }
  else
    {
      /* Remaining cases are constants (integers, symbols, labels).
	 Most get an immediate marker; code 'P' suppresses it.  */
      if (code != 'P')
	{
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
3404\f
/* Print a memory operand whose address is ADDR.  */

void
print_operand_address (file, addr)
     FILE *file;
     register rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  /* Break the address into base register, index register, scale
     factor and displacement; reject anything unrecognizable.  */
  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
	{
	  /* Intel syntax needs an explicit segment for a bare
	     absolute address.  */
	  if (ASSEMBLER_DIALECT != 0)
	    fputs ("ds:", file);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
	}
      else if (flag_pic)
	output_pic_addr_const (file, addr, 0);
      else
	output_addr_const (file, addr);
    }
  else
    {
      if (ASSEMBLER_DIALECT == 0)
	{
	  /* AT&T syntax: disp(base,index,scale).  */
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    PRINT_REG (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  /* Intel syntax: [base+disp+index*scale].  */
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      PRINT_REG (base, 0, file);
	      if (offset)
		{
		  /* Negative offsets carry their own '-'.  */
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    /* "[index*scale]" alone is invalid; anchor it with a 0.  */
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
3518\f
3519/* Split one or more DImode RTL references into pairs of SImode
3520 references. The RTL can be REG, offsettable MEM, integer constant, or
3521 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3522 split and "num" is its length. lo_half and hi_half are output arrays
3523 that parallel "operands". */
3524
3525void
3526split_di (operands, num, lo_half, hi_half)
3527 rtx operands[];
3528 int num;
3529 rtx lo_half[], hi_half[];
3530{
3531 while (num--)
3532 {
57dbca5e 3533 rtx op = operands[num];
e075ae69
RH
3534 if (CONSTANT_P (op))
3535 split_double (op, &lo_half[num], &hi_half[num]);
3536 else if (! reload_completed)
a269a03c
JC
3537 {
3538 lo_half[num] = gen_lowpart (SImode, op);
3539 hi_half[num] = gen_highpart (SImode, op);
3540 }
3541 else if (GET_CODE (op) == REG)
2a2ab3f9 3542 {
57dbca5e
BS
3543 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3544 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
2a2ab3f9 3545 }
57dbca5e 3546 else if (offsettable_memref_p (op))
2a2ab3f9 3547 {
57dbca5e
BS
3548 rtx lo_addr = XEXP (op, 0);
3549 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3550 lo_half[num] = change_address (op, SImode, lo_addr);
3551 hi_half[num] = change_address (op, SImode, hi_addr);
2a2ab3f9
JVA
3552 }
3553 else
564d80f4 3554 abort ();
2a2ab3f9
JVA
3555 }
3556}
3557\f
2a2ab3f9
JVA
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (insn, operands)
     rtx insn;
     rtx *operands;
{
  /* Static buffer: the returned template must outlive this call.  */
  static char buf[30];
  const char *p;

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else
    abort ();
#endif

  /* First pick the mnemonic stem; the integer forms (fiadd etc.) are
     used when either source operand is an integer in memory.  */
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      break;

    default:
      abort ();
    }

  strcpy (buf, p);

  /* Now choose the operand form / pop suffix.  */
  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative: canonicalize so the destination matches
	 operands[1].  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative: a memory operands[1] needs the reversed
	 (fsubr/fdivr) form.  */
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}
e075ae69 3766
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.

   operands[2] is the control-word memory slot, operands[3] a scratch
   location, and %4 a scratch register used to save/restore the
   original control word (NOTE(review): inferred from the templates
   below -- confirm against the insn patterns in i386.md).  */

const char *
output_fix_trunc (insn, operands)
     rtx insn;
     rtx *operands;
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  rtx xops[4];

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (! STACK_TOP_P (operands[1]))
    abort ();

  /* 12 (0x0c) written into the high byte of the control word sets
     the rounding-control bits; presumably round-toward-zero
     (truncate) -- confirm against the 387 control word layout.  */
  xops[0] = GEN_INT (12);
  xops[1] = adj_offsettable_operand (operands[2], 1);
  xops[1] = change_address (xops[1], QImode, NULL_RTX);

  /* Store directly to a MEM destination; otherwise go via the
     scratch slot operands[3].  */
  xops[2] = operands[0];
  if (GET_CODE (operands[0]) != MEM)
    xops[2] = operands[3];

  /* Save the control word, patch in the new rounding mode, convert,
     then restore the saved control word.  */
  output_asm_insn ("fnstcw\t%2", operands);
  output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
  output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
  output_asm_insn ("fldcw\t%2", operands);
  output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);

  /* Use the popping form when the value dies, and always for DImode
     (there is no non-popping fistpll).  */
  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z2\t%2", xops);
  else
    output_asm_insn ("fist%z2\t%2", xops);

  output_asm_insn ("fldcw\t%2", operands);

  /* If the destination is a register, copy the result back out of
     the scratch memory slot.  */
  if (GET_CODE (operands[0]) != MEM)
    {
      if (dimode_p)
	{
	  split_di (operands+0, 1, xops+0, xops+1);
	  split_di (operands+3, 1, xops+2, xops+3);
	  output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
	  output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
	}
      else if (GET_MODE (operands[0]) == SImode)
	output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
      else
	output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
    }

  return "";
}
cda749b1 3827
e075ae69
RH
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.  */

const char *
output_fp_compare (insn, operands, eflags_p, unordered_p)
     rtx insn;
     rtx *operands;
     int eflags_p, unordered_p;
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];

  /* In the fnstsw form operands[0] is the status-word destination;
     the values compared are operands[1] and operands[2].  */
  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }

  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p == 1)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return "fstp\t%y0";
	}
      else
	{
	  if (eflags_p == 2)
	    {
	      if (unordered_p)
		return "fucompp\n\tfnstsw\t%0";
	      else
		return "fcompp\n\tfnstsw\t%0";
	    }
	  else
	    {
	      if (unordered_p)
		return "fucompp";
	      else
		return "fcompp";
	    }
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.
	 NULL entries are combinations that cannot occur (e.g. integer
	 compares have no fucom/fcomi forms).  */

      static const char * const alt[24] =
      {
	"fcom%z1\t%y1",
	"fcomp%z1\t%y1",
	"fucom%z1\t%y1",
	"fucomp%z1\t%y1",

	"ficom%z1\t%y1",
	"ficomp%z1\t%y1",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL,

	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      /* Build the 5-bit table index from the four selectors above.  */
      mask = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 24)
	abort ();
      ret = alt[mask];
      if (ret == NULL)
	abort ();

      return ret;
    }
}
2a2ab3f9 3944
e075ae69 3945/* Output assembler code to FILE to initialize basic-block profiling.
2a2ab3f9 3946
e075ae69 3947 If profile_block_flag == 2
2a2ab3f9 3948
e075ae69
RH
3949 Output code to call the subroutine `__bb_init_trace_func'
3950 and pass two parameters to it. The first parameter is
3951 the address of a block allocated in the object module.
3952 The second parameter is the number of the first basic block
3953 of the function.
2a2ab3f9 3954
e075ae69
RH
3955 The name of the block is a local symbol made with this statement:
3956
3957 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
2a2ab3f9 3958
e075ae69
RH
3959 Of course, since you are writing the definition of
3960 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3961 can take a short cut in the definition of this macro and use the
3962 name that you know will result.
2a2ab3f9 3963
e075ae69
RH
3964 The number of the first basic block of the function is
3965 passed to the macro in BLOCK_OR_LABEL.
2a2ab3f9 3966
e075ae69
RH
3967 If described in a virtual assembler language the code to be
3968 output looks like:
2a2ab3f9 3969
e075ae69
RH
3970 parameter1 <- LPBX0
3971 parameter2 <- BLOCK_OR_LABEL
3972 call __bb_init_trace_func
2a2ab3f9 3973
e075ae69 3974 else if profile_block_flag != 0
e74389ff 3975
e075ae69
RH
3976 Output code to call the subroutine `__bb_init_func'
3977 and pass one single parameter to it, which is the same
3978 as the first parameter to `__bb_init_trace_func'.
e74389ff 3979
e075ae69
RH
3980 The first word of this parameter is a flag which will be nonzero if
3981 the object module has already been initialized. So test this word
3982 first, and do not call `__bb_init_func' if the flag is nonzero.
3983 Note: When profile_block_flag == 2 the test need not be done
3984 but `__bb_init_trace_func' *must* be called.
e74389ff 3985
e075ae69
RH
3986 BLOCK_OR_LABEL may be used to generate a label number as a
3987 branch destination in case `__bb_init_func' will not be called.
e74389ff 3988
e075ae69
RH
3989 If described in a virtual assembler language the code to be
3990 output looks like:
2a2ab3f9 3991
e075ae69
RH
3992 cmp (LPBX0),0
3993 jne local_label
3994 parameter1 <- LPBX0
3995 call __bb_init_func
3996 local_label:
3997*/
c572e5ba 3998
e075ae69
RH
3999void
4000ix86_output_function_block_profiler (file, block_or_label)
4001 FILE *file;
4002 int block_or_label;
c572e5ba 4003{
e075ae69
RH
4004 static int num_func = 0;
4005 rtx xops[8];
4006 char block_table[80], false_label[80];
c572e5ba 4007
e075ae69 4008 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
e9a25f70 4009
e075ae69
RH
4010 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4011 xops[5] = stack_pointer_rtx;
4012 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
2a2ab3f9 4013
e075ae69 4014 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
c572e5ba 4015
e075ae69 4016 switch (profile_block_flag)
c572e5ba 4017 {
e075ae69
RH
4018 case 2:
4019 xops[2] = GEN_INT (block_or_label);
4020 xops[3] = gen_rtx_MEM (Pmode,
4021 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4022 xops[6] = GEN_INT (8);
e9a25f70 4023
e075ae69
RH
4024 output_asm_insn ("push{l}\t%2", xops);
4025 if (!flag_pic)
4026 output_asm_insn ("push{l}\t%1", xops);
e9a25f70 4027 else
870a0c2c 4028 {
e075ae69
RH
4029 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4030 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 4031 }
e075ae69
RH
4032 output_asm_insn ("call\t%P3", xops);
4033 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4034 break;
c572e5ba 4035
e075ae69
RH
4036 default:
4037 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
c572e5ba 4038
e075ae69
RH
4039 xops[0] = const0_rtx;
4040 xops[2] = gen_rtx_MEM (Pmode,
4041 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4042 xops[3] = gen_rtx_MEM (Pmode,
4043 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4044 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4045 xops[6] = GEN_INT (4);
a14003ee 4046
e075ae69 4047 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
446ba526 4048
e075ae69
RH
4049 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4050 output_asm_insn ("jne\t%2", xops);
870a0c2c 4051
e075ae69
RH
4052 if (!flag_pic)
4053 output_asm_insn ("push{l}\t%1", xops);
4054 else
4055 {
4056 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
4057 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 4058 }
e075ae69
RH
4059 output_asm_insn ("call\t%P3", xops);
4060 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4061 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4062 num_func++;
4063 break;
c572e5ba 4064 }
2a2ab3f9 4065}
305f097e 4066
e075ae69
RH
4067/* Output assembler code to FILE to increment a counter associated
4068 with basic block number BLOCKNO.
305f097e 4069
e075ae69 4070 If profile_block_flag == 2
ecbc4695 4071
e075ae69
RH
4072 Output code to initialize the global structure `__bb' and
4073 call the function `__bb_trace_func' which will increment the
4074 counter.
ecbc4695 4075
e075ae69
RH
4076 `__bb' consists of two words. In the first word the number
4077 of the basic block has to be stored. In the second word
4078 the address of a block allocated in the object module
4079 has to be stored.
ecbc4695 4080
e075ae69 4081 The basic block number is given by BLOCKNO.
ecbc4695 4082
e075ae69 4083 The address of the block is given by the label created with
305f097e 4084
e075ae69 4085 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
305f097e 4086
e075ae69 4087 by FUNCTION_BLOCK_PROFILER.
ecbc4695 4088
e075ae69
RH
4089 Of course, since you are writing the definition of
4090 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4091 can take a short cut in the definition of this macro and use the
4092 name that you know will result.
305f097e 4093
e075ae69
RH
4094 If described in a virtual assembler language the code to be
4095 output looks like:
305f097e 4096
e075ae69
RH
4097 move BLOCKNO -> (__bb)
4098 move LPBX0 -> (__bb+4)
4099 call __bb_trace_func
305f097e 4100
e075ae69
RH
4101 Note that function `__bb_trace_func' must not change the
4102 machine state, especially the flag register. To grant
4103 this, you must output code to save and restore registers
4104 either in this macro or in the macros MACHINE_STATE_SAVE
4105 and MACHINE_STATE_RESTORE. The last two macros will be
4106 used in the function `__bb_trace_func', so you must make
4107 sure that the function prologue does not change any
4108 register prior to saving it with MACHINE_STATE_SAVE.
305f097e 4109
e075ae69 4110 else if profile_block_flag != 0
305f097e 4111
e075ae69
RH
4112 Output code to increment the counter directly.
4113 Basic blocks are numbered separately from zero within each
4114 compiled object module. The count associated with block number
4115 BLOCKNO is at index BLOCKNO in an array of words; the name of
4116 this array is a local symbol made with this statement:
32b5b1aa 4117
e075ae69 4118 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
32b5b1aa 4119
e075ae69
RH
4120 Of course, since you are writing the definition of
4121 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4122 can take a short cut in the definition of this macro and use the
4123 name that you know will result.
32b5b1aa 4124
e075ae69
RH
4125 If described in a virtual assembler language the code to be
4126 output looks like:
32b5b1aa 4127
e075ae69
RH
4128 inc (LPBX2+4*BLOCKNO)
4129*/
32b5b1aa 4130
e075ae69
RH
/* Output assembler code to FILE to increment the execution counter for
   basic block number BLOCKNO.  When profile_block_flag == 2, fill in
   the two words of the global `__bb' structure (block number, address
   of the LPBX0 block table) and call __bb_trace_func; otherwise
   increment word BLOCKNO of the LPBX2 counter array in place.  */

void
ix86_output_block_profiler (file, blockno)
     FILE *file ATTRIBUTE_UNUSED;
     int blockno;
{
  rtx xops[8], cnt_rtx;
  char counts[80];
  char *block_table = counts;

  switch (profile_block_flag)
    {
    case 2:
      ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);

      /* %0 addresses the first word of `__bb' (block number) and %6
	 the second (block-table address).  */
      xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
      xops[2] = GEN_INT (blockno);
      xops[3] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
      xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
      xops[5] = plus_constant (xops[4], 4);
      xops[0] = gen_rtx_MEM (SImode, xops[4]);
      xops[6] = gen_rtx_MEM (SImode, xops[5]);

      CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;

      /* __bb_trace_func must not change the machine state, so preserve
	 the flags register around the whole sequence.  */
      output_asm_insn ("pushf", xops);
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      if (flag_pic)
	{
	  /* Compute the table address via %eax, saving and restoring
	     the register around its use.  */
	  xops[7] = gen_rtx_REG (Pmode, 0);	/* eax */
	  output_asm_insn ("push{l}\t%7", xops);
	  output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
	  output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
	  output_asm_insn ("pop{l}\t%7", xops);
	}
      else
	output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
      output_asm_insn ("call\t%P3", xops);
      output_asm_insn ("popf", xops);

      break;

    default:
      /* Direct increment: inc (LPBX2 + 4*BLOCKNO).  */
      ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
      cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
      SYMBOL_REF_FLAG (cnt_rtx) = TRUE;

      if (blockno)
	cnt_rtx = plus_constant (cnt_rtx, blockno*4);

      /* Under PIC the counter is addressed relative to the GOT base.  */
      if (flag_pic)
	cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);

      xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
      output_asm_insn ("inc{l}\t%0", xops);

      break;
    }
}
32b5b1aa 4190\f
/* Expand a move of MODE from operands[1] to operands[0].  Massages the
   operands into forms the machine can handle (PIC symbol
   legitimization, no mem->mem moves, FP constants pushed to the
   constant pool) and emits the SET insn.  */

void
ix86_expand_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  int strict = (reload_in_progress || reload_completed);
  rtx insn;

  if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
    {
      /* Emit insns to move operands[1] into operands[0].  */

      if (GET_CODE (operands[0]) == MEM)
	operands[1] = force_reg (Pmode, operands[1]);
      else
	{
	  /* Legitimize the symbolic address, reusing the destination
	     register as the temporary when it is one.  If
	     legitimization already produced the destination, the move
	     is complete.  */
	  rtx temp = operands[0];
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (operands[1], temp);
	  if (temp == operands[0])
	    return;
	  operands[1] = temp;
	}
    }
  else
    {
      /* Disallow mem->mem moves; pushes are the exception, except that
	 QImode pushes also need the source in a register.  */
      if (GET_CODE (operands[0]) == MEM
	  && (GET_MODE (operands[0]) == QImode
	      || !push_operand (operands[0], mode))
	  && GET_CODE (operands[1]) == MEM)
	operands[1] = force_reg (mode, operands[1]);

      /* Pushes of operands involving eliminable registers must go
	 through a register, since elimination can adjust the stack
	 pointer mid-sequence.  */
      if (push_operand (operands[0], mode)
	  && ! general_no_elim_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;	/* No new pseudos or pool entries during reload.  */
	  else if (GET_CODE (operands[1]) == CONST_DOUBLE
		   && register_operand (operands[0], mode))
	    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
	}
    }

  insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);

  emit_insn (insn);
}
e9a25f70 4246
e075ae69
RH
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;		/* 1 if dst == src1, 2 if dst == src2.  */
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      /* Keep whichever source matches the memory destination; force
	 the other one into a register.  */
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      /* ix86 arithmetic clobbers the flags; say so explicitly.  */
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
4336
4337/* Return TRUE or FALSE depending on whether the binary operator meets the
4338 appropriate constraints. */
4339
4340int
4341ix86_binary_operator_ok (code, mode, operands)
4342 enum rtx_code code;
4343 enum machine_mode mode ATTRIBUTE_UNUSED;
4344 rtx operands[3];
4345{
4346 /* Both source operands cannot be in memory. */
4347 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4348 return 0;
4349 /* If the operation is not commutable, source 1 cannot be a constant. */
4350 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4351 return 0;
4352 /* If the destination is memory, we must have a matching source operand. */
4353 if (GET_CODE (operands[0]) == MEM
4354 && ! (rtx_equal_p (operands[0], operands[1])
4355 || (GET_RTX_CLASS (code) == 'c'
4356 && rtx_equal_p (operands[0], operands[2]))))
4357 return 0;
06a964de
JH
4358 /* If the operation is not commutable and the source 1 is memory, we must
4359 have a matching destionation. */
4360 if (GET_CODE (operands[1]) == MEM
4361 && GET_RTX_CLASS (code) != 'c'
4362 && ! rtx_equal_p (operands[0], operands[1]))
4363 return 0;
e075ae69
RH
4364 return 1;
4365}
4366
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;		/* Nonzero when dst and src are the same MEM.  */
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  (NOT is the one unary that does
	 not clobber the flags, so it needs no clobber either.)  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      /* Other unaries (e.g. NEG) clobber the flags; say so explicitly.  */
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
4428
4429/* Return TRUE or FALSE depending on whether the unary operator meets the
4430 appropriate constraints. */
4431
4432int
4433ix86_unary_operator_ok (code, mode, operands)
4434 enum rtx_code code ATTRIBUTE_UNUSED;
4435 enum machine_mode mode ATTRIBUTE_UNUSED;
4436 rtx operands[2] ATTRIBUTE_UNUSED;
4437{
06a964de
JH
4438 /* If one of operands is memory, source and destination must match. */
4439 if ((GET_CODE (operands[0]) == MEM
4440 || GET_CODE (operands[1]) == MEM)
4441 && ! rtx_equal_p (operands[0], operands[1]))
4442 return FALSE;
e075ae69
RH
4443 return TRUE;
4444}
4445
16189740
RH
4446/* Return TRUE or FALSE depending on whether the first SET in INSN
4447 has source and destination with matching CC modes, and that the
4448 CC mode is at least as constrained as REQ_MODE. */
4449
4450int
4451ix86_match_ccmode (insn, req_mode)
4452 rtx insn;
4453 enum machine_mode req_mode;
4454{
4455 rtx set;
4456 enum machine_mode set_mode;
4457
4458 set = PATTERN (insn);
4459 if (GET_CODE (set) == PARALLEL)
4460 set = XVECEXP (set, 0, 0);
4461 if (GET_CODE (set) != SET)
4462 abort ();
4463
4464 set_mode = GET_MODE (SET_DEST (set));
4465 switch (set_mode)
4466 {
4467 case CCmode:
4468 if (req_mode == CCNOmode)
4469 return 0;
4470 /* FALLTHRU */
4471 case CCNOmode:
4472 if (req_mode == CCZmode)
4473 return 0;
4474 /* FALLTHRU */
4475 case CCZmode:
4476 break;
4477
4478 default:
4479 abort ();
4480 }
4481
4482 return (GET_MODE (SET_SRC (set)) == set_mode);
4483}
4484
e075ae69
RH
4485/* Produce an unsigned comparison for a given signed comparison. */
4486
4487static enum rtx_code
4488unsigned_comparison (code)
4489 enum rtx_code code;
4490{
4491 switch (code)
32b5b1aa 4492 {
e075ae69
RH
4493 case GT:
4494 code = GTU;
4495 break;
4496 case LT:
4497 code = LTU;
4498 break;
4499 case GE:
4500 code = GEU;
4501 break;
4502 case LE:
4503 code = LEU;
4504 break;
4505 case EQ:
4506 case NE:
4507 case LEU:
4508 case LTU:
4509 case GEU:
4510 case GTU:
3a3677ff
RH
4511 case UNORDERED:
4512 case ORDERED:
e075ae69
RH
4513 break;
4514 default:
4515 abort ();
4516 }
4517 return code;
4518}
4519
4520/* Generate insn patterns to do an integer compare of OPERANDS. */
4521
4522static rtx
4523ix86_expand_int_compare (code, op0, op1)
4524 enum rtx_code code;
4525 rtx op0, op1;
4526{
4527 enum machine_mode cmpmode;
4528 rtx tmp, flags;
4529
4530 cmpmode = SELECT_CC_MODE (code, op0, op1);
4531 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
4532
4533 /* This is very simple, but making the interface the same as in the
4534 FP case makes the rest of the code easier. */
4535 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
4536 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
4537
4538 /* Return the test that should be put into the flags user, i.e.
4539 the bcc, scc, or cmov instruction. */
4540 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
4541}
4542
3a3677ff
RH
4543/* Figure out whether to use ordered or unordered fp comparisons.
4544 Return the appropriate mode to use. */
e075ae69 4545
3a3677ff
RH
4546static enum machine_mode
4547ix86_fp_compare_mode (code)
e075ae69 4548 enum rtx_code code;
e075ae69 4549{
3a3677ff 4550 int unordered;
e075ae69 4551
3a3677ff
RH
4552 switch (code)
4553 {
4554 case NE: case EQ:
4555 /* When not doing IEEE compliant compares, fault on NaNs. */
4556 unordered = (TARGET_IEEE_FP != 0);
4557 break;
4558
4559 case LT: case LE: case GT: case GE:
4560 unordered = 0;
4561 break;
4562
4563 case UNORDERED: case ORDERED:
4564 case UNEQ: case UNGE: case UNGT: case UNLE: case UNLT: case LTGT:
4565 unordered = 1;
4566 break;
4567
4568 default:
4569 abort ();
4570 }
e075ae69
RH
4571
4572 /* ??? If we knew whether invalid-operand exceptions were masked,
4573 we could rely on fcom to raise an exception and take care of
3a3677ff 4574 NaNs. But we don't. We could know this from c99 math pragmas. */
e075ae69
RH
4575 if (TARGET_IEEE_FP)
4576 unordered = 1;
4577
3a3677ff
RH
4578 return unordered ? CCFPUmode : CCFPmode;
4579}
4580
4581/* Return true if we should use an FCOMI instruction for this fp comparison. */
4582
a940d8bd 4583int
3a3677ff
RH
4584ix86_use_fcomi_compare (code)
4585 enum rtx_code code;
4586{
4587 return (TARGET_CMOVE
4588 && (code == ORDERED || code == UNORDERED
4589 /* All other unordered compares require checking
4590 multiple sets of bits. */
4591 || ix86_fp_compare_mode (code) == CCFPmode));
4592}
4593
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (code, pop0, pop1)
     enum rtx_code code;
     rtx *pop0, *pop1;
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (fpcmp_mode == CCFPUmode
      || op_mode == XFmode
      || ix86_use_fcomi_compare (code))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      /* NOTE(review): the profitability test below depends on the
	 return convention of standard_80387_constant_p (not visible
	 here) -- confirm which return value means "loadable via
	 fldz/fld1" before touching this condition.  */
      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  /* Standard constants can be loaded into a register directly;
	     anything else goes through the constant pool.  */
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
4650
/* Generate insn patterns to do a floating point compare of OPERANDS.
   OP0 and OP1 are the values to compare; SCRATCH is an HImode register
   that receives the FPU status word when an fnstsw sequence is needed.
   Returns the comparison rtx (over the flags register) that the flags
   user -- bcc, scc, or cmov -- should test.  */

rtx
ix86_expand_fp_compare (code, op0, op1, scratch)
     enum rtx_code code;
     rtx op0, op1, scratch;
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  /* %%% fcomi is probably always faster, even when dealing with memory,
     since compare-and-branch would be three insns instead of four.  */
  if (ix86_use_fcomi_compare (code))
    {
      /* fcomi sets the CPU flags directly; no status-word dance.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
      emit_insn (tmp);

      /* The FP codes work out to act like unsigned.  */
      code = unsigned_comparison (code);
      intcmp_mode = CCmode;
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */

      rtx tmp2;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      if (fpcmp_mode == CCFPmode
	  || code == ORDERED
	  || code == UNORDERED)
	{
	  /* We have two options here -- use sahf, or testing bits of ah
	     directly.  On PPRO, they are equivalent, sahf being one byte
	     smaller.  On Pentium, sahf is non-pairable while test is UV
	     pairable.  */

	  if (TARGET_USE_SAHF || optimize_size)
	    {
	    do_sahf:
	      /* sahf copies ah (the FPSW high byte) into the flags.  */
	      emit_insn (gen_x86_sahf_1 (scratch));

	      /* The FP codes work out to act like unsigned.  */
	      code = unsigned_comparison (code);
	      intcmp_mode = CCmode;
	    }
	  else
	    {
	      /*
	       * The numbers below correspond to the bits of the FPSW in AH.
	       * C3, C2, and C0 are in bits 0x40, 0x4, and 0x01 respectively.
	       *
	       *    cmp    C3 C2 C0
	       *    >      0  0  0
	       *    <      0  0  1
	       *    =      1  0  0
	       *    un     1  1  1
	       */

	      int mask;

	      switch (code)
		{
		case GT:
		  mask = 0x41;
		  code = EQ;
		  break;
		case LT:
		  mask = 0x01;
		  code = NE;
		  break;
		case GE:
		  /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
		     faster in all cases to just fall back on sahf.  */
		  goto do_sahf;
		case LE:
		  mask = 0x41;
		  code = NE;
		  break;
		case EQ:
		  mask = 0x40;
		  code = NE;
		  break;
		case NE:
		  mask = 0x40;
		  code = EQ;
		  break;
		case UNORDERED:
		  mask = 0x04;
		  code = NE;
		  break;
		case ORDERED:
		  mask = 0x04;
		  code = EQ;
		  break;

		default:
		  abort ();
		}

	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (mask)));
	      intcmp_mode = CCNOmode;
	    }
	}
      else
	{
	  /* In the unordered case, we have to check C2 for NaN's, which
	     doesn't happen to work out to anything nice combination-wise.
	     So do some bit twiddling on the value we've got in AH to come
	     up with an appropriate set of condition codes.  */

	  intcmp_mode = CCNOmode;
	  switch (code)
	    {
	    case GT:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	      break;
	    case LT:
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	      break;
	    case GE:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	      break;
	    case LE:
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	      break;
	    case EQ:
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	      break;
	    case NE:
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x40)));
	      code = NE;
	      break;

	    case UNORDERED:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	      code = NE;
	      break;
	    case ORDERED:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	      code = EQ;
	      break;
	    case UNEQ:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	      break;
	    case UNGE:
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x01)));
	      code = NE;
	      break;
	    case UNGT:
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      code = GEU;
	      break;
	    case UNLE:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	      break;
	    case UNLT:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	      break;
	    case LTGT:
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	      break;

	    default:
	      abort ();
	    }
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
4852
4853static rtx
3a3677ff 4854ix86_expand_compare (code)
e075ae69 4855 enum rtx_code code;
e075ae69
RH
4856{
4857 rtx op0, op1, ret;
4858 op0 = ix86_compare_op0;
4859 op1 = ix86_compare_op1;
4860
4861 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
3a3677ff 4862 ret = ix86_expand_fp_compare (code, op0, op1, gen_reg_rtx (HImode));
32b5b1aa 4863 else
e075ae69
RH
4864 ret = ix86_expand_int_compare (code, op0, op1);
4865
4866 return ret;
4867}
4868
4869void
3a3677ff 4870ix86_expand_branch (code, label)
e075ae69 4871 enum rtx_code code;
e075ae69
RH
4872 rtx label;
4873{
3a3677ff 4874 rtx tmp;
e075ae69 4875
3a3677ff 4876 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 4877 {
3a3677ff
RH
4878 case QImode:
4879 case HImode:
4880 case SImode:
4881 tmp = ix86_expand_compare (code);
e075ae69
RH
4882 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4883 gen_rtx_LABEL_REF (VOIDmode, label),
4884 pc_rtx);
4885 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 4886 return;
e075ae69 4887
3a3677ff
RH
4888 case SFmode:
4889 case DFmode:
4890 case XFmode:
4891 /* Don't expand the comparison early, so that we get better code
4892 when jump or whoever decides to reverse the comparison. */
4893 {
4894 rtvec vec;
4895 int use_fcomi;
4896
4897 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
4898 &ix86_compare_op1);
4899
0b9aaeee 4900 tmp = gen_rtx_fmt_ee (code, VOIDmode,
3a3677ff
RH
4901 ix86_compare_op0, ix86_compare_op1);
4902 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4903 gen_rtx_LABEL_REF (VOIDmode, label),
4904 pc_rtx);
4905 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
4906
4907 use_fcomi = ix86_use_fcomi_compare (code);
4908 vec = rtvec_alloc (3 + !use_fcomi);
4909 RTVEC_ELT (vec, 0) = tmp;
4910 RTVEC_ELT (vec, 1)
4911 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
4912 RTVEC_ELT (vec, 2)
4913 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
4914 if (! use_fcomi)
4915 RTVEC_ELT (vec, 3)
4916 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
4917
4918 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
4919 return;
4920 }
32b5b1aa 4921
3a3677ff
RH
4922 case DImode:
4923 /* Expand DImode branch into multiple compare+branch. */
4924 {
4925 rtx lo[2], hi[2], label2;
4926 enum rtx_code code1, code2, code3;
32b5b1aa 4927
3a3677ff
RH
4928 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
4929 {
4930 tmp = ix86_compare_op0;
4931 ix86_compare_op0 = ix86_compare_op1;
4932 ix86_compare_op1 = tmp;
4933 code = swap_condition (code);
4934 }
4935 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
4936 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 4937
3a3677ff
RH
4938 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
4939 avoid two branches. This costs one extra insn, so disable when
4940 optimizing for size. */
32b5b1aa 4941
3a3677ff
RH
4942 if ((code == EQ || code == NE)
4943 && (!optimize_size
4944 || hi[1] == const0_rtx || lo[1] == const0_rtx))
4945 {
4946 rtx xor0, xor1;
32b5b1aa 4947
3a3677ff
RH
4948 xor1 = hi[0];
4949 if (hi[1] != const0_rtx)
4950 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
4951 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 4952
3a3677ff
RH
4953 xor0 = lo[0];
4954 if (lo[1] != const0_rtx)
4955 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
4956 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 4957
3a3677ff
RH
4958 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
4959 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 4960
3a3677ff
RH
4961 ix86_compare_op0 = tmp;
4962 ix86_compare_op1 = const0_rtx;
4963 ix86_expand_branch (code, label);
4964 return;
4965 }
e075ae69 4966
3a3677ff
RH
4967 /* Otherwise, if we are doing less-than, op1 is a constant and the
4968 low word is zero, then we can just examine the high word. */
32b5b1aa 4969
3a3677ff
RH
4970 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx
4971 && (code == LT || code == LTU))
4972 {
4973 ix86_compare_op0 = hi[0];
4974 ix86_compare_op1 = hi[1];
4975 ix86_expand_branch (code, label);
4976 return;
4977 }
e075ae69 4978
3a3677ff 4979 /* Otherwise, we need two or three jumps. */
e075ae69 4980
3a3677ff 4981 label2 = gen_label_rtx ();
e075ae69 4982
3a3677ff
RH
4983 code1 = code;
4984 code2 = swap_condition (code);
4985 code3 = unsigned_condition (code);
e075ae69 4986
3a3677ff
RH
4987 switch (code)
4988 {
4989 case LT: case GT: case LTU: case GTU:
4990 break;
e075ae69 4991
3a3677ff
RH
4992 case LE: code1 = LT; code2 = GT; break;
4993 case GE: code1 = GT; code2 = LT; break;
4994 case LEU: code1 = LTU; code2 = GTU; break;
4995 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 4996
3a3677ff
RH
4997 case EQ: code1 = NIL; code2 = NE; break;
4998 case NE: code2 = NIL; break;
e075ae69 4999
3a3677ff
RH
5000 default:
5001 abort ();
5002 }
e075ae69 5003
3a3677ff
RH
5004 /*
5005 * a < b =>
5006 * if (hi(a) < hi(b)) goto true;
5007 * if (hi(a) > hi(b)) goto false;
5008 * if (lo(a) < lo(b)) goto true;
5009 * false:
5010 */
5011
5012 ix86_compare_op0 = hi[0];
5013 ix86_compare_op1 = hi[1];
5014
5015 if (code1 != NIL)
5016 ix86_expand_branch (code1, label);
5017 if (code2 != NIL)
5018 ix86_expand_branch (code2, label2);
5019
5020 ix86_compare_op0 = lo[0];
5021 ix86_compare_op1 = lo[1];
5022 ix86_expand_branch (code3, label);
5023
5024 if (code2 != NIL)
5025 emit_label (label2);
5026 return;
5027 }
e075ae69 5028
3a3677ff
RH
5029 default:
5030 abort ();
5031 }
32b5b1aa 5032}
e075ae69 5033
32b5b1aa 5034int
3a3677ff 5035ix86_expand_setcc (code, dest)
e075ae69 5036 enum rtx_code code;
e075ae69 5037 rtx dest;
32b5b1aa 5038{
e075ae69
RH
5039 rtx ret, tmp;
5040 int type;
5041
5042 if (GET_MODE (ix86_compare_op0) == DImode)
5043 return 0; /* FAIL */
5044
5045 /* Three modes of generation:
5046 0 -- destination does not overlap compare sources:
5047 clear dest first, emit strict_low_part setcc.
5048 1 -- destination does overlap compare sources:
5049 emit subreg setcc, zero extend.
5050 2 -- destination is in QImode:
5051 emit setcc only.
5052 */
5053
5054 type = 0;
e075ae69
RH
5055
5056 if (GET_MODE (dest) == QImode)
5057 type = 2;
5058 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
a500c31b 5059 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
e075ae69
RH
5060 type = 1;
5061
5062 if (type == 0)
5063 emit_move_insn (dest, const0_rtx);
5064
3a3677ff 5065 ret = ix86_expand_compare (code);
e075ae69
RH
5066 PUT_MODE (ret, QImode);
5067
5068 tmp = dest;
5069 if (type == 0)
32b5b1aa 5070 {
e075ae69
RH
5071 tmp = gen_lowpart (QImode, dest);
5072 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
5073 }
5074 else if (type == 1)
5075 {
5076 if (!cse_not_expected)
5077 tmp = gen_reg_rtx (QImode);
5078 else
5079 tmp = gen_lowpart (QImode, dest);
5080 }
32b5b1aa 5081
e075ae69
RH
5082 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
5083
5084 if (type == 1)
5085 {
5086 rtx clob;
5087
5088 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
5089 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
5090 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5091 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5092 emit_insn (tmp);
32b5b1aa 5093 }
e075ae69
RH
5094
5095 return 1; /* DONE */
32b5b1aa 5096}
e075ae69 5097
32b5b1aa 5098int
e075ae69
RH
5099ix86_expand_int_movcc (operands)
5100 rtx operands[];
32b5b1aa 5101{
e075ae69
RH
5102 enum rtx_code code = GET_CODE (operands[1]), compare_code;
5103 rtx compare_seq, compare_op;
32b5b1aa 5104
36583fea
JH
5105 /* When the compare code is not LTU or GEU, we can not use sbbl case.
5106 In case comparsion is done with immediate, we can convert it to LTU or
5107 GEU by altering the integer. */
5108
5109 if ((code == LEU || code == GTU)
5110 && GET_CODE (ix86_compare_op1) == CONST_INT
5111 && GET_MODE (operands[0]) != HImode
5112 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
5113 && GET_CODE (operands[2]) == CONST_INT
5114 && GET_CODE (operands[3]) == CONST_INT)
5115 {
5116 if (code == LEU)
5117 code = LTU;
5118 else
5119 code = GEU;
5120 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
5121 }
3a3677ff 5122
e075ae69 5123 start_sequence ();
3a3677ff 5124 compare_op = ix86_expand_compare (code);
e075ae69
RH
5125 compare_seq = gen_sequence ();
5126 end_sequence ();
5127
5128 compare_code = GET_CODE (compare_op);
5129
5130 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
5131 HImode insns, we'd be swallowed in word prefix ops. */
5132
5133 if (GET_MODE (operands[0]) != HImode
5134 && GET_CODE (operands[2]) == CONST_INT
5135 && GET_CODE (operands[3]) == CONST_INT)
5136 {
5137 rtx out = operands[0];
5138 HOST_WIDE_INT ct = INTVAL (operands[2]);
5139 HOST_WIDE_INT cf = INTVAL (operands[3]);
5140 HOST_WIDE_INT diff;
5141
36583fea 5142 if (compare_code == LTU || compare_code == GEU)
e075ae69 5143 {
e075ae69
RH
5144
5145 /* Detect overlap between destination and compare sources. */
5146 rtx tmp = out;
5147
36583fea
JH
5148 /* To simplify rest of code, restrict to the GEU case. */
5149 if (compare_code == LTU)
5150 {
5151 int tmp = ct;
5152 ct = cf;
5153 cf = tmp;
5154 compare_code = reverse_condition (compare_code);
5155 code = reverse_condition (code);
5156 }
5157 diff = ct - cf;
5158
e075ae69 5159 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
a500c31b 5160 || reg_overlap_mentioned_p (out, ix86_compare_op1))
e075ae69
RH
5161 tmp = gen_reg_rtx (SImode);
5162
5163 emit_insn (compare_seq);
5164 emit_insn (gen_x86_movsicc_0_m1 (tmp));
5165
36583fea
JH
5166 if (diff == 1)
5167 {
5168 /*
5169 * cmpl op0,op1
5170 * sbbl dest,dest
5171 * [addl dest, ct]
5172 *
5173 * Size 5 - 8.
5174 */
5175 if (ct)
5176 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5177 }
5178 else if (cf == -1)
5179 {
5180 /*
5181 * cmpl op0,op1
5182 * sbbl dest,dest
5183 * orl $ct, dest
5184 *
5185 * Size 8.
5186 */
5187 emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
5188 }
5189 else if (diff == -1 && ct)
5190 {
5191 /*
5192 * cmpl op0,op1
5193 * sbbl dest,dest
5194 * xorl $-1, dest
5195 * [addl dest, cf]
5196 *
5197 * Size 8 - 11.
5198 */
5199 emit_insn (gen_one_cmplsi2 (tmp, tmp));
5200 if (cf)
5201 emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
5202 }
5203 else
5204 {
5205 /*
5206 * cmpl op0,op1
5207 * sbbl dest,dest
5208 * andl cf - ct, dest
5209 * [addl dest, ct]
5210 *
5211 * Size 8 - 11.
5212 */
5213 emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
5214 if (ct)
5215 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5216 }
e075ae69
RH
5217
5218 if (tmp != out)
5219 emit_move_insn (out, tmp);
5220
5221 return 1; /* DONE */
5222 }
5223
5224 diff = ct - cf;
5225 if (diff < 0)
5226 {
5227 HOST_WIDE_INT tmp;
5228 tmp = ct, ct = cf, cf = tmp;
5229 diff = -diff;
5230 compare_code = reverse_condition (compare_code);
5231 code = reverse_condition (code);
5232 }
5233 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
5234 || diff == 3 || diff == 5 || diff == 9)
5235 {
5236 /*
5237 * xorl dest,dest
5238 * cmpl op1,op2
5239 * setcc dest
5240 * lea cf(dest*(ct-cf)),dest
5241 *
5242 * Size 14.
5243 *
5244 * This also catches the degenerate setcc-only case.
5245 */
5246
5247 rtx tmp;
5248 int nops;
5249
5250 out = emit_store_flag (out, code, ix86_compare_op0,
5251 ix86_compare_op1, VOIDmode, 0, 1);
5252
5253 nops = 0;
5254 if (diff == 1)
5255 tmp = out;
5256 else
5257 {
5258 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
5259 nops++;
5260 if (diff & 1)
5261 {
5262 tmp = gen_rtx_PLUS (SImode, tmp, out);
5263 nops++;
5264 }
5265 }
5266 if (cf != 0)
5267 {
5268 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
5269 nops++;
5270 }
5271 if (tmp != out)
5272 {
5273 if (nops == 0)
5274 emit_move_insn (out, tmp);
5275 else if (nops == 1)
5276 {
5277 rtx clob;
5278
5279 clob = gen_rtx_REG (CCmode, FLAGS_REG);
5280 clob = gen_rtx_CLOBBER (VOIDmode, clob);
5281
5282 tmp = gen_rtx_SET (VOIDmode, out, tmp);
5283 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5284 emit_insn (tmp);
5285 }
5286 else
5287 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
5288 }
5289 if (out != operands[0])
5290 emit_move_insn (operands[0], out);
5291
5292 return 1; /* DONE */
5293 }
5294
5295 /*
5296 * General case: Jumpful:
5297 * xorl dest,dest cmpl op1, op2
5298 * cmpl op1, op2 movl ct, dest
5299 * setcc dest jcc 1f
5300 * decl dest movl cf, dest
5301 * andl (cf-ct),dest 1:
5302 * addl ct,dest
5303 *
5304 * Size 20. Size 14.
5305 *
5306 * This is reasonably steep, but branch mispredict costs are
5307 * high on modern cpus, so consider failing only if optimizing
5308 * for space.
5309 *
5310 * %%% Parameterize branch_cost on the tuning architecture, then
5311 * use that. The 80386 couldn't care less about mispredicts.
5312 */
5313
5314 if (!optimize_size && !TARGET_CMOVE)
5315 {
5316 if (ct == 0)
5317 {
5318 ct = cf;
5319 cf = 0;
5320 compare_code = reverse_condition (compare_code);
5321 code = reverse_condition (code);
5322 }
5323
5324 out = emit_store_flag (out, code, ix86_compare_op0,
5325 ix86_compare_op1, VOIDmode, 0, 1);
5326
5327 emit_insn (gen_addsi3 (out, out, constm1_rtx));
5328 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
5329 if (ct != 0)
5330 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5331 if (out != operands[0])
5332 emit_move_insn (operands[0], out);
5333
5334 return 1; /* DONE */
5335 }
5336 }
5337
5338 if (!TARGET_CMOVE)
5339 {
5340 /* Try a few things more with specific constants and a variable. */
5341
78a0d70c 5342 optab op;
e075ae69
RH
5343 rtx var, orig_out, out, tmp;
5344
5345 if (optimize_size)
5346 return 0; /* FAIL */
5347
5348 /* If one of the two operands is an interesting constant, load a
5349 constant with the above and mask it in with a logical operation. */
5350
5351 if (GET_CODE (operands[2]) == CONST_INT)
5352 {
5353 var = operands[3];
5354 if (INTVAL (operands[2]) == 0)
5355 operands[3] = constm1_rtx, op = and_optab;
5356 else if (INTVAL (operands[2]) == -1)
5357 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
5358 else
5359 return 0; /* FAIL */
e075ae69
RH
5360 }
5361 else if (GET_CODE (operands[3]) == CONST_INT)
5362 {
5363 var = operands[2];
5364 if (INTVAL (operands[3]) == 0)
5365 operands[2] = constm1_rtx, op = and_optab;
5366 else if (INTVAL (operands[3]) == -1)
5367 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
5368 else
5369 return 0; /* FAIL */
e075ae69 5370 }
78a0d70c 5371 else
e075ae69
RH
5372 return 0; /* FAIL */
5373
5374 orig_out = operands[0];
5375 tmp = gen_reg_rtx (GET_MODE (orig_out));
5376 operands[0] = tmp;
5377
5378 /* Recurse to get the constant loaded. */
5379 if (ix86_expand_int_movcc (operands) == 0)
5380 return 0; /* FAIL */
5381
5382 /* Mask in the interesting variable. */
5383 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
5384 OPTAB_WIDEN);
5385 if (out != orig_out)
5386 emit_move_insn (orig_out, out);
5387
5388 return 1; /* DONE */
5389 }
5390
5391 /*
5392 * For comparison with above,
5393 *
5394 * movl cf,dest
5395 * movl ct,tmp
5396 * cmpl op1,op2
5397 * cmovcc tmp,dest
5398 *
5399 * Size 15.
5400 */
5401
5402 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
5403 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
5404 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
5405 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
5406
5407 emit_insn (compare_seq);
5408 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5409 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5410 compare_op, operands[2],
5411 operands[3])));
5412
5413 return 1; /* DONE */
e9a25f70 5414}
e075ae69 5415
32b5b1aa 5416int
e075ae69
RH
5417ix86_expand_fp_movcc (operands)
5418 rtx operands[];
32b5b1aa 5419{
e075ae69
RH
5420 enum rtx_code code;
5421 enum machine_mode mode;
5422 rtx tmp;
32b5b1aa 5423
e075ae69
RH
5424 /* The floating point conditional move instructions don't directly
5425 support conditions resulting from a signed integer comparison. */
32b5b1aa 5426
e075ae69
RH
5427 code = GET_CODE (operands[1]);
5428 switch (code)
5429 {
5430 case LT:
5431 case LE:
5432 case GE:
5433 case GT:
5434 tmp = gen_reg_rtx (QImode);
3a3677ff 5435 ix86_expand_setcc (code, tmp);
e075ae69
RH
5436 code = NE;
5437 ix86_compare_op0 = tmp;
5438 ix86_compare_op1 = const0_rtx;
5439 break;
5440
5441 default:
5442 break;
5443 }
e9a25f70 5444
e075ae69
RH
5445 mode = SELECT_CC_MODE (code, ix86_compare_op0, ix86_compare_op1);
5446 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (mode, FLAGS_REG),
5447 gen_rtx_COMPARE (mode,
5448 ix86_compare_op0,
5449 ix86_compare_op1)));
5450 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5451 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5452 gen_rtx_fmt_ee (code, VOIDmode,
5453 gen_rtx_REG (mode, FLAGS_REG),
5454 const0_rtx),
5455 operands[2],
5456 operands[3])));
32b5b1aa 5457
e075ae69 5458 return 1;
32b5b1aa
SC
5459}
5460
2450a057
JH
5461/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
5462 works for floating pointer parameters and nonoffsetable memories.
5463 For pushes, it returns just stack offsets; the values will be saved
5464 in the right order. Maximally three parts are generated. */
5465
5466static void
5467ix86_split_to_parts (operand, parts, mode)
5468 rtx operand;
5469 rtx *parts;
5470 enum machine_mode mode;
32b5b1aa 5471{
2450a057
JH
5472 int size = GET_MODE_SIZE (mode) / 4;
5473
a7180f70
BS
5474 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
5475 abort ();
2450a057
JH
5476 if (size < 2 || size > 3)
5477 abort ();
5478
d7a29404
JH
5479 /* Optimize constant pool reference to immediates. This is used by fp moves,
5480 that force all constants to memory to allow combining. */
5481
5482 if (GET_CODE (operand) == MEM
5483 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
5484 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
5485 operand = get_pool_constant (XEXP (operand, 0));
5486
2450a057 5487 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
e075ae69 5488 {
2450a057
JH
5489 /* The only non-offsetable memories we handle are pushes. */
5490 if (! push_operand (operand, VOIDmode))
5491 abort ();
5492
5493 PUT_MODE (operand, SImode);
5494 parts[0] = parts[1] = parts[2] = operand;
5495 }
5496 else
5497 {
5498 if (mode == DImode)
5499 split_di (&operand, 1, &parts[0], &parts[1]);
5500 else
e075ae69 5501 {
2450a057
JH
5502 if (REG_P (operand))
5503 {
5504 if (!reload_completed)
5505 abort ();
5506 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
5507 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
5508 if (size == 3)
5509 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
5510 }
5511 else if (offsettable_memref_p (operand))
5512 {
5513 PUT_MODE (operand, SImode);
5514 parts[0] = operand;
5515 parts[1] = adj_offsettable_operand (operand, 4);
5516 if (size == 3)
5517 parts[2] = adj_offsettable_operand (operand, 8);
5518 }
5519 else if (GET_CODE (operand) == CONST_DOUBLE)
5520 {
5521 REAL_VALUE_TYPE r;
5522 long l[3];
5523
5524 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
5525 switch (mode)
5526 {
5527 case XFmode:
5528 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
5529 parts[2] = GEN_INT (l[2]);
5530 break;
5531 case DFmode:
5532 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
5533 break;
5534 default:
5535 abort ();
5536 }
5537 parts[1] = GEN_INT (l[1]);
5538 parts[0] = GEN_INT (l[0]);
5539 }
5540 else
5541 abort ();
e075ae69 5542 }
2450a057
JH
5543 }
5544
5545 return;
5546}
5547
5548/* Emit insns to perform a move or push of DI, DF, and XF values.
5549 Return false when normal moves are needed; true when all required
5550 insns have been emitted. Operands 2-4 contain the input values
5551 int the correct order; operands 5-7 contain the output values. */
5552
5553int
5554ix86_split_long_move (operands1)
5555 rtx operands1[];
5556{
5557 rtx part[2][3];
5558 rtx operands[2];
5559 int size = GET_MODE_SIZE (GET_MODE (operands1[0])) / 4;
5560 int push = 0;
5561 int collisions = 0;
5562
5563 /* Make our own copy to avoid clobbering the operands. */
5564 operands[0] = copy_rtx (operands1[0]);
5565 operands[1] = copy_rtx (operands1[1]);
5566
5567 if (size < 2 || size > 3)
5568 abort ();
5569
5570 /* The only non-offsettable memory we handle is push. */
5571 if (push_operand (operands[0], VOIDmode))
5572 push = 1;
5573 else if (GET_CODE (operands[0]) == MEM
5574 && ! offsettable_memref_p (operands[0]))
5575 abort ();
5576
5577 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
5578 ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
5579
5580 /* When emitting push, take care for source operands on the stack. */
5581 if (push && GET_CODE (operands[1]) == MEM
5582 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
5583 {
5584 if (size == 3)
5585 part[1][1] = part[1][2];
5586 part[1][0] = part[1][1];
5587 }
5588
5589 /* We need to do copy in the right order in case an address register
5590 of the source overlaps the destination. */
5591 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
5592 {
5593 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
5594 collisions++;
5595 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
5596 collisions++;
5597 if (size == 3
5598 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
5599 collisions++;
5600
5601 /* Collision in the middle part can be handled by reordering. */
5602 if (collisions == 1 && size == 3
5603 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
e075ae69 5604 {
2450a057
JH
5605 rtx tmp;
5606 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
5607 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
5608 }
e075ae69 5609
2450a057
JH
5610 /* If there are more collisions, we can't handle it by reordering.
5611 Do an lea to the last part and use only one colliding move. */
5612 else if (collisions > 1)
5613 {
5614 collisions = 1;
5615 emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
5616 XEXP (part[1][0], 0)));
5617 part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
5618 part[1][1] = adj_offsettable_operand (part[1][0], 4);
5619 if (size == 3)
5620 part[1][2] = adj_offsettable_operand (part[1][0], 8);
5621 }
5622 }
5623
5624 if (push)
5625 {
5626 if (size == 3)
5627 emit_insn (gen_push (part[1][2]));
5628 emit_insn (gen_push (part[1][1]));
5629 emit_insn (gen_push (part[1][0]));
5630 return 1;
5631 }
5632
5633 /* Choose correct order to not overwrite the source before it is copied. */
5634 if ((REG_P (part[0][0])
5635 && REG_P (part[1][1])
5636 && (REGNO (part[0][0]) == REGNO (part[1][1])
5637 || (size == 3
5638 && REGNO (part[0][0]) == REGNO (part[1][2]))))
5639 || (collisions > 0
5640 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
5641 {
5642 if (size == 3)
5643 {
5644 operands1[2] = part[0][2];
5645 operands1[3] = part[0][1];
5646 operands1[4] = part[0][0];
5647 operands1[5] = part[1][2];
5648 operands1[6] = part[1][1];
5649 operands1[7] = part[1][0];
5650 }
5651 else
5652 {
5653 operands1[2] = part[0][1];
5654 operands1[3] = part[0][0];
5655 operands1[5] = part[1][1];
5656 operands1[6] = part[1][0];
5657 }
5658 }
5659 else
5660 {
5661 if (size == 3)
5662 {
5663 operands1[2] = part[0][0];
5664 operands1[3] = part[0][1];
5665 operands1[4] = part[0][2];
5666 operands1[5] = part[1][0];
5667 operands1[6] = part[1][1];
5668 operands1[7] = part[1][2];
5669 }
5670 else
5671 {
5672 operands1[2] = part[0][0];
5673 operands1[3] = part[0][1];
5674 operands1[5] = part[1][0];
5675 operands1[6] = part[1][1];
e075ae69
RH
5676 }
5677 }
32b5b1aa 5678
e9a25f70 5679 return 0;
32b5b1aa 5680}
32b5b1aa 5681
e075ae69
RH
5682void
5683ix86_split_ashldi (operands, scratch)
5684 rtx *operands, scratch;
32b5b1aa 5685{
e075ae69
RH
5686 rtx low[2], high[2];
5687 int count;
b985a30f 5688
e075ae69
RH
5689 if (GET_CODE (operands[2]) == CONST_INT)
5690 {
5691 split_di (operands, 2, low, high);
5692 count = INTVAL (operands[2]) & 63;
32b5b1aa 5693
e075ae69
RH
5694 if (count >= 32)
5695 {
5696 emit_move_insn (high[0], low[1]);
5697 emit_move_insn (low[0], const0_rtx);
b985a30f 5698
e075ae69
RH
5699 if (count > 32)
5700 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
5701 }
5702 else
5703 {
5704 if (!rtx_equal_p (operands[0], operands[1]))
5705 emit_move_insn (operands[0], operands[1]);
5706 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
5707 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
5708 }
5709 }
5710 else
5711 {
5712 if (!rtx_equal_p (operands[0], operands[1]))
5713 emit_move_insn (operands[0], operands[1]);
b985a30f 5714
e075ae69 5715 split_di (operands, 1, low, high);
b985a30f 5716
e075ae69
RH
5717 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
5718 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 5719
fe577e58 5720 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 5721 {
fe577e58 5722 if (! no_new_pseudos)
e075ae69
RH
5723 scratch = force_reg (SImode, const0_rtx);
5724 else
5725 emit_move_insn (scratch, const0_rtx);
5726
5727 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
5728 scratch));
5729 }
5730 else
5731 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
5732 }
e9a25f70 5733}
32b5b1aa 5734
e075ae69
RH
5735void
5736ix86_split_ashrdi (operands, scratch)
5737 rtx *operands, scratch;
32b5b1aa 5738{
e075ae69
RH
5739 rtx low[2], high[2];
5740 int count;
32b5b1aa 5741
e075ae69
RH
5742 if (GET_CODE (operands[2]) == CONST_INT)
5743 {
5744 split_di (operands, 2, low, high);
5745 count = INTVAL (operands[2]) & 63;
32b5b1aa 5746
e075ae69
RH
5747 if (count >= 32)
5748 {
5749 emit_move_insn (low[0], high[1]);
32b5b1aa 5750
e075ae69
RH
5751 if (! reload_completed)
5752 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
5753 else
5754 {
5755 emit_move_insn (high[0], low[0]);
5756 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
5757 }
5758
5759 if (count > 32)
5760 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
5761 }
5762 else
5763 {
5764 if (!rtx_equal_p (operands[0], operands[1]))
5765 emit_move_insn (operands[0], operands[1]);
5766 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5767 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
5768 }
5769 }
5770 else
32b5b1aa 5771 {
e075ae69
RH
5772 if (!rtx_equal_p (operands[0], operands[1]))
5773 emit_move_insn (operands[0], operands[1]);
5774
5775 split_di (operands, 1, low, high);
5776
5777 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5778 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
5779
fe577e58 5780 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 5781 {
fe577e58 5782 if (! no_new_pseudos)
e075ae69
RH
5783 scratch = gen_reg_rtx (SImode);
5784 emit_move_insn (scratch, high[0]);
5785 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
5786 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5787 scratch));
5788 }
5789 else
5790 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 5791 }
e075ae69 5792}
32b5b1aa 5793
e075ae69
RH
5794void
5795ix86_split_lshrdi (operands, scratch)
5796 rtx *operands, scratch;
5797{
5798 rtx low[2], high[2];
5799 int count;
32b5b1aa 5800
e075ae69 5801 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 5802 {
e075ae69
RH
5803 split_di (operands, 2, low, high);
5804 count = INTVAL (operands[2]) & 63;
5805
5806 if (count >= 32)
c7271385 5807 {
e075ae69
RH
5808 emit_move_insn (low[0], high[1]);
5809 emit_move_insn (high[0], const0_rtx);
32b5b1aa 5810
e075ae69
RH
5811 if (count > 32)
5812 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
5813 }
5814 else
5815 {
5816 if (!rtx_equal_p (operands[0], operands[1]))
5817 emit_move_insn (operands[0], operands[1]);
5818 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5819 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
5820 }
32b5b1aa 5821 }
e075ae69
RH
5822 else
5823 {
5824 if (!rtx_equal_p (operands[0], operands[1]))
5825 emit_move_insn (operands[0], operands[1]);
32b5b1aa 5826
e075ae69
RH
5827 split_di (operands, 1, low, high);
5828
5829 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5830 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
5831
5832 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 5833 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 5834 {
fe577e58 5835 if (! no_new_pseudos)
e075ae69
RH
5836 scratch = force_reg (SImode, const0_rtx);
5837 else
5838 emit_move_insn (scratch, const0_rtx);
5839
5840 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5841 scratch));
5842 }
5843 else
5844 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
5845 }
32b5b1aa 5846}
3f803cd9 5847
e075ae69
RH
5848/* Expand the appropriate insns for doing strlen if not just doing
5849 repnz; scasb
5850
5851 out = result, initialized with the start address
5852 align_rtx = alignment of the address.
5853 scratch = scratch register, initialized with the startaddress when
5854 not aligned, otherwise undefined
3f803cd9
SC
5855
5856 This is just the body. It needs the initialisations mentioned above and
5857 some address computing at the end. These things are done in i386.md. */
5858
e075ae69
RH
5859void
5860ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
5861 rtx out, align_rtx, scratch;
3f803cd9 5862{
e075ae69
RH
5863 int align;
5864 rtx tmp;
5865 rtx align_2_label = NULL_RTX;
5866 rtx align_3_label = NULL_RTX;
5867 rtx align_4_label = gen_label_rtx ();
5868 rtx end_0_label = gen_label_rtx ();
e075ae69 5869 rtx mem;
16189740
RH
5870 rtx no_flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
5871 rtx z_flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
e2e52e1b 5872 rtx tmpreg = gen_reg_rtx (SImode);
e075ae69
RH
5873
5874 align = 0;
5875 if (GET_CODE (align_rtx) == CONST_INT)
5876 align = INTVAL (align_rtx);
3f803cd9 5877
e9a25f70 5878 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 5879
e9a25f70 5880 /* Is there a known alignment and is it less than 4? */
e075ae69 5881 if (align < 4)
3f803cd9 5882 {
e9a25f70 5883 /* Is there a known alignment and is it not 2? */
e075ae69 5884 if (align != 2)
3f803cd9 5885 {
e075ae69
RH
5886 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
5887 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
5888
5889 /* Leave just the 3 lower bits. */
5890 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
5891 NULL_RTX, 0, OPTAB_WIDEN);
5892
16189740 5893 emit_insn (gen_cmpsi_ccz_1 (align_rtx, const0_rtx));
e075ae69 5894
16189740 5895 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
e075ae69
RH
5896 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5897 gen_rtx_LABEL_REF (VOIDmode,
5898 align_4_label),
5899 pc_rtx);
5900 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5901
16189740 5902 emit_insn (gen_cmpsi_ccno_1 (align_rtx, GEN_INT (2)));
e075ae69 5903
16189740 5904 tmp = gen_rtx_EQ (VOIDmode, no_flags, const0_rtx);
e075ae69
RH
5905 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5906 gen_rtx_LABEL_REF (VOIDmode,
5907 align_2_label),
5908 pc_rtx);
5909 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5910
16189740 5911 tmp = gen_rtx_GTU (VOIDmode, no_flags, const0_rtx);
e075ae69
RH
5912 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5913 gen_rtx_LABEL_REF (VOIDmode,
5914 align_3_label),
5915 pc_rtx);
5916 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
3f803cd9
SC
5917 }
5918 else
5919 {
e9a25f70
JL
5920 /* Since the alignment is 2, we have to check 2 or 0 bytes;
5921 check if is aligned to 4 - byte. */
e9a25f70 5922
e075ae69
RH
5923 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
5924 NULL_RTX, 0, OPTAB_WIDEN);
5925
16189740 5926 emit_insn (gen_cmpsi_ccz_1 (align_rtx, const0_rtx));
e075ae69 5927
16189740 5928 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
e075ae69
RH
5929 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5930 gen_rtx_LABEL_REF (VOIDmode,
5931 align_4_label),
5932 pc_rtx);
5933 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
3f803cd9
SC
5934 }
5935
e075ae69 5936 mem = gen_rtx_MEM (QImode, out);
e9a25f70 5937
e075ae69 5938 /* Now compare the bytes. */
e9a25f70 5939
e075ae69 5940 /* Compare the first n unaligned byte on a byte per byte basis. */
16189740 5941 emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));
e9a25f70 5942
16189740 5943 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
e075ae69
RH
5944 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5945 gen_rtx_LABEL_REF (VOIDmode, end_0_label),
5946 pc_rtx);
5947 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
3f803cd9 5948
e075ae69
RH
5949 /* Increment the address. */
5950 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 5951
e075ae69
RH
5952 /* Not needed with an alignment of 2 */
5953 if (align != 2)
5954 {
5955 emit_label (align_2_label);
3f803cd9 5956
16189740 5957 emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));
3f803cd9 5958
16189740 5959 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
e075ae69
RH
5960 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5961 gen_rtx_LABEL_REF (VOIDmode,
5962 end_0_label),
5963 pc_rtx);
5964 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5965
5966 emit_insn (gen_addsi3 (out, out, const1_rtx));
5967
5968 emit_label (align_3_label);
5969 }
5970
16189740 5971 emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));
e9a25f70 5972
16189740 5973 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
e075ae69
RH
5974 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5975 gen_rtx_LABEL_REF (VOIDmode, end_0_label),
5976 pc_rtx);
5977 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5978
5979 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
5980 }
5981
e075ae69
RH
5982 /* Generate loop to check 4 bytes at a time. It is not a good idea to
5983 align this loop. It gives only huge programs, but does not help to
5984 speed up. */
5985 emit_label (align_4_label);
3f803cd9 5986
e075ae69
RH
5987 mem = gen_rtx_MEM (SImode, out);
5988 emit_move_insn (scratch, mem);
e075ae69 5989 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 5990
e2e52e1b
JH
5991 /* This formula yields a nonzero result iff one of the bytes is zero.
5992 This saves three branches inside loop and many cycles. */
5993
5994 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
5995 emit_insn (gen_one_cmplsi2 (scratch, scratch));
5996 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
5997 emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
5998 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 0, align_4_label);
5999
6000 if (TARGET_CMOVE)
6001 {
6002 rtx reg = gen_reg_rtx (SImode);
6003 emit_move_insn (reg, tmpreg);
6004 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
6005
6006 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 6007 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
6008 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6009 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6010 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
6011 gen_rtx_IF_THEN_ELSE (SImode, tmp,
6012 reg,
6013 tmpreg)));
6014 /* Emit lea manually to avoid clobbering of flags. */
6015 emit_insn (gen_rtx_SET (SImode, reg,
6016 gen_rtx_PLUS (SImode, out, GEN_INT (2))));
6017
6018 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6019 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6020 emit_insn (gen_rtx_SET (VOIDmode, out,
6021 gen_rtx_IF_THEN_ELSE (SImode, tmp,
6022 reg,
6023 out)));
6024
6025 }
6026 else
6027 {
6028 rtx end_2_label = gen_label_rtx ();
6029 /* Is zero in the first two bytes? */
6030
16189740 6031 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
6032 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6033 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
6034 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6035 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
6036 pc_rtx);
6037 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
6038 JUMP_LABEL (tmp) = end_2_label;
6039
6040 /* Not in the first two. Move two bytes forward. */
6041 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
6042 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
6043
6044 emit_label (end_2_label);
6045
6046 }
6047
6048 /* Avoid branch in fixing the byte. */
6049 tmpreg = gen_lowpart (QImode, tmpreg);
6050 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
6051 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
e075ae69
RH
6052
6053 emit_label (end_0_label);
6054}
6055\f
e075ae69
RH
6056/* Clear stack slot assignments remembered from previous functions.
6057 This is called from INIT_EXPANDERS once before RTL is emitted for each
6058 function. */
6059
36edd3cc
BS
6060static void
6061ix86_init_machine_status (p)
1526a060 6062 struct function *p;
e075ae69
RH
6063{
6064 enum machine_mode mode;
6065 int n;
36edd3cc
BS
6066 p->machine
6067 = (struct machine_function *) xmalloc (sizeof (struct machine_function));
e075ae69
RH
6068
6069 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6070 mode = (enum machine_mode) ((int) mode + 1))
6071 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6072 ix86_stack_locals[(int) mode][n] = NULL_RTX;
e075ae69
RH
6073}
6074
1526a060
BS
/* Mark machine specific bits of P for GC.

   Walks every remembered stack-local RTX (one per (mode, slot) pair)
   and marks it live so the garbage collector does not reclaim it.
   NOTE(review): assumes P->machine has already been allocated (done by
   ix86_init_machine_status); slots never assigned are presumably still
   NULL and tolerated by ggc_mark_rtx -- confirm.  */
static void
ix86_mark_machine_status (p)
     struct function *p;
{
  enum machine_mode mode;
  int n;

  for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
       mode = (enum machine_mode) ((int) mode + 1))
    for (n = 0; n < MAX_386_STACK_LOCALS; n++)
      ggc_mark_rtx (p->machine->stack_locals[(int) mode][n]);
}
6088
e075ae69
RH
6089/* Return a MEM corresponding to a stack slot with mode MODE.
6090 Allocate a new slot if necessary.
6091
6092 The RTL for a function can have several slots available: N is
6093 which slot to use. */
6094
6095rtx
6096assign_386_stack_local (mode, n)
6097 enum machine_mode mode;
6098 int n;
6099{
6100 if (n < 0 || n >= MAX_386_STACK_LOCALS)
6101 abort ();
6102
6103 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
6104 ix86_stack_locals[(int) mode][n]
6105 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
6106
6107 return ix86_stack_locals[(int) mode][n];
6108}
6109\f
6110/* Calculate the length of the memory address in the instruction
6111 encoding. Does not include the one-byte modrm, opcode, or prefix. */
6112
6113static int
6114memory_address_length (addr)
6115 rtx addr;
6116{
6117 struct ix86_address parts;
6118 rtx base, index, disp;
6119 int len;
6120
6121 if (GET_CODE (addr) == PRE_DEC
6122 || GET_CODE (addr) == POST_INC)
6123 return 0;
3f803cd9 6124
e075ae69
RH
6125 if (! ix86_decompose_address (addr, &parts))
6126 abort ();
3f803cd9 6127
e075ae69
RH
6128 base = parts.base;
6129 index = parts.index;
6130 disp = parts.disp;
6131 len = 0;
3f803cd9 6132
e075ae69
RH
6133 /* Register Indirect. */
6134 if (base && !index && !disp)
6135 {
6136 /* Special cases: ebp and esp need the two-byte modrm form. */
6137 if (addr == stack_pointer_rtx
6138 || addr == arg_pointer_rtx
564d80f4
JH
6139 || addr == frame_pointer_rtx
6140 || addr == hard_frame_pointer_rtx)
e075ae69 6141 len = 1;
3f803cd9 6142 }
e9a25f70 6143
e075ae69
RH
6144 /* Direct Addressing. */
6145 else if (disp && !base && !index)
6146 len = 4;
6147
3f803cd9
SC
6148 else
6149 {
e075ae69
RH
6150 /* Find the length of the displacement constant. */
6151 if (disp)
6152 {
6153 if (GET_CODE (disp) == CONST_INT
6154 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
6155 len = 1;
6156 else
6157 len = 4;
6158 }
3f803cd9 6159
e075ae69
RH
6160 /* An index requires the two-byte modrm form. */
6161 if (index)
6162 len += 1;
3f803cd9
SC
6163 }
6164
e075ae69
RH
6165 return len;
6166}
79325812 6167
6ef67412
JH
6168/* Compute default value for "length_immediate" attribute. When SHORTFORM is set
6169 expect that insn have 8bit immediate alternative. */
e075ae69 6170int
6ef67412 6171ix86_attr_length_immediate_default (insn, shortform)
e075ae69 6172 rtx insn;
6ef67412 6173 int shortform;
e075ae69 6174{
6ef67412
JH
6175 int len = 0;
6176 int i;
e075ae69 6177 extract_insn (insn);
6ef67412
JH
6178 for (i = recog_data.n_operands - 1; i >= 0; --i)
6179 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 6180 {
6ef67412 6181 if (len)
3071fab5 6182 abort ();
6ef67412
JH
6183 if (shortform
6184 && GET_CODE (recog_data.operand[i]) == CONST_INT
6185 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
6186 len = 1;
6187 else
6188 {
6189 switch (get_attr_mode (insn))
6190 {
6191 case MODE_QI:
6192 len+=1;
6193 break;
6194 case MODE_HI:
6195 len+=2;
6196 break;
6197 case MODE_SI:
6198 len+=4;
6199 break;
6200 default:
6201 fatal_insn ("Unknown insn mode", insn);
6202 }
6203 }
3071fab5 6204 }
6ef67412
JH
6205 return len;
6206}
6207/* Compute default value for "length_address" attribute. */
6208int
6209ix86_attr_length_address_default (insn)
6210 rtx insn;
6211{
6212 int i;
6213 extract_insn (insn);
1ccbefce
RH
6214 for (i = recog_data.n_operands - 1; i >= 0; --i)
6215 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 6216 {
6ef67412 6217 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
6218 break;
6219 }
6ef67412 6220 return 0;
3f803cd9 6221}
e075ae69
RH
6222\f
6223/* Return the maximum number of instructions a cpu can issue. */
b657fc39 6224
e075ae69
RH
6225int
6226ix86_issue_rate ()
b657fc39 6227{
e075ae69 6228 switch (ix86_cpu)
b657fc39 6229 {
e075ae69
RH
6230 case PROCESSOR_PENTIUM:
6231 case PROCESSOR_K6:
6232 return 2;
79325812 6233
e075ae69
RH
6234 case PROCESSOR_PENTIUMPRO:
6235 return 3;
b657fc39 6236
b657fc39 6237 default:
e075ae69 6238 return 1;
b657fc39 6239 }
b657fc39
L
6240}
6241
e075ae69
RH
6242/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
6243 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 6244
e075ae69
RH
6245static int
6246ix86_flags_dependant (insn, dep_insn, insn_type)
6247 rtx insn, dep_insn;
6248 enum attr_type insn_type;
6249{
6250 rtx set, set2;
b657fc39 6251
e075ae69
RH
6252 /* Simplify the test for uninteresting insns. */
6253 if (insn_type != TYPE_SETCC
6254 && insn_type != TYPE_ICMOV
6255 && insn_type != TYPE_FCMOV
6256 && insn_type != TYPE_IBR)
6257 return 0;
b657fc39 6258
e075ae69
RH
6259 if ((set = single_set (dep_insn)) != 0)
6260 {
6261 set = SET_DEST (set);
6262 set2 = NULL_RTX;
6263 }
6264 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
6265 && XVECLEN (PATTERN (dep_insn), 0) == 2
6266 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
6267 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
6268 {
6269 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6270 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6271 }
78a0d70c
ZW
6272 else
6273 return 0;
b657fc39 6274
78a0d70c
ZW
6275 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
6276 return 0;
b657fc39 6277
78a0d70c
ZW
6278 /* This test is true if the dependant insn reads the flags but
6279 not any other potentially set register. */
6280 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
6281 return 0;
6282
6283 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
6284 return 0;
6285
6286 return 1;
e075ae69 6287}
b657fc39 6288
e075ae69
RH
/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  Used by the Pentium cost
   model for the Address Generation Interlock stall.  */

static int
ix86_agi_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx addr;

  if (insn_type == TYPE_LEA)
    {
      /* For LEA the "address" is the source of the SET; the pattern
	 may be a bare SET or a PARALLEL whose first element is the SET.  */
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
	;
      else if (GET_CODE (addr) == PARALLEL
	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      else
	abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      /* Otherwise scan the operands for the first MEM and take its
	 address expression.  No MEM means no address dependency.  */
      int i;
      extract_insn (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    goto found;
	  }
      return 0;
    found:;
    }

  /* True iff DEP_INSN writes something the address expression uses.  */
  return modified_in_p (addr, dep_insn);
}
a269a03c
JC
6327
6328int
e075ae69 6329ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
6330 rtx insn, link, dep_insn;
6331 int cost;
6332{
e075ae69 6333 enum attr_type insn_type, dep_insn_type;
0b5107cf 6334 enum attr_memory memory;
e075ae69 6335 rtx set, set2;
9b00189f 6336 int dep_insn_code_number;
a269a03c 6337
309ada50 6338 /* Anti and output depenancies have zero cost on all CPUs. */
e075ae69 6339 if (REG_NOTE_KIND (link) != 0)
309ada50 6340 return 0;
a269a03c 6341
9b00189f
JH
6342 dep_insn_code_number = recog_memoized (dep_insn);
6343
e075ae69 6344 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 6345 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 6346 return cost;
a269a03c 6347
1c71e60e
JH
6348 insn_type = get_attr_type (insn);
6349 dep_insn_type = get_attr_type (dep_insn);
9b00189f 6350
1c71e60e
JH
6351 /* Prologue and epilogue allocators can have a false dependency on ebp.
6352 This results in one cycle extra stall on Pentium prologue scheduling,
6353 so handle this important case manually. */
6354 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
6355 && dep_insn_type == TYPE_ALU
9b00189f
JH
6356 && !reg_mentioned_p (stack_pointer_rtx, insn))
6357 return 0;
6358
a269a03c
JC
6359 switch (ix86_cpu)
6360 {
6361 case PROCESSOR_PENTIUM:
e075ae69
RH
6362 /* Address Generation Interlock adds a cycle of latency. */
6363 if (ix86_agi_dependant (insn, dep_insn, insn_type))
6364 cost += 1;
6365
6366 /* ??? Compares pair with jump/setcc. */
6367 if (ix86_flags_dependant (insn, dep_insn, insn_type))
6368 cost = 0;
6369
6370 /* Floating point stores require value to be ready one cycle ealier. */
6371 if (insn_type == TYPE_FMOV
6372 && get_attr_memory (insn) == MEMORY_STORE
6373 && !ix86_agi_dependant (insn, dep_insn, insn_type))
6374 cost += 1;
6375 break;
a269a03c 6376
e075ae69
RH
6377 case PROCESSOR_PENTIUMPRO:
6378 /* Since we can't represent delayed latencies of load+operation,
6379 increase the cost here for non-imov insns. */
6380 if (dep_insn_type != TYPE_IMOV
6381 && dep_insn_type != TYPE_FMOV
0b5107cf
JH
6382 && ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
6383 || memory == MEMORY_BOTH))
e075ae69
RH
6384 cost += 1;
6385
6386 /* INT->FP conversion is expensive. */
6387 if (get_attr_fp_int_src (dep_insn))
6388 cost += 5;
6389
6390 /* There is one cycle extra latency between an FP op and a store. */
6391 if (insn_type == TYPE_FMOV
6392 && (set = single_set (dep_insn)) != NULL_RTX
6393 && (set2 = single_set (insn)) != NULL_RTX
6394 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
6395 && GET_CODE (SET_DEST (set2)) == MEM)
6396 cost += 1;
6397 break;
a269a03c 6398
e075ae69
RH
6399 case PROCESSOR_K6:
6400 /* The esp dependency is resolved before the instruction is really
6401 finished. */
6402 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
6403 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
6404 return 1;
a269a03c 6405
e075ae69
RH
6406 /* Since we can't represent delayed latencies of load+operation,
6407 increase the cost here for non-imov insns. */
0b5107cf
JH
6408 if ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
6409 || memory == MEMORY_BOTH)
e075ae69
RH
6410 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
6411
6412 /* INT->FP conversion is expensive. */
6413 if (get_attr_fp_int_src (dep_insn))
6414 cost += 5;
a14003ee 6415 break;
e075ae69 6416
309ada50 6417 case PROCESSOR_ATHLON:
0b5107cf
JH
6418 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6419 || memory == MEMORY_BOTH)
6420 {
6421 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
6422 cost += 2;
6423 else
6424 cost += 3;
6425 }
309ada50 6426
a269a03c 6427 default:
a269a03c
JC
6428 break;
6429 }
6430
6431 return cost;
6432}
0a726ef1 6433
e075ae69
RH
/* Scheduling state carried between ix86_sched_reorder and
   ix86_variable_issue calls within one block.  Only the PentiumPro
   model keeps state today; the union leaves room for other CPUs.  */
static union
{
  struct ppro_sched_data
  {
    /* Insns occupying the three decoders this cycle.  Slot 0 takes
       complex (many/few uop) insns; slots 1 and 2 single-uop insns.  */
    rtx decode[3];
    /* Number of insns issued so far in the current cycle.  */
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;
0a726ef1 6442
e075ae69
RH
6443static int
6444ix86_safe_length (insn)
6445 rtx insn;
6446{
6447 if (recog_memoized (insn) >= 0)
6448 return get_attr_length(insn);
6449 else
6450 return 128;
6451}
0a726ef1 6452
e075ae69
RH
/* Like ix86_safe_length, used by the Pentium pairing code as a prefix
   length, with 0 for unrecognizable insns.
   NOTE(review): this returns get_attr_length, i.e. the *full* insn
   length, not a prefix-specific attribute -- looks like a copy of
   ix86_safe_length; confirm against the length attributes in i386.md
   before relying on it.  */
static int
ix86_safe_length_prefix (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_length(insn);
  else
    return 0;
}
6462
6463static enum attr_memory
6464ix86_safe_memory (insn)
6465 rtx insn;
6466{
6467 if (recog_memoized (insn) >= 0)
6468 return get_attr_memory(insn);
6469 else
6470 return MEMORY_UNKNOWN;
6471}
0a726ef1 6472
e075ae69
RH
6473static enum attr_pent_pair
6474ix86_safe_pent_pair (insn)
6475 rtx insn;
6476{
6477 if (recog_memoized (insn) >= 0)
6478 return get_attr_pent_pair(insn);
6479 else
6480 return PENT_PAIR_NP;
6481}
0a726ef1 6482
e075ae69
RH
6483static enum attr_ppro_uops
6484ix86_safe_ppro_uops (insn)
6485 rtx insn;
6486{
6487 if (recog_memoized (insn) >= 0)
6488 return get_attr_ppro_uops (insn);
6489 else
6490 return PPRO_UOPS_MANY;
6491}
0a726ef1 6492
e075ae69
RH
6493static void
6494ix86_dump_ppro_packet (dump)
6495 FILE *dump;
0a726ef1 6496{
e075ae69 6497 if (ix86_sched_data.ppro.decode[0])
0a726ef1 6498 {
e075ae69
RH
6499 fprintf (dump, "PPRO packet: %d",
6500 INSN_UID (ix86_sched_data.ppro.decode[0]));
6501 if (ix86_sched_data.ppro.decode[1])
6502 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
6503 if (ix86_sched_data.ppro.decode[2])
6504 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
6505 fputc ('\n', dump);
6506 }
6507}
0a726ef1 6508
e075ae69 6509/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 6510
e075ae69
RH
6511void
6512ix86_sched_init (dump, sched_verbose)
6513 FILE *dump ATTRIBUTE_UNUSED;
6514 int sched_verbose ATTRIBUTE_UNUSED;
6515{
6516 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
6517}
6518
6519/* Shift INSN to SLOT, and shift everything else down. */
6520
6521static void
6522ix86_reorder_insn (insnp, slot)
6523 rtx *insnp, *slot;
6524{
6525 if (insnp != slot)
6526 {
6527 rtx insn = *insnp;
6528 do
6529 insnp[0] = insnp[1];
6530 while (++insnp != slot);
6531 *insnp = insn;
0a726ef1 6532 }
e075ae69
RH
6533}
6534
/* Find an instruction with given pairability and minimal amount of cycles
   lost by the fact that the CPU waits for both pipelines to finish before
   reading next instructions.  Also take care that both instructions together
   can not exceed 7 bytes.

   E_READY points at the last (highest priority) ready-list entry, READY
   at the first; TYPE is the required pairability; FIRST is the insn we
   are pairing with.  Returns a pointer into the ready list, or NULL.  */

static rtx *
ix86_pent_find_pair (e_ready, ready, type, first)
     rtx *e_ready;
     rtx *ready;
     enum attr_pent_pair type;
     rtx first;
{
  int mincycles, cycles;
  enum attr_pent_pair tmp;
  enum attr_memory memory;
  rtx *insnp, *bestinsnp = NULL;

  /* FIRST itself must fit the 7-byte pairing window.  */
  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
    return NULL;

  memory = ix86_safe_memory (first);
  cycles = result_ready_cost (first);
  mincycles = INT_MAX;

  /* Scan highest priority first; stop early once a zero-cost pair is
     found (mincycles == 0 terminates the loop).  */
  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
      {
	enum attr_memory second_memory;
	int secondcycles, currentcycles;

	second_memory = ix86_safe_memory (*insnp);
	secondcycles = result_ready_cost (*insnp);
	/* Both pipes retire together, so the cost of a pair is the
	   latency difference between its members.  */
	currentcycles = abs (cycles - secondcycles);

	if (secondcycles >= 1 && cycles >= 1)
	  {
	    /* Two read/modify/write instructions together takes two
	       cycles longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
	      currentcycles += 2;

	    /* Read modify/write instruction followed by read/modify
	       takes one cycle longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
	        && tmp != PENT_PAIR_UV
	        && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
	      currentcycles += 1;
	  }
	if (currentcycles < mincycles)
	  bestinsnp = insnp, mincycles = currentcycles;
      }

  return bestinsnp;
}
6590
/* Subroutines of ix86_sched_reorder.  */

/* Reorder the ready list so the insn at its head (E_READY) gets a
   suitable pairing partner for the Pentium U/V pipes.  READY points at
   the lowest priority entry, E_READY at the highest.  */
static void
ix86_sched_reorder_pentium (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  enum attr_pent_pair pair1, pair2;
  rtx *insnp;

  /* This wouldn't be necessary if Haifa knew that static insn ordering
     is important to which pipe an insn is issued to.  So we have to make
     some minor rearrangements.  */

  pair1 = ix86_safe_pent_pair (*e_ready);

  /* If the first insn is non-pairable, let it be.  */
  if (pair1 == PENT_PAIR_NP)
    return;

  pair2 = PENT_PAIR_NP;
  insnp = 0;

  /* If the first insn is UV or PV pairable, search for a PU
     insn to go with.  */
  if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PU, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PU;
    }

  /* If the first insn is PU or UV pairable, search for a PV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP
      && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PV;
    }

  /* If the first insn is pairable, search for a UV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP)
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_UV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_UV;
    }

  /* No partner found; leave the ready list alone.  */
  if (pair2 == PENT_PAIR_NP)
    return;

  /* Found something!  Decide if we need to swap the order.  The partner
     goes right at the head (displacing the first insn) when pipe
     assignment or the memory-op heuristic demands it, otherwise just
     behind the head.  */
  if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
      || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
	  && ix86_safe_memory (*e_ready) == MEMORY_BOTH
	  && ix86_safe_memory (*insnp) == MEMORY_LOAD))
    ix86_reorder_insn (insnp, e_ready);
  else
    ix86_reorder_insn (insnp, e_ready - 1);
}
e075ae69 6657
/* Reorder the ready list to feed the three PentiumPro decoders well:
   at most one multi-uop insn per cycle (decoder 0), single-uop insns
   for the other two slots.  READY points at the lowest priority entry,
   E_READY at the highest.  Records the issue count for
   ix86_variable_issue via ix86_sched_data.  */
static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof(decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready >= e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  /* Always report at least one issued insn so the main scheduler
     makes progress.  */
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}
fb693d44 6742
78a0d70c
ZW
6743
6744/* We are about to being issuing insns for this clock cycle.
6745 Override the default sort algorithm to better slot instructions. */
6746int
6747ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
6748 FILE *dump ATTRIBUTE_UNUSED;
6749 int sched_verbose ATTRIBUTE_UNUSED;
6750 rtx *ready;
6751 int n_ready;
6752 int clock_var ATTRIBUTE_UNUSED;
6753{
6754 rtx *e_ready = ready + n_ready - 1;
fb693d44 6755
78a0d70c
ZW
6756 if (n_ready < 2)
6757 goto out;
e075ae69 6758
78a0d70c
ZW
6759 switch (ix86_cpu)
6760 {
6761 default:
6762 break;
e075ae69 6763
78a0d70c
ZW
6764 case PROCESSOR_PENTIUM:
6765 ix86_sched_reorder_pentium (ready, e_ready);
6766 break;
e075ae69 6767
78a0d70c
ZW
6768 case PROCESSOR_PENTIUMPRO:
6769 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 6770 break;
fb693d44
RH
6771 }
6772
e075ae69
RH
6773out:
6774 return ix86_issue_rate ();
6775}
fb693d44 6776
e075ae69
RH
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.

   For PentiumPro this also maintains the decoder-slot model in
   ix86_sched_data: multi-uop insns flush the packet, single-uop insns
   fill slots, and a full packet is dumped (when verbose) and reset.  */

int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    /* A many-uop insn occupies all decoders by itself: dump the
	       pending packet, then dump this insn as its own packet and
	       leave the decoders empty.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    /* A few-uop insn starts a new packet in decoder 0.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    /* Single-uop insn: take the first free decoder slot.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    /* Filling the last slot completes the packet.  */
	    if (i == 2)
	      {
	        if (sched_verbose)
	          ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}
a7180f70
BS
6839\f
6840/* Compute the alignment given to a constant that is being placed in memory.
6841 EXP is the constant and ALIGN is the alignment that the object would
6842 ordinarily have.
6843 The value of this function is used instead of that alignment to align
6844 the object. */
6845
6846int
6847ix86_constant_alignment (exp, align)
6848 tree exp;
6849 int align;
6850{
6851 if (TREE_CODE (exp) == REAL_CST)
6852 {
6853 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
6854 return 64;
6855 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
6856 return 128;
6857 }
6858 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
6859 && align < 256)
6860 return 256;
6861
6862 return align;
6863}
6864
6865/* Compute the alignment for a static variable.
6866 TYPE is the data type, and ALIGN is the alignment that
6867 the object would ordinarily have. The value of this function is used
6868 instead of that alignment to align the object. */
6869
6870int
6871ix86_data_alignment (type, align)
6872 tree type;
6873 int align;
6874{
6875 if (AGGREGATE_TYPE_P (type)
6876 && TYPE_SIZE (type)
6877 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6878 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
6879 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
6880 return 256;
6881
6882 if (TREE_CODE (type) == ARRAY_TYPE)
6883 {
6884 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
6885 return 64;
6886 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
6887 return 128;
6888 }
6889 else if (TREE_CODE (type) == COMPLEX_TYPE)
6890 {
6891
6892 if (TYPE_MODE (type) == DCmode && align < 64)
6893 return 64;
6894 if (TYPE_MODE (type) == XCmode && align < 128)
6895 return 128;
6896 }
6897 else if ((TREE_CODE (type) == RECORD_TYPE
6898 || TREE_CODE (type) == UNION_TYPE
6899 || TREE_CODE (type) == QUAL_UNION_TYPE)
6900 && TYPE_FIELDS (type))
6901 {
6902 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
6903 return 64;
6904 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
6905 return 128;
6906 }
6907 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
6908 || TREE_CODE (type) == INTEGER_TYPE)
6909 {
6910 if (TYPE_MODE (type) == DFmode && align < 64)
6911 return 64;
6912 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
6913 return 128;
6914 }
6915
6916 return align;
6917}
6918
6919/* Compute the alignment for a local variable.
6920 TYPE is the data type, and ALIGN is the alignment that
6921 the object would ordinarily have. The value of this macro is used
6922 instead of that alignment to align the object. */
6923
6924int
6925ix86_local_alignment (type, align)
6926 tree type;
6927 int align;
6928{
6929 if (TREE_CODE (type) == ARRAY_TYPE)
6930 {
6931 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
6932 return 64;
6933 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
6934 return 128;
6935 }
6936 else if (TREE_CODE (type) == COMPLEX_TYPE)
6937 {
6938 if (TYPE_MODE (type) == DCmode && align < 64)
6939 return 64;
6940 if (TYPE_MODE (type) == XCmode && align < 128)
6941 return 128;
6942 }
6943 else if ((TREE_CODE (type) == RECORD_TYPE
6944 || TREE_CODE (type) == UNION_TYPE
6945 || TREE_CODE (type) == QUAL_UNION_TYPE)
6946 && TYPE_FIELDS (type))
6947 {
6948 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
6949 return 64;
6950 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
6951 return 128;
6952 }
6953 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
6954 || TREE_CODE (type) == INTEGER_TYPE)
6955 {
6956
6957 if (TYPE_MODE (type) == DFmode && align < 64)
6958 return 64;
6959 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
6960 return 128;
6961 }
6962 return align;
6963}
This page took 1.834706 seconds and 5 git commands to generate.