]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
i386.c (fcmov_comparison_operator): Check for CCFPmode or CCFPUmode instead of CCmode.
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
e075ae69 1/* Subroutines used for code generation on IA-32.
4592bdcb
JL
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000
3 Free Software Foundation, Inc.
2a2ab3f9
JVA
4
5This file is part of GNU CC.
6
7GNU CC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU CC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU CC; see the file COPYING. If not, write to
97aadbb9 19the Free Software Foundation, 59 Temple Place - Suite 330,
0f290768 20Boston, MA 02111-1307, USA. */
2a2ab3f9 21
2a2ab3f9 22#include "config.h"
1fba7553 23#include <setjmp.h>
bb5177ac 24#include "system.h"
2a2ab3f9 25#include "rtl.h"
6baf1cc8
BS
26#include "tree.h"
27#include "tm_p.h"
2a2ab3f9
JVA
28#include "regs.h"
29#include "hard-reg-set.h"
30#include "real.h"
31#include "insn-config.h"
32#include "conditions.h"
33#include "insn-flags.h"
34#include "output.h"
35#include "insn-attr.h"
2a2ab3f9 36#include "flags.h"
a8ffcc81 37#include "except.h"
ecbc4695 38#include "function.h"
00c79232 39#include "recog.h"
ced8dd8c 40#include "expr.h"
f103890b 41#include "toplev.h"
e075ae69 42#include "basic-block.h"
1526a060 43#include "ggc.h"
2a2ab3f9 44
8dfe5673
RK
45#ifndef CHECK_STACK_LIMIT
46#define CHECK_STACK_LIMIT -1
47#endif
48
32b5b1aa
SC
49/* Processor costs (relative to an add) */
50struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 51 1, /* cost of an add instruction */
32b5b1aa
SC
52 1, /* cost of a lea instruction */
53 3, /* variable shift costs */
54 2, /* constant shift costs */
55 6, /* cost of starting a multiply */
56 1, /* cost of multiply per each bit set */
e075ae69 57 23, /* cost of a divide/mod */
96e7ae40 58 15, /* "large" insn */
e2e52e1b 59 3, /* MOVE_RATIO */
7c6b971d 60 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
61 {2, 4, 2}, /* cost of loading integer registers
62 in QImode, HImode and SImode.
0f290768 63 Relative to reg-reg move (2). */
96e7ae40
JH
64 {2, 4, 2}, /* cost of storing integer registers */
65 2, /* cost of reg,reg fld/fst */
66 {8, 8, 8}, /* cost of loading fp registers
67 in SFmode, DFmode and XFmode */
68 {8, 8, 8} /* cost of loading integer registers */
32b5b1aa
SC
69};
70
71struct processor_costs i486_cost = { /* 486 specific costs */
72 1, /* cost of an add instruction */
73 1, /* cost of a lea instruction */
74 3, /* variable shift costs */
75 2, /* constant shift costs */
76 12, /* cost of starting a multiply */
77 1, /* cost of multiply per each bit set */
e075ae69 78 40, /* cost of a divide/mod */
96e7ae40 79 15, /* "large" insn */
e2e52e1b 80 3, /* MOVE_RATIO */
7c6b971d 81 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
82 {2, 4, 2}, /* cost of loading integer registers
83 in QImode, HImode and SImode.
0f290768 84 Relative to reg-reg move (2). */
96e7ae40
JH
85 {2, 4, 2}, /* cost of storing integer registers */
86 2, /* cost of reg,reg fld/fst */
87 {8, 8, 8}, /* cost of loading fp registers
88 in SFmode, DFmode and XFmode */
89 {8, 8, 8} /* cost of loading integer registers */
32b5b1aa
SC
90};
91
e5cb57e8 92struct processor_costs pentium_cost = {
32b5b1aa
SC
93 1, /* cost of an add instruction */
94 1, /* cost of a lea instruction */
856b07a1 95 4, /* variable shift costs */
e5cb57e8 96 1, /* constant shift costs */
856b07a1
SC
97 11, /* cost of starting a multiply */
98 0, /* cost of multiply per each bit set */
e075ae69 99 25, /* cost of a divide/mod */
96e7ae40 100 8, /* "large" insn */
e2e52e1b 101 6, /* MOVE_RATIO */
7c6b971d 102 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
0f290768 105 Relative to reg-reg move (2). */
96e7ae40
JH
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {2, 2, 6}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {4, 4, 6} /* cost of loading integer registers */
32b5b1aa
SC
111};
112
856b07a1
SC
113struct processor_costs pentiumpro_cost = {
114 1, /* cost of an add instruction */
115 1, /* cost of a lea instruction */
e075ae69 116 1, /* variable shift costs */
856b07a1 117 1, /* constant shift costs */
369e59b1 118 4, /* cost of starting a multiply */
856b07a1 119 0, /* cost of multiply per each bit set */
e075ae69 120 17, /* cost of a divide/mod */
96e7ae40 121 8, /* "large" insn */
e2e52e1b 122 6, /* MOVE_RATIO */
7c6b971d 123 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
124 {4, 4, 4}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
0f290768 126 Relative to reg-reg move (2). */
96e7ae40
JH
127 {2, 2, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {2, 2, 6}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {4, 4, 6} /* cost of loading integer registers */
856b07a1
SC
132};
133
a269a03c
JC
134struct processor_costs k6_cost = {
135 1, /* cost of an add instruction */
e075ae69 136 2, /* cost of a lea instruction */
a269a03c
JC
137 1, /* variable shift costs */
138 1, /* constant shift costs */
73fe76e4 139 3, /* cost of starting a multiply */
a269a03c 140 0, /* cost of multiply per each bit set */
e075ae69 141 18, /* cost of a divide/mod */
96e7ae40 142 8, /* "large" insn */
e2e52e1b 143 4, /* MOVE_RATIO */
7c6b971d 144 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
145 {4, 5, 4}, /* cost of loading integer registers
146 in QImode, HImode and SImode.
0f290768 147 Relative to reg-reg move (2). */
96e7ae40
JH
148 {2, 3, 2}, /* cost of storing integer registers */
149 4, /* cost of reg,reg fld/fst */
150 {6, 6, 6}, /* cost of loading fp registers
151 in SFmode, DFmode and XFmode */
152 {4, 4, 4} /* cost of loading integer registers */
a269a03c
JC
153};
154
309ada50
JH
155struct processor_costs athlon_cost = {
156 1, /* cost of an add instruction */
0b5107cf 157 2, /* cost of a lea instruction */
309ada50
JH
158 1, /* variable shift costs */
159 1, /* constant shift costs */
160 5, /* cost of starting a multiply */
161 0, /* cost of multiply per each bit set */
0b5107cf 162 42, /* cost of a divide/mod */
309ada50 163 8, /* "large" insn */
e2e52e1b 164 9, /* MOVE_RATIO */
309ada50
JH
165 4, /* cost for loading QImode using movzbl */
166 {4, 5, 4}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
0f290768 168 Relative to reg-reg move (2). */
309ada50
JH
169 {2, 3, 2}, /* cost of storing integer registers */
170 4, /* cost of reg,reg fld/fst */
0b5107cf 171 {6, 6, 20}, /* cost of loading fp registers
309ada50 172 in SFmode, DFmode and XFmode */
0b5107cf 173 {4, 4, 16} /* cost of loading integer registers */
309ada50
JH
174};
175
32b5b1aa
SC
176struct processor_costs *ix86_cost = &pentium_cost;
177
a269a03c
JC
178/* Processor feature/optimization bitmasks. */
179#define m_386 (1<<PROCESSOR_I386)
180#define m_486 (1<<PROCESSOR_I486)
181#define m_PENT (1<<PROCESSOR_PENTIUM)
182#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
183#define m_K6 (1<<PROCESSOR_K6)
309ada50 184#define m_ATHLON (1<<PROCESSOR_ATHLON)
a269a03c 185
309ada50
JH
186const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
187const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
a269a03c 188const int x86_zero_extend_with_and = m_486 | m_PENT;
369e59b1 189const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */;
e075ae69 190const int x86_double_with_add = ~m_386;
a269a03c 191const int x86_use_bit_test = m_386;
e2e52e1b 192const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
a269a03c
JC
193const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
194const int x86_use_any_reg = m_486;
309ada50
JH
195const int x86_cmove = m_PPRO | m_ATHLON;
196const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
197const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON;
e075ae69
RH
198const int x86_partial_reg_stall = m_PPRO;
199const int x86_use_loop = m_K6;
309ada50 200const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
e075ae69
RH
201const int x86_use_mov0 = m_K6;
202const int x86_use_cltd = ~(m_PENT | m_K6);
203const int x86_read_modify_write = ~m_PENT;
204const int x86_read_modify = ~(m_PENT | m_PPRO);
205const int x86_split_long_moves = m_PPRO;
e9e80858 206const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
f90800f8 207const int x86_single_stringop = m_386;
d9f32422
JH
208const int x86_qimode_math = ~(0);
209const int x86_promote_qi_regs = 0;
210const int x86_himode_math = ~(m_PPRO);
211const int x86_promote_hi_regs = m_PPRO;
bdeb029c
JH
212const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
213const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
214const int x86_add_esp_4 = m_ATHLON | m_K6;
215const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
0b5107cf
JH
216const int x86_integer_DFmode_moves = ~m_ATHLON;
217const int x86_partial_reg_dependency = m_ATHLON;
218const int x86_memory_mismatch_stall = m_ATHLON;
a269a03c 219
564d80f4 220#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
2a2ab3f9 221
e075ae69
RH
222const char * const hi_reg_name[] = HI_REGISTER_NAMES;
223const char * const qi_reg_name[] = QI_REGISTER_NAMES;
224const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
4c0d89b5
RS
225
226/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 227 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 228
e075ae69 229enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
230{
231 /* ax, dx, cx, bx */
ab408a86 232 AREG, DREG, CREG, BREG,
4c0d89b5 233 /* si, di, bp, sp */
e075ae69 234 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
235 /* FP registers */
236 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 237 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 238 /* arg pointer */
83774849 239 NON_Q_REGS,
564d80f4 240 /* flags, fpsr, dirflag, frame */
a7180f70
BS
241 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
242 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
243 SSE_REGS, SSE_REGS,
244 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
245 MMX_REGS, MMX_REGS
4c0d89b5 246};
c572e5ba 247
83774849
RH
248/* The "default" register map. */
249
0f290768 250int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
251{
252 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
253 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
254 -1, -1, -1, -1, /* arg, flags, fpsr, dir */
a7180f70
BS
255 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
256 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
83774849
RH
257};
258
259/* Define the register numbers to be used in Dwarf debugging information.
260 The SVR4 reference port C compiler uses the following register numbers
261 in its Dwarf output code:
262 0 for %eax (gcc regno = 0)
263 1 for %ecx (gcc regno = 2)
264 2 for %edx (gcc regno = 1)
265 3 for %ebx (gcc regno = 3)
266 4 for %esp (gcc regno = 7)
267 5 for %ebp (gcc regno = 6)
268 6 for %esi (gcc regno = 4)
269 7 for %edi (gcc regno = 5)
270 The following three DWARF register numbers are never generated by
271 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
272 believes these numbers have these meanings.
273 8 for %eip (no gcc equivalent)
274 9 for %eflags (gcc regno = 17)
275 10 for %trapno (no gcc equivalent)
276 It is not at all clear how we should number the FP stack registers
277 for the x86 architecture. If the version of SDB on x86/svr4 were
278 a bit less brain dead with respect to floating-point then we would
279 have a precedent to follow with respect to DWARF register numbers
280 for x86 FP registers, but the SDB on x86/svr4 is so completely
281 broken with respect to FP registers that it is hardly worth thinking
282 of it as something to strive for compatibility with.
283 The version of x86/svr4 SDB I have at the moment does (partially)
284 seem to believe that DWARF register number 11 is associated with
285 the x86 register %st(0), but that's about all. Higher DWARF
286 register numbers don't seem to be associated with anything in
287 particular, and even for DWARF regno 11, SDB only seems to under-
288 stand that it should say that a variable lives in %st(0) (when
289 asked via an `=' command) if we said it was in DWARF regno 11,
290 but SDB still prints garbage when asked for the value of the
291 variable in question (via a `/' command).
292 (Also note that the labels SDB prints for various FP stack regs
293 when doing an `x' command are all wrong.)
294 Note that these problems generally don't affect the native SVR4
295 C compiler because it doesn't allow the use of -O with -g and
296 because when it is *not* optimizing, it allocates a memory
297 location for each floating-point variable, and the memory
298 location is what gets described in the DWARF AT_location
299 attribute for the variable in question.
300 Regardless of the severe mental illness of the x86/svr4 SDB, we
301 do something sensible here and we use the following DWARF
302 register numbers. Note that these are all stack-top-relative
303 numbers.
304 11 for %st(0) (gcc regno = 8)
305 12 for %st(1) (gcc regno = 9)
306 13 for %st(2) (gcc regno = 10)
307 14 for %st(3) (gcc regno = 11)
308 15 for %st(4) (gcc regno = 12)
309 16 for %st(5) (gcc regno = 13)
310 17 for %st(6) (gcc regno = 14)
311 18 for %st(7) (gcc regno = 15)
312*/
0f290768 313int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
314{
315 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
316 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
317 -1, 9, -1, -1, /* arg, flags, fpsr, dir */
a7180f70
BS
318 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
319 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
83774849
RH
320};
321
c572e5ba
JVA
322/* Test and compare insns in i386.md store the information needed to
323 generate branch and scc insns here. */
324
e075ae69
RH
325struct rtx_def *ix86_compare_op0 = NULL_RTX;
326struct rtx_def *ix86_compare_op1 = NULL_RTX;
f5316dfe 327
36edd3cc
BS
328#define MAX_386_STACK_LOCALS 2
329
330/* Define the structure for the machine field in struct function. */
331struct machine_function
332{
333 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
334};
335
01d939e8 336#define ix86_stack_locals (cfun->machine->stack_locals)
36edd3cc 337
c8c5cb99 338/* which cpu are we scheduling for */
e42ea7f9 339enum processor_type ix86_cpu;
c8c5cb99
SC
340
341/* which instruction set architecture to use. */
c942177e 342int ix86_arch;
c8c5cb99
SC
343
344/* Strings to hold which cpu and instruction set architecture to use. */
9c23aa47
ZW
345const char *ix86_cpu_string; /* for -mcpu=<xxx> */
346const char *ix86_arch_string; /* for -march=<xxx> */
c8c5cb99 347
f5316dfe 348/* Register allocation order */
e075ae69 349const char *ix86_reg_alloc_order;
f5316dfe
MM
350static char regs_allocated[FIRST_PSEUDO_REGISTER];
351
0f290768 352/* # of registers to use to pass arguments. */
e075ae69 353const char *ix86_regparm_string;
e9a25f70 354
e075ae69
RH
355/* ix86_regparm_string as a number */
356int ix86_regparm;
e9a25f70
JL
357
358/* Alignment to use for loops and jumps: */
359
0f290768 360/* Power of two alignment for loops. */
e075ae69 361const char *ix86_align_loops_string;
e9a25f70 362
0f290768 363/* Power of two alignment for non-loop jumps. */
e075ae69 364const char *ix86_align_jumps_string;
e9a25f70 365
3af4bd89 366/* Power of two alignment for stack boundary in bytes. */
e075ae69 367const char *ix86_preferred_stack_boundary_string;
3af4bd89
JH
368
369/* Preferred alignment for stack boundary in bits. */
e075ae69 370int ix86_preferred_stack_boundary;
3af4bd89 371
e9a25f70 372/* Values 1-5: see jump.c */
e075ae69
RH
373int ix86_branch_cost;
374const char *ix86_branch_cost_string;
e9a25f70 375
0f290768 376/* Power of two alignment for functions. */
e075ae69
RH
377int ix86_align_funcs;
378const char *ix86_align_funcs_string;
b08de47e 379
0f290768 380/* Power of two alignment for loops. */
e075ae69 381int ix86_align_loops;
b08de47e 382
0f290768 383/* Power of two alignment for non-loop jumps. */
e075ae69
RH
384int ix86_align_jumps;
385\f
f6da8bc3
KG
386static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
387static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
e075ae69 388 int, int, FILE *));
f6da8bc3
KG
389static enum rtx_code unsigned_comparison PARAMS ((enum rtx_code code));
390static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
3a3677ff
RH
391static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
392 rtx *, rtx *));
f6da8bc3
KG
393static rtx gen_push PARAMS ((rtx));
394static int memory_address_length PARAMS ((rtx addr));
395static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
396static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
397static int ix86_safe_length PARAMS ((rtx));
398static enum attr_memory ix86_safe_memory PARAMS ((rtx));
399static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
400static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
401static void ix86_dump_ppro_packet PARAMS ((FILE *));
402static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
403static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
e075ae69 404 rtx));
f6da8bc3
KG
405static void ix86_init_machine_status PARAMS ((struct function *));
406static void ix86_mark_machine_status PARAMS ((struct function *));
407static void ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
408static int ix86_safe_length_prefix PARAMS ((rtx));
564d80f4
JH
409static HOST_WIDE_INT ix86_compute_frame_size PARAMS((HOST_WIDE_INT,
410 int *, int *, int *));
0903fcab
JH
411static int ix86_nsaved_regs PARAMS((void));
412static void ix86_emit_save_regs PARAMS((void));
da2d1d3a 413static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
0903fcab 414static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
c6991660
KG
415static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
416static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
55efb413 417static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
e075ae69
RH
418
419struct ix86_address
420{
421 rtx base, index, disp;
422 HOST_WIDE_INT scale;
423};
b08de47e 424
e075ae69 425static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
bd793c65
BS
426
427struct builtin_description;
428static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
429 rtx));
430static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
431 rtx));
432static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
433static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
434static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
435static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
436static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
e075ae69 437\f
f5316dfe
MM
438/* Sometimes certain combinations of command options do not make
439 sense on a particular target machine. You can define a macro
440 `OVERRIDE_OPTIONS' to take account of this. This macro, if
441 defined, is executed once just after all the command options have
442 been parsed.
443
444 Don't use this macro to turn on various extra optimizations for
445 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
446
447void
448override_options ()
449{
e075ae69
RH
450 /* Comes from final.c -- no real reason to change it. */
451#define MAX_CODE_ALIGN 16
f5316dfe 452
c8c5cb99
SC
453 static struct ptt
454 {
e075ae69
RH
455 struct processor_costs *cost; /* Processor costs */
456 int target_enable; /* Target flags to enable. */
457 int target_disable; /* Target flags to disable. */
458 int align_loop; /* Default alignments. */
459 int align_jump;
460 int align_func;
461 int branch_cost;
462 }
0f290768 463 const processor_target_table[PROCESSOR_max] =
e075ae69
RH
464 {
465 {&i386_cost, 0, 0, 2, 2, 2, 1},
466 {&i486_cost, 0, 0, 4, 4, 4, 1},
467 {&pentium_cost, 0, 0, -4, -4, -4, 1},
468 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
309ada50
JH
469 {&k6_cost, 0, 0, -5, -5, 4, 1},
470 {&athlon_cost, 0, 0, 4, -4, 4, 1}
e075ae69
RH
471 };
472
473 static struct pta
474 {
0f290768 475 const char *name; /* processor name or nickname. */
e075ae69
RH
476 enum processor_type processor;
477 }
0f290768 478 const processor_alias_table[] =
e075ae69
RH
479 {
480 {"i386", PROCESSOR_I386},
481 {"i486", PROCESSOR_I486},
482 {"i586", PROCESSOR_PENTIUM},
483 {"pentium", PROCESSOR_PENTIUM},
484 {"i686", PROCESSOR_PENTIUMPRO},
485 {"pentiumpro", PROCESSOR_PENTIUMPRO},
e075ae69 486 {"k6", PROCESSOR_K6},
309ada50 487 {"athlon", PROCESSOR_ATHLON},
3af4bd89 488 };
c8c5cb99 489
0f290768 490 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
c8c5cb99 491
f5316dfe
MM
492#ifdef SUBTARGET_OVERRIDE_OPTIONS
493 SUBTARGET_OVERRIDE_OPTIONS;
494#endif
495
5a6ee819 496 ix86_arch = PROCESSOR_I386;
e075ae69
RH
497 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
498
499 if (ix86_arch_string != 0)
500 {
501 int i;
502 for (i = 0; i < pta_size; i++)
503 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
504 {
505 ix86_arch = processor_alias_table[i].processor;
506 /* Default cpu tuning to the architecture. */
507 ix86_cpu = ix86_arch;
508 break;
509 }
510 if (i == pta_size)
511 error ("bad value (%s) for -march= switch", ix86_arch_string);
512 }
513
514 if (ix86_cpu_string != 0)
515 {
516 int i;
517 for (i = 0; i < pta_size; i++)
518 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
519 {
520 ix86_cpu = processor_alias_table[i].processor;
521 break;
522 }
523 if (i == pta_size)
524 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
525 }
526
527 ix86_cost = processor_target_table[ix86_cpu].cost;
528 target_flags |= processor_target_table[ix86_cpu].target_enable;
529 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
530
36edd3cc
BS
531 /* Arrange to set up i386_stack_locals for all functions. */
532 init_machine_status = ix86_init_machine_status;
1526a060 533 mark_machine_status = ix86_mark_machine_status;
36edd3cc 534
e9a25f70 535 /* Validate registers in register allocation order. */
e075ae69 536 if (ix86_reg_alloc_order)
f5316dfe 537 {
e075ae69
RH
538 int i, ch;
539 for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
f5316dfe 540 {
00c79232 541 int regno = 0;
79325812 542
f5316dfe
MM
543 switch (ch)
544 {
545 case 'a': regno = 0; break;
546 case 'd': regno = 1; break;
547 case 'c': regno = 2; break;
548 case 'b': regno = 3; break;
549 case 'S': regno = 4; break;
550 case 'D': regno = 5; break;
551 case 'B': regno = 6; break;
552
553 default: fatal ("Register '%c' is unknown", ch);
554 }
555
556 if (regs_allocated[regno])
e9a25f70 557 fatal ("Register '%c' already specified in allocation order", ch);
f5316dfe
MM
558
559 regs_allocated[regno] = 1;
560 }
561 }
b08de47e 562
0f290768 563 /* Validate -mregparm= value. */
e075ae69 564 if (ix86_regparm_string)
b08de47e 565 {
e075ae69
RH
566 ix86_regparm = atoi (ix86_regparm_string);
567 if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
e9a25f70 568 fatal ("-mregparm=%d is not between 0 and %d",
e075ae69 569 ix86_regparm, REGPARM_MAX);
b08de47e
MM
570 }
571
e9a25f70 572 /* Validate -malign-loops= value, or provide default. */
e075ae69
RH
573 ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
574 if (ix86_align_loops_string)
b08de47e 575 {
e075ae69
RH
576 ix86_align_loops = atoi (ix86_align_loops_string);
577 if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
b08de47e 578 fatal ("-malign-loops=%d is not between 0 and %d",
e075ae69 579 ix86_align_loops, MAX_CODE_ALIGN);
b08de47e 580 }
3af4bd89
JH
581
582 /* Validate -malign-jumps= value, or provide default. */
e075ae69
RH
583 ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
584 if (ix86_align_jumps_string)
b08de47e 585 {
e075ae69
RH
586 ix86_align_jumps = atoi (ix86_align_jumps_string);
587 if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
b08de47e 588 fatal ("-malign-jumps=%d is not between 0 and %d",
e075ae69 589 ix86_align_jumps, MAX_CODE_ALIGN);
b08de47e 590 }
b08de47e 591
0f290768 592 /* Validate -malign-functions= value, or provide default. */
e075ae69
RH
593 ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
594 if (ix86_align_funcs_string)
b08de47e 595 {
e075ae69
RH
596 ix86_align_funcs = atoi (ix86_align_funcs_string);
597 if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
b08de47e 598 fatal ("-malign-functions=%d is not between 0 and %d",
e075ae69 599 ix86_align_funcs, MAX_CODE_ALIGN);
b08de47e 600 }
3af4bd89 601
e4c0478d 602 /* Validate -mpreferred-stack-boundary= value, or provide default.
3af4bd89 603 The default of 128 bits is for Pentium III's SSE __m128. */
e075ae69
RH
604 ix86_preferred_stack_boundary = 128;
605 if (ix86_preferred_stack_boundary_string)
3af4bd89 606 {
e075ae69 607 int i = atoi (ix86_preferred_stack_boundary_string);
3af4bd89 608 if (i < 2 || i > 31)
e4c0478d 609 fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
e075ae69 610 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 611 }
77a989d1 612
0f290768 613 /* Validate -mbranch-cost= value, or provide default. */
e075ae69
RH
614 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
615 if (ix86_branch_cost_string)
804a8ee0 616 {
e075ae69
RH
617 ix86_branch_cost = atoi (ix86_branch_cost_string);
618 if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
619 fatal ("-mbranch-cost=%d is not between 0 and 5",
620 ix86_branch_cost);
804a8ee0 621 }
804a8ee0 622
e9a25f70
JL
623 /* Keep nonleaf frame pointers. */
624 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 625 flag_omit_frame_pointer = 1;
e075ae69
RH
626
627 /* If we're doing fast math, we don't care about comparison order
628 wrt NaNs. This lets us use a shorter comparison sequence. */
629 if (flag_fast_math)
630 target_flags &= ~MASK_IEEE_FP;
631
a7180f70
BS
632 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
633 on by -msse. */
634 if (TARGET_SSE)
635 target_flags |= MASK_MMX;
f5316dfe
MM
636}
637\f
638/* A C statement (sans semicolon) to choose the order in which to
639 allocate hard registers for pseudo-registers local to a basic
640 block.
641
642 Store the desired register order in the array `reg_alloc_order'.
643 Element 0 should be the register to allocate first; element 1, the
644 next register; and so on.
645
646 The macro body should not assume anything about the contents of
647 `reg_alloc_order' before execution of the macro.
648
649 On most machines, it is not necessary to define this macro. */
650
651void
652order_regs_for_local_alloc ()
653{
00c79232 654 int i, ch, order;
f5316dfe 655
e9a25f70
JL
656 /* User specified the register allocation order. */
657
e075ae69 658 if (ix86_reg_alloc_order)
f5316dfe 659 {
e075ae69 660 for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
f5316dfe 661 {
00c79232 662 int regno = 0;
79325812 663
f5316dfe
MM
664 switch (ch)
665 {
666 case 'a': regno = 0; break;
667 case 'd': regno = 1; break;
668 case 'c': regno = 2; break;
669 case 'b': regno = 3; break;
670 case 'S': regno = 4; break;
671 case 'D': regno = 5; break;
672 case 'B': regno = 6; break;
673 }
674
675 reg_alloc_order[order++] = regno;
676 }
677
678 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
679 {
e9a25f70 680 if (! regs_allocated[i])
f5316dfe
MM
681 reg_alloc_order[order++] = i;
682 }
683 }
684
0f290768 685 /* If user did not specify a register allocation order, use natural order. */
f5316dfe
MM
686 else
687 {
688 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
689 reg_alloc_order[i] = i;
f5316dfe
MM
690 }
691}
32b5b1aa
SC
692\f
693void
c6aded7c 694optimization_options (level, size)
32b5b1aa 695 int level;
bb5177ac 696 int size ATTRIBUTE_UNUSED;
32b5b1aa 697{
e9a25f70
JL
698 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
699 make the problem with not enough registers even worse. */
32b5b1aa
SC
700#ifdef INSN_SCHEDULING
701 if (level > 1)
702 flag_schedule_insns = 0;
703#endif
704}
b08de47e
MM
705\f
706/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
707 attribute for DECL. The attributes in ATTRIBUTES have previously been
708 assigned to DECL. */
709
710int
e075ae69 711ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
bb5177ac
RL
712 tree decl ATTRIBUTE_UNUSED;
713 tree attributes ATTRIBUTE_UNUSED;
714 tree identifier ATTRIBUTE_UNUSED;
715 tree args ATTRIBUTE_UNUSED;
b08de47e
MM
716{
717 return 0;
718}
719
720/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
721 attribute for TYPE. The attributes in ATTRIBUTES have previously been
722 assigned to TYPE. */
723
724int
e075ae69 725ix86_valid_type_attribute_p (type, attributes, identifier, args)
b08de47e 726 tree type;
bb5177ac 727 tree attributes ATTRIBUTE_UNUSED;
b08de47e
MM
728 tree identifier;
729 tree args;
730{
731 if (TREE_CODE (type) != FUNCTION_TYPE
ac478ac0 732 && TREE_CODE (type) != METHOD_TYPE
b08de47e
MM
733 && TREE_CODE (type) != FIELD_DECL
734 && TREE_CODE (type) != TYPE_DECL)
735 return 0;
736
737 /* Stdcall attribute says callee is responsible for popping arguments
738 if they are not variable. */
739 if (is_attribute_p ("stdcall", identifier))
740 return (args == NULL_TREE);
741
0f290768 742 /* Cdecl attribute says the callee is a normal C declaration. */
b08de47e
MM
743 if (is_attribute_p ("cdecl", identifier))
744 return (args == NULL_TREE);
745
746 /* Regparm attribute specifies how many integer arguments are to be
0f290768 747 passed in registers. */
b08de47e
MM
748 if (is_attribute_p ("regparm", identifier))
749 {
750 tree cst;
751
e9a25f70 752 if (! args || TREE_CODE (args) != TREE_LIST
b08de47e
MM
753 || TREE_CHAIN (args) != NULL_TREE
754 || TREE_VALUE (args) == NULL_TREE)
755 return 0;
756
757 cst = TREE_VALUE (args);
758 if (TREE_CODE (cst) != INTEGER_CST)
759 return 0;
760
cce097f1 761 if (compare_tree_int (cst, REGPARM_MAX) > 0)
b08de47e
MM
762 return 0;
763
764 return 1;
765 }
766
767 return 0;
768}
769
770/* Return 0 if the attributes for two types are incompatible, 1 if they
771 are compatible, and 2 if they are nearly compatible (which causes a
772 warning to be generated). */
773
774int
e075ae69 775ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
776 tree type1;
777 tree type2;
b08de47e 778{
0f290768 779 /* Check for mismatch of non-default calling convention. */
69ddee61 780 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
781
782 if (TREE_CODE (type1) != FUNCTION_TYPE)
783 return 1;
784
785 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
786 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
787 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 788 return 0;
b08de47e
MM
789 return 1;
790}
b08de47e
MM
791\f
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  /* -mrtd applies only to real functions; an IDENTIFIER_NODE fundecl
     means a library call, which follows the default convention.  */
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall functions will pop the stack if not variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    /* Pop only when the prototype ends in void_type_node (or there is
       no prototype at all), i.e. the callee knows the exact arg size.  */
    if (rtd
	&& (TYPE_ARG_TYPES (funtype) == NULL_TREE
	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		== void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument.  */
  if (aggregate_value_p (TREE_TYPE (funtype)))
    return GET_MODE_SIZE (Pmode);

  return 0;
}
b08de47e
MM
837\f
838/* Argument support functions. */
839
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  /* Start from all-zero state (zero_cum is static, hence zeroed).  */
  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.
     The global -mregparm value may be overridden per-function by
     a "regparm" attribute.  */
  cum->nregs = ix86_regparm;
  if (fntype)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_mode' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  /* A parameter list not terminated by void_type_node means
	     varargs: disable register passing entirely.  */
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    cum->nregs = 0;
	}
    }

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
900
/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* whether or not the argument was named */
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

  /* Consume WORDS registers unconditionally; overshoot is clamped
     to zero below, which also resets the next register number.  */
  cum->words += words;
  cum->nregs -= words;
  cum->regno += words;

  if (cum->nregs <= 0)
    {
      cum->nregs = 0;
      cum->regno = 0;
    }

  return;
}
933
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */

struct rtx_def *
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  switch (mode)
    {
      /* For now, pass fp/complex values on the stack.  */
    default:
      break;

    case BLKmode:
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      /* Use a register only if the whole argument fits in the
	 registers that remain.  */
      if (words <= cum->nregs)
	ret = gen_rtx_REG (mode, cum->regno);
      break;
    }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
e075ae69 991\f
8bad7136
JL
992
993/* Return nonzero if OP is (const_int 1), else return zero. */
994
995int
996const_int_1_operand (op, mode)
997 rtx op;
998 enum machine_mode mode ATTRIBUTE_UNUSED;
999{
1000 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
1001}
1002
e075ae69
RH
/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      /* NOTE(review): UNSPEC numbers 6 and 7 appear to be the PIC
	 @GOT/@GOTOFF unspecs used by this port -- confirm against the
	 machine description.  */
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && XINT (op, 1) >= 6
	      && XINT (op, 1) <= 7))
	return 1;
      /* Otherwise accept only symbol+constant sums.  */
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != 7)
	return 0;

      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}
2a2ab3f9 1048
e075ae69 1049/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 1050
e075ae69
RH
1051int
1052pic_symbolic_operand (op, mode)
1053 register rtx op;
1054 enum machine_mode mode ATTRIBUTE_UNUSED;
1055{
1056 if (GET_CODE (op) == CONST)
2a2ab3f9 1057 {
e075ae69
RH
1058 op = XEXP (op, 0);
1059 if (GET_CODE (op) == UNSPEC)
1060 return 1;
1061 if (GET_CODE (op) != PLUS
1062 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1063 return 0;
1064 op = XEXP (op, 0);
1065 if (GET_CODE (op) == UNSPEC)
1066 return 1;
2a2ab3f9 1067 }
e075ae69 1068 return 0;
2a2ab3f9 1069}
2a2ab3f9 1070
28d52ffb
RH
/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Half-pic doesn't allow anything but registers and constants.
     We've just taken care of the later.  */
  if (HALF_PIC_P ())
    return register_operand (op, Pmode);

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}
79325812 1106
e075ae69
RH
1107int
1108constant_call_address_operand (op, mode)
1109 rtx op;
1110 enum machine_mode mode ATTRIBUTE_UNUSED;
1111{
e1ff012c 1112 return GET_CODE (op) == SYMBOL_REF;
e075ae69 1113}
2a2ab3f9 1114
e075ae69 1115/* Match exactly zero and one. */
e9a25f70 1116
0f290768 1117int
e075ae69
RH
1118const0_operand (op, mode)
1119 register rtx op;
1120 enum machine_mode mode;
1121{
1122 return op == CONST0_RTX (mode);
1123}
e9a25f70 1124
0f290768 1125int
e075ae69
RH
1126const1_operand (op, mode)
1127 register rtx op;
1128 enum machine_mode mode ATTRIBUTE_UNUSED;
1129{
1130 return op == const1_rtx;
1131}
2a2ab3f9 1132
e075ae69 1133/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 1134
e075ae69
RH
1135int
1136const248_operand (op, mode)
1137 register rtx op;
1138 enum machine_mode mode ATTRIBUTE_UNUSED;
1139{
1140 return (GET_CODE (op) == CONST_INT
1141 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1142}
e9a25f70 1143
/* True if this is a constant appropriate for an increment or decrement.  */

int
incdec_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  /* +1 and -1 map directly to inc/dec.  */
  if (op == const1_rtx || op == constm1_rtx)
    return 1;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  /* The all-ones value of MODE is congruent to -1 in that mode's
     modular arithmetic, so it can also be done with a dec.  */
  if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
    return 1;
  if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
    return 1;
  if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
    return 1;
  return 0;
}
2a2ab3f9 1163
/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is
   a register operand.

   This is used to prevent esp from being used as an index reg.
   Which would only happen in pathological cases.  */

int
reg_no_sp_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  /* Look through a SUBREG so a subreg of esp is rejected too.  */
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
    return 0;

  return register_operand (op, mode);
}
b840bfb0 1184
915119a5
BS
1185int
1186mmx_reg_operand (op, mode)
1187 register rtx op;
bd793c65 1188 enum machine_mode mode ATTRIBUTE_UNUSED;
915119a5
BS
1189{
1190 return MMX_REG_P (op);
1191}
1192
2c5a510c
RH
1193/* Return false if this is any eliminable register. Otherwise
1194 general_operand. */
1195
1196int
1197general_no_elim_operand (op, mode)
1198 register rtx op;
1199 enum machine_mode mode;
1200{
1201 rtx t = op;
1202 if (GET_CODE (t) == SUBREG)
1203 t = SUBREG_REG (t);
1204 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1205 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1206 || t == virtual_stack_dynamic_rtx)
1207 return 0;
1208
1209 return general_operand (op, mode);
1210}
1211
1212/* Return false if this is any eliminable register. Otherwise
1213 register_operand or const_int. */
1214
1215int
1216nonmemory_no_elim_operand (op, mode)
1217 register rtx op;
1218 enum machine_mode mode;
1219{
1220 rtx t = op;
1221 if (GET_CODE (t) == SUBREG)
1222 t = SUBREG_REG (t);
1223 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1224 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1225 || t == virtual_stack_dynamic_rtx)
1226 return 0;
1227
1228 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1229}
1230
/* Return true if op is a Q_REGS class register (one with a byte-addressable
   low part: ax, bx, cx, dx).  */

int
q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  /* Look through a SUBREG to the underlying hard register.  */
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return QI_REG_P (op);
}
b840bfb0 1244
/* Return true if op is a NON_Q_REGS class register.  */

int
non_q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  /* Look through a SUBREG to the underlying hard register.  */
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return NON_QI_REG_P (op);
}
b840bfb0 1258
e075ae69
RH
/* Return 1 if OP is a comparison operator that can use the condition code
   generated by a logical operation, which characteristically does not set
   overflow or carry.  To be used with CCNOmode.  */

int
no_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;

  switch (GET_CODE (op))
    {
    /* GT/LE and the signed carry-based codes are excluded: they
       depend on OF/CF, which logical ops leave cleared.  */
    case EQ: case NE:
    case LT: case GE:
    case LEU: case LTU: case GEU: case GTU:
      return 1;

    default:
      return 0;
    }
}
b840bfb0 1282
915119a5
BS
1283/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
1284 insns. */
1285int
1286sse_comparison_operator (op, mode)
1287 rtx op;
1288 enum machine_mode mode ATTRIBUTE_UNUSED;
1289{
1290 enum rtx_code code = GET_CODE (op);
1291 return code == EQ || code == LT || code == LE || code == UNORDERED;
1292}
/* Return 1 if OP is a valid comparison operator in valid mode.
   The acceptable CC modes depend on which flags the comparison code
   reads: signed codes need modes that track OF/SF, carry-based codes
   need the full CCmode.  */
int
ix86_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  switch (GET_CODE (op))
    {
    case EQ: case NE:
      /* ZF is valid in every CC mode.  */
      return 1;
    case LT: case GE:
      inmode = GET_MODE (XEXP (op, 0));
      if (inmode == CCmode || inmode == CCGCmode
	  || inmode == CCGOCmode || inmode == CCNOmode)
	return 1;
      return 0;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      /* Carry-based and ordered tests require the full flags.  */
      inmode = GET_MODE (XEXP (op, 0));
      if (inmode == CCmode)
	return 1;
      return 0;
    case GT: case LE:
      inmode = GET_MODE (XEXP (op, 0));
      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}
1326
/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  /* NOTE(review): XEXP (op, 0) is read before OP is checked to be a
     comparison; presumably only match_operator candidates (which have
     operands) ever reach here -- confirm against the .md patterns.  */
  enum machine_mode inmode = GET_MODE (XEXP (op, 0));
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  switch (GET_CODE (op))
    {
    case EQ: case NE:
      return 1;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      /* fcmov handles only the unsigned/unordered flavors, which is
	 what FP compares (CCFPmode/CCFPUmode) produce.  */
      if (inmode == CCFPmode || inmode == CCFPUmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}
b840bfb0 1349
e9e80858
JH
/* Return 1 if OP is a binary operator that can be promoted to wider mode.  */

int
promotable_binary_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case MULT:
      /* Modern CPUs have same latency for HImode and SImode multiply,
	 but 386 and 486 do HImode multiply faster.  */
      return ix86_cpu > PROCESSOR_I486;
    case PLUS:
    case AND:
    case IOR:
    case XOR:
    case ASHIFT:
      /* The low bits of these results do not depend on the high bits
	 of the inputs, so widening is always safe.  */
      return 1;
    default:
      return 0;
    }
}
1373
e075ae69
RH
1374/* Nearly general operand, but accept any const_double, since we wish
1375 to be able to drop them into memory rather than have them get pulled
1376 into registers. */
b840bfb0 1377
2a2ab3f9 1378int
e075ae69
RH
1379cmp_fp_expander_operand (op, mode)
1380 register rtx op;
1381 enum machine_mode mode;
2a2ab3f9 1382{
e075ae69 1383 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 1384 return 0;
e075ae69 1385 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 1386 return 1;
e075ae69 1387 return general_operand (op, mode);
2a2ab3f9
JVA
1388}
1389
e075ae69 1390/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
1391
1392int
e075ae69 1393ext_register_operand (op, mode)
2a2ab3f9 1394 register rtx op;
bb5177ac 1395 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1396{
e075ae69
RH
1397 if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
1398 return 0;
1399 return register_operand (op, VOIDmode);
1400}
1401
/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      /* The operator itself must produce a float result.  */
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}
fee2770d 1425
e075ae69
RH
1426int
1427mult_operator(op, mode)
1428 register rtx op;
1429 enum machine_mode mode ATTRIBUTE_UNUSED;
1430{
1431 return GET_CODE (op) == MULT;
1432}
1433
1434int
1435div_operator(op, mode)
1436 register rtx op;
1437 enum machine_mode mode ATTRIBUTE_UNUSED;
1438{
1439 return GET_CODE (op) == DIV;
1440}
0a726ef1
JL
1441
1442int
e075ae69
RH
1443arith_or_logical_operator (op, mode)
1444 rtx op;
1445 enum machine_mode mode;
0a726ef1 1446{
e075ae69
RH
1447 return ((mode == VOIDmode || GET_MODE (op) == mode)
1448 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1449 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
1450}
1451
/* Returns 1 if OP is memory operand with a displacement.  */

int
memory_displacement_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  /* A memory_operand's address must always decompose; failure here
     indicates an internal inconsistency.  */
  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}
1469
/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (general_operand (op, mode))
    return 1;

  /* Additionally accept the (and (zero_extract X 8 8) const_int) form
     produced by testqi_ext_ccno_0, i.e. a test of bits 8..15.  */
  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}
d784886d 1496
e075ae69
RH
1497/* Returns 1 if OP is memory operand that can not be represented by the
1498 modRM array. */
d784886d
RK
1499
1500int
e075ae69 1501long_memory_operand (op, mode)
d784886d
RK
1502 register rtx op;
1503 enum machine_mode mode;
1504{
e075ae69 1505 if (! memory_operand (op, mode))
d784886d
RK
1506 return 0;
1507
e075ae69 1508 return memory_address_length (op) != 0;
d784886d 1509}
2247f6ed
JH
1510
/* Return nonzero if the rtx is known aligned (to a 4-byte boundary).  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      /* A scale of 4 or more keeps any 32-bit alignment; a smaller
	 scale requires the index register itself to be aligned.  */
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      /* The displacement must be a known multiple of 4.  */
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
e075ae69
RH
1564\f
/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.  Returns -1 if X is not a CONST_DOUBLE, 0 for "load
   from memory", 1 for fldz (+0.0) and 2 for fld1 (1.0).  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE)
    return -1;

#if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
  {
    REAL_VALUE_TYPE d;
    jmp_buf handler;
    int is0, is1;

    /* A float-handler trap while examining the value means "not a
       special constant"; fall back to a memory load.  */
    if (setjmp (handler))
      return 0;

    set_float_handler (handler);
    REAL_VALUE_FROM_CONST_DOUBLE (d, x);
    /* fldz produces +0.0 only, so reject -0.0 explicitly.  */
    is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
    is1 = REAL_VALUES_EQUAL (d, dconst1);
    set_float_handler (NULL_PTR);

    if (is0)
      return 1;

    if (is1)
      return 2;

    /* Note that on the 80387, other constants, such as pi,
       are much slower to load as standard constants
       than to load from doubles in memory!  */
    /* ??? Not true on K6: all constants are equal cost.  */
  }
#endif

  return 0;
}
1606
2a2ab3f9
JVA
/* Returns 1 if OP contains a symbol reference, searching the whole
   rtx tree recursively.  */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  /* Walk every sub-rtx ('e') and every rtx vector element ('E').  */
  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}
e075ae69
RH
1637
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  HOST_WIDE_INT tsize;
  int nregs;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif
#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  /* Block profiling needs an instrumented epilogue.  */
  if (profile_block_flag == 2)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32 pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  /* A bare `ret' is possible only when there is no local frame and
     no saved registers to restore.  */
  tsize = ix86_compute_frame_size (get_frame_size (), &nregs, NULL, NULL);
  return tsize == 0 && nregs == 0;
}
1677\f
/* Name of the internal label that holds the PIC thunk (set lazily by
   load_pic_register, emitted by asm_output_function_prefix).  */
static char *pic_label_name;
/* Nonzero once the PIC thunk label/body has been written out, so it is
   emitted at most once per translation unit.  */
static int pic_label_output;
/* Interned name of the _GLOBAL_OFFSET_TABLE_ symbol, allocated once.  */
static char *global_offset_table_name;
e9a25f70 1681
e075ae69
RH
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

void
asm_output_function_prefix (file, name)
     FILE *file;
     const char *name ATTRIBUTE_UNUSED;
{
  rtx xops[2];
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  xops[0] = pic_offset_table_rtx;
  xops[1] = stack_pointer_rtx;

  /* Deep branch prediction favors having a return for every call.  */
  if (pic_reg_used && TARGET_DEEP_BRANCH_PREDICTION)
    {
      /* Emit the thunk only once per translation unit.  */
      if (!pic_label_output)
	{
	  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
	     internal (non-global) label that's being emitted, it didn't make
	     sense to have .type information for local labels.  This caused
	     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
	     me debug info for a label that you're declaring non-global?) this
	     was changed to call ASM_OUTPUT_LABEL() instead.  */

	  ASM_OUTPUT_LABEL (file, pic_label_name);

	  /* Thunk body: fetch the return address (at the top of the
	     stack) into the PIC register, then return.  */
	  xops[1] = gen_rtx_MEM (SImode, xops[1]);
	  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
	  output_asm_insn ("ret", xops);

	  pic_label_output = 1;
	}
    }
}
32b5b1aa 1718
e075ae69
RH
/* Emit the prologue insns that initialize the PIC register with the
   address of _GLOBAL_OFFSET_TABLE_.  Uses either the deep-branch-
   prediction thunk (see asm_output_function_prefix) or an inline
   call/pop sequence.  */

void
load_pic_register ()
{
  rtx gotsym, pclab;

  /* Lazily create and GC-root the interned GOT symbol name.  */
  if (global_offset_table_name == NULL)
    {
      global_offset_table_name =
	ggc_alloc_string ("_GLOBAL_OFFSET_TABLE_", 21);
      ggc_add_string_root (&global_offset_table_name, 1);
    }
  gotsym = gen_rtx_SYMBOL_REF (Pmode, global_offset_table_name);

  if (TARGET_DEEP_BRANCH_PREDICTION)
    {
      /* Lazily create and GC-root the thunk's internal label name.  */
      if (pic_label_name == NULL)
	{
	  pic_label_name = ggc_alloc_string (NULL, 32);
	  ggc_add_string_root (&pic_label_name, 1);
	  ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
	}
      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
    }
  else
    {
      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
    }

  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));

  /* The non-thunk variant leaves the PC on the stack; pop it.  */
  if (! TARGET_DEEP_BRANCH_PREDICTION)
    emit_insn (gen_popsi1 (pic_offset_table_rtx));

  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
}
8dfe5673 1754
e075ae69 1755/* Generate an SImode "push" pattern for input ARG. */
e9a25f70 1756
e075ae69
RH
1757static rtx
1758gen_push (arg)
1759 rtx arg;
e9a25f70 1760{
c5c76735
JL
1761 return gen_rtx_SET (VOIDmode,
1762 gen_rtx_MEM (SImode,
1763 gen_rtx_PRE_DEC (SImode,
1764 stack_pointer_rtx)),
1765 arg);
e9a25f70
JL
1766}
1767
0903fcab
JH
1768/* Return number of registers to be saved on the stack. */
1769
1770static int
1771ix86_nsaved_regs ()
1772{
1773 int nregs = 0;
1774 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1775 || current_function_uses_const_pool);
1776 int limit = (frame_pointer_needed
1777 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
1778 int regno;
1779
1780 for (regno = limit - 1; regno >= 0; regno--)
1781 if ((regs_ever_live[regno] && ! call_used_regs[regno])
1782 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1783 {
1784 nregs ++;
1785 }
1786 return nregs;
1787}
1788
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;
     int to;
{
  int padding1;
  int nregs;

  /* Stack grows downward:

     [arguments]
					<- ARG_POINTER
     saved pc

     saved frame pointer if frame_pointer_needed
					<- HARD_FRAME_POINTER
     [saved regs]

     [padding1]		\
					<- FRAME_POINTER
     [frame]		 > tsize
			|
     [padding2]		/
     */

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    /* Skip saved PC and previous frame pointer.
       Executed only when frame_pointer_needed.  */
    return 8;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    {
      /* The soft frame pointer sits below the saved registers and the
	 alignment padding; the distance is therefore negative.  */
      ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *) 0);
      padding1 += nregs * UNITS_PER_WORD;
      return -padding1;
    }
  else
    {
      /* ARG_POINTER or FRAME_POINTER to STACK_POINTER elimination.  */
      int frame_size = frame_pointer_needed ? 8 : 4;
      HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (),
						     &nregs, &padding1, (int *) 0);

      if (to != STACK_POINTER_REGNUM)
	abort ();
      else if (from == ARG_POINTER_REGNUM)
	return tsize + nregs * UNITS_PER_WORD + frame_size;
      else if (from != FRAME_POINTER_REGNUM)
	abort ();
      else
	return tsize - padding1;
    }
}
1845
65954bd8
JL
1846/* Compute the size of local storage taking into consideration the
1847 desired stack alignment which is to be maintained. Also determine
0f290768
KH
1848 the number of registers saved below the local storage.
1849
564d80f4
JH
1850 PADDING1 returns padding before stack frame and PADDING2 returns
1851 padding after stack frame;
1852 */
1853
1854static HOST_WIDE_INT
1855ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2)
65954bd8
JL
1856 HOST_WIDE_INT size;
1857 int *nregs_on_stack;
564d80f4
JH
1858 int *rpadding1;
1859 int *rpadding2;
65954bd8 1860{
65954bd8 1861 int nregs;
564d80f4
JH
1862 int padding1 = 0;
1863 int padding2 = 0;
65954bd8 1864 HOST_WIDE_INT total_size;
564d80f4 1865 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
44affdae
JH
1866 int offset;
1867 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
65954bd8 1868
564d80f4 1869 nregs = ix86_nsaved_regs ();
564d80f4 1870 total_size = size;
65954bd8 1871
44affdae 1872 offset = frame_pointer_needed ? 8 : 4;
564d80f4 1873
44affdae
JH
1874 /* Do some sanity checking of stack_alignment_needed and preferred_alignment,
1875 since i386 port is the only using those features that may break easilly. */
564d80f4 1876
44affdae
JH
1877 if (size && !stack_alignment_needed)
1878 abort ();
5f677a9e 1879 if (!size && stack_alignment_needed != STACK_BOUNDARY / BITS_PER_UNIT)
44affdae
JH
1880 abort ();
1881 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
1882 abort ();
1883 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1884 abort ();
1885 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1886 abort ();
564d80f4 1887
44affdae
JH
1888 if (stack_alignment_needed < 4)
1889 stack_alignment_needed = 4;
564d80f4 1890
44affdae 1891 offset += nregs * UNITS_PER_WORD;
65954bd8 1892
f73ad30e
JH
1893 if (ACCUMULATE_OUTGOING_ARGS)
1894 total_size += current_function_outgoing_args_size;
1895
44affdae 1896 total_size += offset;
65954bd8 1897
44affdae
JH
1898 /* Align start of frame for local function. */
1899 padding1 = ((offset + stack_alignment_needed - 1)
1900 & -stack_alignment_needed) - offset;
1901 total_size += padding1;
54ff41b7 1902
0f290768 1903 /* Align stack boundary. */
44affdae
JH
1904 padding2 = ((total_size + preferred_alignment - 1)
1905 & -preferred_alignment) - total_size;
65954bd8 1906
f73ad30e
JH
1907 if (ACCUMULATE_OUTGOING_ARGS)
1908 padding2 += current_function_outgoing_args_size;
1909
65954bd8
JL
1910 if (nregs_on_stack)
1911 *nregs_on_stack = nregs;
564d80f4
JH
1912 if (rpadding1)
1913 *rpadding1 = padding1;
564d80f4
JH
1914 if (rpadding2)
1915 *rpadding2 = padding2;
1916
1917 return size + padding1 + padding2;
65954bd8
JL
1918}
1919
0903fcab
JH
1920/* Emit code to save registers in the prologue. */
1921
1922static void
1923ix86_emit_save_regs ()
1924{
1925 register int regno;
1926 int limit;
1927 rtx insn;
1928 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1929 || current_function_uses_const_pool);
1930 limit = (frame_pointer_needed
564d80f4 1931 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
0903fcab
JH
1932
1933 for (regno = limit - 1; regno >= 0; regno--)
1934 if ((regs_ever_live[regno] && !call_used_regs[regno])
1935 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1936 {
1937 insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
1938 RTX_FRAME_RELATED_P (insn) = 1;
1939 }
1940}
1941
0f290768 1942/* Expand the prologue into a bunch of separate insns. */
e075ae69
RH
1943
1944void
1945ix86_expand_prologue ()
2a2ab3f9 1946{
0f290768
KH
1947 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *) 0, (int *) 0,
1948 (int *) 0);
564d80f4 1949 rtx insn;
aae75261
JVA
1950 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1951 || current_function_uses_const_pool);
79325812 1952
e075ae69
RH
1953 /* Note: AT&T enter does NOT have reversed args. Enter is probably
1954 slower on all targets. Also sdb doesn't like it. */
e9a25f70 1955
2a2ab3f9
JVA
1956 if (frame_pointer_needed)
1957 {
564d80f4 1958 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
e075ae69 1959 RTX_FRAME_RELATED_P (insn) = 1;
e9a25f70 1960
564d80f4 1961 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
e075ae69 1962 RTX_FRAME_RELATED_P (insn) = 1;
2a2ab3f9
JVA
1963 }
1964
1c71e60e 1965 ix86_emit_save_regs ();
564d80f4 1966
8dfe5673
RK
1967 if (tsize == 0)
1968 ;
1969 else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
469ac993 1970 {
e075ae69 1971 if (frame_pointer_needed)
1c71e60e
JH
1972 insn = emit_insn (gen_pro_epilogue_adjust_stack
1973 (stack_pointer_rtx, stack_pointer_rtx,
1974 GEN_INT (-tsize), hard_frame_pointer_rtx));
79325812 1975 else
e075ae69
RH
1976 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
1977 GEN_INT (-tsize)));
1978 RTX_FRAME_RELATED_P (insn) = 1;
469ac993 1979 }
79325812 1980 else
8dfe5673 1981 {
e075ae69 1982 /* ??? Is this only valid for Win32? */
e9a25f70 1983
e075ae69 1984 rtx arg0, sym;
e9a25f70 1985
e075ae69
RH
1986 arg0 = gen_rtx_REG (SImode, 0);
1987 emit_move_insn (arg0, GEN_INT (tsize));
77a989d1 1988
e075ae69
RH
1989 sym = gen_rtx_MEM (FUNCTION_MODE,
1990 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
1991 insn = emit_call_insn (gen_call (sym, const0_rtx));
1992
1993 CALL_INSN_FUNCTION_USAGE (insn)
276ab4a4
RH
1994 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
1995 CALL_INSN_FUNCTION_USAGE (insn));
e075ae69 1996 }
e9a25f70 1997
84530511
SC
1998#ifdef SUBTARGET_PROLOGUE
1999 SUBTARGET_PROLOGUE;
0f290768 2000#endif
84530511 2001
e9a25f70 2002 if (pic_reg_used)
e075ae69 2003 load_pic_register ();
77a989d1 2004
e9a25f70
JL
2005 /* If we are profiling, make sure no instructions are scheduled before
2006 the call to mcount. However, if -fpic, the above call will have
2007 done that. */
e075ae69 2008 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
e9a25f70 2009 emit_insn (gen_blockage ());
77a989d1
SC
2010}
2011
0903fcab
JH
2012/* Emit code to add TSIZE to esp value. Use POP instruction when
2013 profitable. */
2014
2015static void
2016ix86_emit_epilogue_esp_adjustment (tsize)
2017 int tsize;
2018{
bdeb029c
JH
2019 /* If a frame pointer is present, we must be sure to tie the sp
2020 to the fp so that we don't mis-schedule. */
2021 if (frame_pointer_needed)
2022 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2023 stack_pointer_rtx,
2024 GEN_INT (tsize),
2025 hard_frame_pointer_rtx));
0903fcab 2026 else
bdeb029c
JH
2027 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2028 GEN_INT (tsize)));
0903fcab
JH
2029}
2030
da2d1d3a
JH
2031/* Emit code to restore saved registers using MOV insns. First register
2032 is restored from POINTER + OFFSET. */
2033static void
2034ix86_emit_restore_regs_using_mov (pointer, offset)
2035 rtx pointer;
2036 int offset;
2037{
2038 int regno;
2039 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2040 || current_function_uses_const_pool);
2041 int limit = (frame_pointer_needed
2042 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
2043
2044 for (regno = 0; regno < limit; regno++)
2045 if ((regs_ever_live[regno] && !call_used_regs[regno])
2046 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2047 {
2048 emit_move_insn (gen_rtx_REG (SImode, regno),
2049 adj_offsettable_operand (gen_rtx_MEM (SImode,
2050 pointer),
2051 offset));
2052 offset += 4;
2053 }
2054}
2055
0f290768 2056/* Restore function stack, frame, and registers. */
e9a25f70 2057
2a2ab3f9 2058void
cbbf65e0
RH
2059ix86_expand_epilogue (emit_return)
2060 int emit_return;
2a2ab3f9 2061{
65954bd8 2062 int nregs;
1c71e60e
JH
2063 int regno;
2064
aae75261
JVA
2065 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2066 || current_function_uses_const_pool);
fdb8a883 2067 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
65954bd8 2068 HOST_WIDE_INT offset;
1c71e60e 2069 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
0f290768 2070 (int *) 0, (int *) 0);
2a2ab3f9 2071
1c71e60e
JH
2072 /* Calculate start of saved registers relative to ebp. */
2073 offset = -nregs * UNITS_PER_WORD;
2a2ab3f9 2074
1c71e60e
JH
2075#ifdef FUNCTION_BLOCK_PROFILER_EXIT
2076 if (profile_block_flag == 2)
564d80f4 2077 {
1c71e60e 2078 FUNCTION_BLOCK_PROFILER_EXIT;
564d80f4 2079 }
1c71e60e 2080#endif
564d80f4 2081
fdb8a883
JW
2082 /* If we're only restoring one register and sp is not valid then
2083 using a move instruction to restore the register since it's
0f290768 2084 less work than reloading sp and popping the register.
da2d1d3a
JH
2085
2086 The default code result in stack adjustment using add/lea instruction,
2087 while this code results in LEAVE instruction (or discrete equivalent),
2088 so it is profitable in some other cases as well. Especially when there
2089 are no registers to restore. We also use this code when TARGET_USE_LEAVE
2090 and there is exactly one register to pop. This heruistic may need some
2091 tuning in future. */
2092 if ((!sp_valid && nregs <= 1)
2093 || (frame_pointer_needed && !nregs && tsize)
2094 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
2095 && nregs == 1))
2a2ab3f9 2096 {
da2d1d3a
JH
2097 /* Restore registers. We can use ebp or esp to address the memory
2098 locations. If both are available, default to ebp, since offsets
2099 are known to be small. Only exception is esp pointing directly to the
2100 end of block of saved registers, where we may simplify addressing
2101 mode. */
2102
2103 if (!frame_pointer_needed || (sp_valid && !tsize))
2104 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, tsize);
2105 else
2106 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);
2a2ab3f9 2107
da2d1d3a
JH
2108 if (!frame_pointer_needed)
2109 ix86_emit_epilogue_esp_adjustment (tsize + nregs * UNITS_PER_WORD);
0f290768 2110 /* If not an i386, mov & pop is faster than "leave". */
da2d1d3a 2111 else if (TARGET_USE_LEAVE || optimize_size)
564d80f4 2112 emit_insn (gen_leave ());
c8c5cb99 2113 else
2a2ab3f9 2114 {
1c71e60e
JH
2115 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2116 hard_frame_pointer_rtx,
2117 const0_rtx,
2118 hard_frame_pointer_rtx));
564d80f4 2119 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
e9a25f70
JL
2120 }
2121 }
1c71e60e 2122 else
68f654ec 2123 {
1c71e60e
JH
2124 /* First step is to deallocate the stack frame so that we can
2125 pop the registers. */
2126 if (!sp_valid)
2127 {
2128 if (!frame_pointer_needed)
2129 abort ();
2130 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2131 hard_frame_pointer_rtx,
2132 GEN_INT (offset),
2133 hard_frame_pointer_rtx));
2134 }
2135 else if (tsize)
2136 ix86_emit_epilogue_esp_adjustment (tsize);
2137
2138 for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
2139 if ((regs_ever_live[regno] && !call_used_regs[regno])
2140 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2141 emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
68f654ec 2142 }
68f654ec 2143
cbbf65e0
RH
2144 /* Sibcall epilogues don't want a return instruction. */
2145 if (! emit_return)
2146 return;
2147
2a2ab3f9
JVA
2148 if (current_function_pops_args && current_function_args_size)
2149 {
e075ae69 2150 rtx popc = GEN_INT (current_function_pops_args);
2a2ab3f9 2151
b8c752c8
UD
2152 /* i386 can only pop 64K bytes. If asked to pop more, pop
2153 return address, do explicit add, and jump indirectly to the
0f290768 2154 caller. */
2a2ab3f9 2155
b8c752c8 2156 if (current_function_pops_args >= 65536)
2a2ab3f9 2157 {
e075ae69 2158 rtx ecx = gen_rtx_REG (SImode, 2);
e9a25f70 2159
e075ae69
RH
2160 emit_insn (gen_popsi1 (ecx));
2161 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 2162 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 2163 }
79325812 2164 else
e075ae69
RH
2165 emit_jump_insn (gen_return_pop_internal (popc));
2166 }
2167 else
2168 emit_jump_insn (gen_return_internal ());
2169}
2170\f
2171/* Extract the parts of an RTL expression that is a valid memory address
2172 for an instruction. Return false if the structure of the address is
2173 grossly off. */
2174
2175static int
2176ix86_decompose_address (addr, out)
2177 register rtx addr;
2178 struct ix86_address *out;
2179{
2180 rtx base = NULL_RTX;
2181 rtx index = NULL_RTX;
2182 rtx disp = NULL_RTX;
2183 HOST_WIDE_INT scale = 1;
2184 rtx scale_rtx = NULL_RTX;
2185
2186 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2187 base = addr;
2188 else if (GET_CODE (addr) == PLUS)
2189 {
2190 rtx op0 = XEXP (addr, 0);
2191 rtx op1 = XEXP (addr, 1);
2192 enum rtx_code code0 = GET_CODE (op0);
2193 enum rtx_code code1 = GET_CODE (op1);
2194
2195 if (code0 == REG || code0 == SUBREG)
2196 {
2197 if (code1 == REG || code1 == SUBREG)
2198 index = op0, base = op1; /* index + base */
2199 else
2200 base = op0, disp = op1; /* base + displacement */
2201 }
2202 else if (code0 == MULT)
e9a25f70 2203 {
e075ae69
RH
2204 index = XEXP (op0, 0);
2205 scale_rtx = XEXP (op0, 1);
2206 if (code1 == REG || code1 == SUBREG)
2207 base = op1; /* index*scale + base */
e9a25f70 2208 else
e075ae69
RH
2209 disp = op1; /* index*scale + disp */
2210 }
2211 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2212 {
2213 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2214 scale_rtx = XEXP (XEXP (op0, 0), 1);
2215 base = XEXP (op0, 1);
2216 disp = op1;
2a2ab3f9 2217 }
e075ae69
RH
2218 else if (code0 == PLUS)
2219 {
2220 index = XEXP (op0, 0); /* index + base + disp */
2221 base = XEXP (op0, 1);
2222 disp = op1;
2223 }
2224 else
2225 return FALSE;
2226 }
2227 else if (GET_CODE (addr) == MULT)
2228 {
2229 index = XEXP (addr, 0); /* index*scale */
2230 scale_rtx = XEXP (addr, 1);
2231 }
2232 else if (GET_CODE (addr) == ASHIFT)
2233 {
2234 rtx tmp;
2235
2236 /* We're called for lea too, which implements ashift on occasion. */
2237 index = XEXP (addr, 0);
2238 tmp = XEXP (addr, 1);
2239 if (GET_CODE (tmp) != CONST_INT)
2240 return FALSE;
2241 scale = INTVAL (tmp);
2242 if ((unsigned HOST_WIDE_INT) scale > 3)
2243 return FALSE;
2244 scale = 1 << scale;
2a2ab3f9 2245 }
2a2ab3f9 2246 else
e075ae69
RH
2247 disp = addr; /* displacement */
2248
2249 /* Extract the integral value of scale. */
2250 if (scale_rtx)
e9a25f70 2251 {
e075ae69
RH
2252 if (GET_CODE (scale_rtx) != CONST_INT)
2253 return FALSE;
2254 scale = INTVAL (scale_rtx);
e9a25f70 2255 }
3b3c6a3f 2256
e075ae69
RH
2257 /* Allow arg pointer and stack pointer as index if there is not scaling */
2258 if (base && index && scale == 1
564d80f4
JH
2259 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2260 || index == stack_pointer_rtx))
e075ae69
RH
2261 {
2262 rtx tmp = base;
2263 base = index;
2264 index = tmp;
2265 }
2266
2267 /* Special case: %ebp cannot be encoded as a base without a displacement. */
564d80f4
JH
2268 if ((base == hard_frame_pointer_rtx
2269 || base == frame_pointer_rtx
2270 || base == arg_pointer_rtx) && !disp)
e075ae69
RH
2271 disp = const0_rtx;
2272
2273 /* Special case: on K6, [%esi] makes the instruction vector decoded.
2274 Avoid this by transforming to [%esi+0]. */
2275 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
2276 && base && !index && !disp
329e1d01 2277 && REG_P (base)
e075ae69
RH
2278 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
2279 disp = const0_rtx;
2280
2281 /* Special case: encode reg+reg instead of reg*2. */
2282 if (!base && index && scale && scale == 2)
2283 base = index, scale = 1;
0f290768 2284
e075ae69
RH
2285 /* Special case: scaling cannot be encoded without base or displacement. */
2286 if (!base && !disp && index && scale != 1)
2287 disp = const0_rtx;
2288
2289 out->base = base;
2290 out->index = index;
2291 out->disp = disp;
2292 out->scale = scale;
3b3c6a3f 2293
e075ae69
RH
2294 return TRUE;
2295}
01329426
JH
2296\f
2297/* Return cost of the memory address x.
2298 For i386, it is better to use a complex address than let gcc copy
2299 the address into a reg and make a new pseudo. But not if the address
2300 requires to two regs - that would mean more pseudos with longer
2301 lifetimes. */
2302int
2303ix86_address_cost (x)
2304 rtx x;
2305{
2306 struct ix86_address parts;
2307 int cost = 1;
3b3c6a3f 2308
01329426
JH
2309 if (!ix86_decompose_address (x, &parts))
2310 abort ();
2311
2312 /* More complex memory references are better. */
2313 if (parts.disp && parts.disp != const0_rtx)
2314 cost--;
2315
2316 /* Attempt to minimize number of registers in the address. */
2317 if ((parts.base
2318 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2319 || (parts.index
2320 && (!REG_P (parts.index)
2321 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2322 cost++;
2323
2324 if (parts.base
2325 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2326 && parts.index
2327 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2328 && parts.base != parts.index)
2329 cost++;
2330
2331 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
2332 since it's predecode logic can't detect the length of instructions
2333 and it degenerates to vector decoded. Increase cost of such
2334 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 2335 to split such addresses or even refuse such addresses at all.
01329426
JH
2336
2337 Following addressing modes are affected:
2338 [base+scale*index]
2339 [scale*index+disp]
2340 [base+index]
0f290768 2341
01329426
JH
2342 The first and last case may be avoidable by explicitly coding the zero in
2343 memory address, but I don't have AMD-K6 machine handy to check this
2344 theory. */
2345
2346 if (TARGET_K6
2347 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2348 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2349 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2350 cost += 10;
0f290768 2351
01329426
JH
2352 return cost;
2353}
2354\f
b949ea8b
JW
2355/* If X is a machine specific address (i.e. a symbol or label being
2356 referenced as a displacement from the GOT implemented using an
2357 UNSPEC), then return the base term. Otherwise return X. */
2358
2359rtx
2360ix86_find_base_term (x)
2361 rtx x;
2362{
2363 rtx term;
2364
2365 if (GET_CODE (x) != PLUS
2366 || XEXP (x, 0) != pic_offset_table_rtx
2367 || GET_CODE (XEXP (x, 1)) != CONST)
2368 return x;
2369
2370 term = XEXP (XEXP (x, 1), 0);
2371
2372 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
2373 term = XEXP (term, 0);
2374
2375 if (GET_CODE (term) != UNSPEC
2376 || XVECLEN (term, 0) != 1
2377 || XINT (term, 1) != 7)
2378 return x;
2379
2380 term = XVECEXP (term, 0, 0);
2381
2382 if (GET_CODE (term) != SYMBOL_REF
2383 && GET_CODE (term) != LABEL_REF)
2384 return x;
2385
2386 return term;
2387}
2388\f
e075ae69
RH
2389/* Determine if a given CONST RTX is a valid memory displacement
2390 in PIC mode. */
0f290768 2391
59be65f6 2392int
91bb873f
RH
2393legitimate_pic_address_disp_p (disp)
2394 register rtx disp;
2395{
2396 if (GET_CODE (disp) != CONST)
2397 return 0;
2398 disp = XEXP (disp, 0);
2399
2400 if (GET_CODE (disp) == PLUS)
2401 {
2402 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2403 return 0;
2404 disp = XEXP (disp, 0);
2405 }
2406
2407 if (GET_CODE (disp) != UNSPEC
2408 || XVECLEN (disp, 0) != 1)
2409 return 0;
2410
2411 /* Must be @GOT or @GOTOFF. */
2412 if (XINT (disp, 1) != 6
2413 && XINT (disp, 1) != 7)
2414 return 0;
2415
2416 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2417 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2418 return 0;
2419
2420 return 1;
2421}
2422
e075ae69
RH
2423/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2424 memory address for an instruction. The MODE argument is the machine mode
2425 for the MEM expression that wants to use this address.
2426
2427 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
2428 convert common non-canonical forms to canonical form so that they will
2429 be recognized. */
2430
3b3c6a3f
MM
2431int
2432legitimate_address_p (mode, addr, strict)
2433 enum machine_mode mode;
2434 register rtx addr;
2435 int strict;
2436{
e075ae69
RH
2437 struct ix86_address parts;
2438 rtx base, index, disp;
2439 HOST_WIDE_INT scale;
2440 const char *reason = NULL;
2441 rtx reason_rtx = NULL_RTX;
3b3c6a3f
MM
2442
2443 if (TARGET_DEBUG_ADDR)
2444 {
2445 fprintf (stderr,
e9a25f70 2446 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
3b3c6a3f 2447 GET_MODE_NAME (mode), strict);
3b3c6a3f
MM
2448 debug_rtx (addr);
2449 }
2450
e075ae69 2451 if (! ix86_decompose_address (addr, &parts))
3b3c6a3f 2452 {
e075ae69 2453 reason = "decomposition failed";
50e60bc3 2454 goto report_error;
3b3c6a3f
MM
2455 }
2456
e075ae69
RH
2457 base = parts.base;
2458 index = parts.index;
2459 disp = parts.disp;
2460 scale = parts.scale;
91f0226f 2461
e075ae69 2462 /* Validate base register.
e9a25f70
JL
2463
2464 Don't allow SUBREG's here, it can lead to spill failures when the base
3d771dfd
MM
2465 is one word out of a two word structure, which is represented internally
2466 as a DImode int. */
e9a25f70 2467
3b3c6a3f
MM
2468 if (base)
2469 {
e075ae69
RH
2470 reason_rtx = base;
2471
3d771dfd 2472 if (GET_CODE (base) != REG)
3b3c6a3f 2473 {
e075ae69 2474 reason = "base is not a register";
50e60bc3 2475 goto report_error;
3b3c6a3f
MM
2476 }
2477
c954bd01
RH
2478 if (GET_MODE (base) != Pmode)
2479 {
e075ae69 2480 reason = "base is not in Pmode";
50e60bc3 2481 goto report_error;
c954bd01
RH
2482 }
2483
e9a25f70
JL
2484 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
2485 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
3b3c6a3f 2486 {
e075ae69 2487 reason = "base is not valid";
50e60bc3 2488 goto report_error;
3b3c6a3f
MM
2489 }
2490 }
2491
e075ae69 2492 /* Validate index register.
e9a25f70
JL
2493
2494 Don't allow SUBREG's here, it can lead to spill failures when the index
3d771dfd
MM
2495 is one word out of a two word structure, which is represented internally
2496 as a DImode int. */
e075ae69
RH
2497
2498 if (index)
3b3c6a3f 2499 {
e075ae69
RH
2500 reason_rtx = index;
2501
2502 if (GET_CODE (index) != REG)
3b3c6a3f 2503 {
e075ae69 2504 reason = "index is not a register";
50e60bc3 2505 goto report_error;
3b3c6a3f
MM
2506 }
2507
e075ae69 2508 if (GET_MODE (index) != Pmode)
c954bd01 2509 {
e075ae69 2510 reason = "index is not in Pmode";
50e60bc3 2511 goto report_error;
c954bd01
RH
2512 }
2513
e075ae69
RH
2514 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
2515 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
3b3c6a3f 2516 {
e075ae69 2517 reason = "index is not valid";
50e60bc3 2518 goto report_error;
3b3c6a3f
MM
2519 }
2520 }
3b3c6a3f 2521
e075ae69
RH
2522 /* Validate scale factor. */
2523 if (scale != 1)
3b3c6a3f 2524 {
e075ae69
RH
2525 reason_rtx = GEN_INT (scale);
2526 if (!index)
3b3c6a3f 2527 {
e075ae69 2528 reason = "scale without index";
50e60bc3 2529 goto report_error;
3b3c6a3f
MM
2530 }
2531
e075ae69 2532 if (scale != 2 && scale != 4 && scale != 8)
3b3c6a3f 2533 {
e075ae69 2534 reason = "scale is not a valid multiplier";
50e60bc3 2535 goto report_error;
3b3c6a3f
MM
2536 }
2537 }
2538
91bb873f 2539 /* Validate displacement. */
3b3c6a3f
MM
2540 if (disp)
2541 {
e075ae69
RH
2542 reason_rtx = disp;
2543
91bb873f 2544 if (!CONSTANT_ADDRESS_P (disp))
3b3c6a3f 2545 {
e075ae69 2546 reason = "displacement is not constant";
50e60bc3 2547 goto report_error;
3b3c6a3f
MM
2548 }
2549
e075ae69 2550 if (GET_CODE (disp) == CONST_DOUBLE)
3b3c6a3f 2551 {
e075ae69 2552 reason = "displacement is a const_double";
50e60bc3 2553 goto report_error;
3b3c6a3f
MM
2554 }
2555
91bb873f 2556 if (flag_pic && SYMBOLIC_CONST (disp))
3b3c6a3f 2557 {
91bb873f
RH
2558 if (! legitimate_pic_address_disp_p (disp))
2559 {
e075ae69 2560 reason = "displacement is an invalid pic construct";
50e60bc3 2561 goto report_error;
91bb873f
RH
2562 }
2563
4e9efe54 2564 /* This code used to verify that a symbolic pic displacement
0f290768
KH
2565 includes the pic_offset_table_rtx register.
2566
4e9efe54
JH
2567 While this is good idea, unfortunately these constructs may
2568 be created by "adds using lea" optimization for incorrect
2569 code like:
2570
2571 int a;
2572 int foo(int i)
2573 {
2574 return *(&a+i);
2575 }
2576
50e60bc3 2577 This code is nonsensical, but results in addressing
4e9efe54
JH
2578 GOT table with pic_offset_table_rtx base. We can't
2579 just refuse it easilly, since it gets matched by
2580 "addsi3" pattern, that later gets split to lea in the
2581 case output register differs from input. While this
2582 can be handled by separate addsi pattern for this case
2583 that never results in lea, this seems to be easier and
2584 correct fix for crash to disable this test. */
3b3c6a3f 2585 }
91bb873f 2586 else if (HALF_PIC_P ())
3b3c6a3f 2587 {
91bb873f 2588 if (! HALF_PIC_ADDRESS_P (disp)
e075ae69 2589 || (base != NULL_RTX || index != NULL_RTX))
91bb873f 2590 {
e075ae69 2591 reason = "displacement is an invalid half-pic reference";
50e60bc3 2592 goto report_error;
91bb873f 2593 }
3b3c6a3f
MM
2594 }
2595 }
2596
e075ae69 2597 /* Everything looks valid. */
3b3c6a3f 2598 if (TARGET_DEBUG_ADDR)
e075ae69 2599 fprintf (stderr, "Success.\n");
3b3c6a3f 2600 return TRUE;
e075ae69 2601
50e60bc3 2602report_error:
e075ae69
RH
2603 if (TARGET_DEBUG_ADDR)
2604 {
2605 fprintf (stderr, "Error: %s\n", reason);
2606 debug_rtx (reason_rtx);
2607 }
2608 return FALSE;
3b3c6a3f 2609}
3b3c6a3f 2610\f
55efb413
JW
2611/* Return an unique alias set for the GOT. */
2612
0f290768 2613static HOST_WIDE_INT
55efb413
JW
2614ix86_GOT_alias_set ()
2615{
2616 static HOST_WIDE_INT set = -1;
2617 if (set == -1)
2618 set = new_alias_set ();
2619 return set;
0f290768 2620}
55efb413 2621
3b3c6a3f
MM
2622/* Return a legitimate reference for ORIG (an address) using the
2623 register REG. If REG is 0, a new pseudo is generated.
2624
91bb873f 2625 There are two types of references that must be handled:
3b3c6a3f
MM
2626
2627 1. Global data references must load the address from the GOT, via
2628 the PIC reg. An insn is emitted to do this load, and the reg is
2629 returned.
2630
91bb873f
RH
2631 2. Static data references, constant pool addresses, and code labels
2632 compute the address as an offset from the GOT, whose base is in
2633 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2634 differentiate them from global data objects. The returned
2635 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
2636
2637 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 2638 reg also appears in the address. */
3b3c6a3f
MM
2639
2640rtx
2641legitimize_pic_address (orig, reg)
2642 rtx orig;
2643 rtx reg;
2644{
2645 rtx addr = orig;
2646 rtx new = orig;
91bb873f 2647 rtx base;
3b3c6a3f 2648
91bb873f
RH
2649 if (GET_CODE (addr) == LABEL_REF
2650 || (GET_CODE (addr) == SYMBOL_REF
2651 && (CONSTANT_POOL_ADDRESS_P (addr)
2652 || SYMBOL_REF_FLAG (addr))))
3b3c6a3f 2653 {
91bb873f
RH
2654 /* This symbol may be referenced via a displacement from the PIC
2655 base address (@GOTOFF). */
3b3c6a3f 2656
91bb873f 2657 current_function_uses_pic_offset_table = 1;
4859dd36
RH
2658 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
2659 new = gen_rtx_CONST (Pmode, new);
91bb873f 2660 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3b3c6a3f 2661
91bb873f
RH
2662 if (reg != 0)
2663 {
3b3c6a3f 2664 emit_move_insn (reg, new);
91bb873f 2665 new = reg;
3b3c6a3f 2666 }
3b3c6a3f 2667 }
91bb873f 2668 else if (GET_CODE (addr) == SYMBOL_REF)
3b3c6a3f 2669 {
91bb873f 2670 /* This symbol must be referenced via a load from the
0f290768 2671 Global Offset Table (@GOT). */
3b3c6a3f 2672
91bb873f 2673 current_function_uses_pic_offset_table = 1;
4859dd36
RH
2674 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
2675 new = gen_rtx_CONST (Pmode, new);
91bb873f
RH
2676 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2677 new = gen_rtx_MEM (Pmode, new);
2678 RTX_UNCHANGING_P (new) = 1;
0f290768 2679 MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();
3b3c6a3f
MM
2680
2681 if (reg == 0)
2682 reg = gen_reg_rtx (Pmode);
91bb873f
RH
2683 emit_move_insn (reg, new);
2684 new = reg;
0f290768 2685 }
91bb873f
RH
2686 else
2687 {
2688 if (GET_CODE (addr) == CONST)
3b3c6a3f 2689 {
91bb873f
RH
2690 addr = XEXP (addr, 0);
2691 if (GET_CODE (addr) == UNSPEC)
2692 {
2693 /* Check that the unspec is one of the ones we generate? */
2694 }
2695 else if (GET_CODE (addr) != PLUS)
564d80f4 2696 abort ();
3b3c6a3f 2697 }
91bb873f
RH
2698 if (GET_CODE (addr) == PLUS)
2699 {
2700 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 2701
91bb873f
RH
2702 /* Check first to see if this is a constant offset from a @GOTOFF
2703 symbol reference. */
2704 if ((GET_CODE (op0) == LABEL_REF
2705 || (GET_CODE (op0) == SYMBOL_REF
2706 && (CONSTANT_POOL_ADDRESS_P (op0)
2707 || SYMBOL_REF_FLAG (op0))))
2708 && GET_CODE (op1) == CONST_INT)
2709 {
2710 current_function_uses_pic_offset_table = 1;
4859dd36
RH
2711 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
2712 new = gen_rtx_PLUS (Pmode, new, op1);
2713 new = gen_rtx_CONST (Pmode, new);
91bb873f
RH
2714 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2715
2716 if (reg != 0)
2717 {
2718 emit_move_insn (reg, new);
2719 new = reg;
2720 }
2721 }
2722 else
2723 {
2724 base = legitimize_pic_address (XEXP (addr, 0), reg);
2725 new = legitimize_pic_address (XEXP (addr, 1),
2726 base == reg ? NULL_RTX : reg);
2727
2728 if (GET_CODE (new) == CONST_INT)
2729 new = plus_constant (base, INTVAL (new));
2730 else
2731 {
2732 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
2733 {
2734 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
2735 new = XEXP (new, 1);
2736 }
2737 new = gen_rtx_PLUS (Pmode, base, new);
2738 }
2739 }
2740 }
3b3c6a3f
MM
2741 }
2742 return new;
2743}
2744\f
3b3c6a3f
MM
2745/* Try machine-dependent ways of modifying an illegitimate address
2746 to be legitimate. If we find one, return the new, valid address.
2747 This macro is used in only one place: `memory_address' in explow.c.
2748
2749 OLDX is the address as it was before break_out_memory_refs was called.
2750 In some cases it is useful to look at this to decide what needs to be done.
2751
2752 MODE and WIN are passed so that this macro can use
2753 GO_IF_LEGITIMATE_ADDRESS.
2754
2755 It is always safe for this macro to do nothing. It exists to recognize
2756 opportunities to optimize the output.
2757
2758 For the 80386, we handle X+REG by loading X into a register R and
2759 using R+REG. R will go in a general reg and indexing will be used.
2760 However, if REG is a broken-out memory address or multiplication,
2761 nothing needs to be done because REG can certainly go in a general reg.
2762
2763 When -fpic is used, special handling is needed for symbolic references.
2764 See comments by legitimize_pic_address in i386.c for details. */
2765
2766rtx
2767legitimize_address (x, oldx, mode)
2768 register rtx x;
bb5177ac 2769 register rtx oldx ATTRIBUTE_UNUSED;
3b3c6a3f
MM
2770 enum machine_mode mode;
2771{
2772 int changed = 0;
2773 unsigned log;
2774
2775 if (TARGET_DEBUG_ADDR)
2776 {
e9a25f70
JL
2777 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2778 GET_MODE_NAME (mode));
3b3c6a3f
MM
2779 debug_rtx (x);
2780 }
2781
2782 if (flag_pic && SYMBOLIC_CONST (x))
2783 return legitimize_pic_address (x, 0);
2784
2785 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
2786 if (GET_CODE (x) == ASHIFT
2787 && GET_CODE (XEXP (x, 1)) == CONST_INT
2788 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
2789 {
2790 changed = 1;
a269a03c
JC
2791 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
2792 GEN_INT (1 << log));
3b3c6a3f
MM
2793 }
2794
2795 if (GET_CODE (x) == PLUS)
2796 {
0f290768 2797 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 2798
3b3c6a3f
MM
2799 if (GET_CODE (XEXP (x, 0)) == ASHIFT
2800 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2801 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
2802 {
2803 changed = 1;
c5c76735
JL
2804 XEXP (x, 0) = gen_rtx_MULT (Pmode,
2805 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
2806 GEN_INT (1 << log));
3b3c6a3f
MM
2807 }
2808
2809 if (GET_CODE (XEXP (x, 1)) == ASHIFT
2810 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
2811 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
2812 {
2813 changed = 1;
c5c76735
JL
2814 XEXP (x, 1) = gen_rtx_MULT (Pmode,
2815 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
2816 GEN_INT (1 << log));
3b3c6a3f
MM
2817 }
2818
0f290768 2819 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
2820 if (GET_CODE (XEXP (x, 1)) == MULT)
2821 {
2822 rtx tmp = XEXP (x, 0);
2823 XEXP (x, 0) = XEXP (x, 1);
2824 XEXP (x, 1) = tmp;
2825 changed = 1;
2826 }
2827
2828 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2829 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2830 created by virtual register instantiation, register elimination, and
2831 similar optimizations. */
2832 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
2833 {
2834 changed = 1;
c5c76735
JL
2835 x = gen_rtx_PLUS (Pmode,
2836 gen_rtx_PLUS (Pmode, XEXP (x, 0),
2837 XEXP (XEXP (x, 1), 0)),
2838 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
2839 }
2840
e9a25f70
JL
2841 /* Canonicalize
2842 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
2843 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2844 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
2845 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
2846 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
2847 && CONSTANT_P (XEXP (x, 1)))
2848 {
00c79232
ML
2849 rtx constant;
2850 rtx other = NULL_RTX;
3b3c6a3f
MM
2851
2852 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2853 {
2854 constant = XEXP (x, 1);
2855 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
2856 }
2857 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
2858 {
2859 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
2860 other = XEXP (x, 1);
2861 }
2862 else
2863 constant = 0;
2864
2865 if (constant)
2866 {
2867 changed = 1;
c5c76735
JL
2868 x = gen_rtx_PLUS (Pmode,
2869 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
2870 XEXP (XEXP (XEXP (x, 0), 1), 0)),
2871 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
2872 }
2873 }
2874
2875 if (changed && legitimate_address_p (mode, x, FALSE))
2876 return x;
2877
2878 if (GET_CODE (XEXP (x, 0)) == MULT)
2879 {
2880 changed = 1;
2881 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
2882 }
2883
2884 if (GET_CODE (XEXP (x, 1)) == MULT)
2885 {
2886 changed = 1;
2887 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
2888 }
2889
2890 if (changed
2891 && GET_CODE (XEXP (x, 1)) == REG
2892 && GET_CODE (XEXP (x, 0)) == REG)
2893 return x;
2894
2895 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
2896 {
2897 changed = 1;
2898 x = legitimize_pic_address (x, 0);
2899 }
2900
2901 if (changed && legitimate_address_p (mode, x, FALSE))
2902 return x;
2903
2904 if (GET_CODE (XEXP (x, 0)) == REG)
2905 {
2906 register rtx temp = gen_reg_rtx (Pmode);
2907 register rtx val = force_operand (XEXP (x, 1), temp);
2908 if (val != temp)
2909 emit_move_insn (temp, val);
2910
2911 XEXP (x, 1) = temp;
2912 return x;
2913 }
2914
2915 else if (GET_CODE (XEXP (x, 1)) == REG)
2916 {
2917 register rtx temp = gen_reg_rtx (Pmode);
2918 register rtx val = force_operand (XEXP (x, 0), temp);
2919 if (val != temp)
2920 emit_move_insn (temp, val);
2921
2922 XEXP (x, 0) = temp;
2923 return x;
2924 }
2925 }
2926
2927 return x;
2928}
2a2ab3f9
JVA
2929\f
2930/* Print an integer constant expression in assembler syntax. Addition
2931 and subtraction are the only arithmetic that may appear in these
2932 expressions. FILE is the stdio stream to write to, X is the rtx, and
2933 CODE is the operand print code from the output string. */
2934
2935static void
2936output_pic_addr_const (file, x, code)
2937 FILE *file;
2938 rtx x;
2939 int code;
2940{
2941 char buf[256];
2942
2943 switch (GET_CODE (x))
2944 {
2945 case PC:
2946 if (flag_pic)
2947 putc ('.', file);
2948 else
2949 abort ();
2950 break;
2951
2952 case SYMBOL_REF:
91bb873f
RH
2953 assemble_name (file, XSTR (x, 0));
2954 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
2955 fputs ("@PLT", file);
2a2ab3f9
JVA
2956 break;
2957
91bb873f
RH
2958 case LABEL_REF:
2959 x = XEXP (x, 0);
2960 /* FALLTHRU */
2a2ab3f9
JVA
2961 case CODE_LABEL:
2962 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
2963 assemble_name (asm_out_file, buf);
2964 break;
2965
2966 case CONST_INT:
f64cecad 2967 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
2968 break;
2969
2970 case CONST:
2971 /* This used to output parentheses around the expression,
2972 but that does not work on the 386 (either ATT or BSD assembler). */
2973 output_pic_addr_const (file, XEXP (x, 0), code);
2974 break;
2975
2976 case CONST_DOUBLE:
2977 if (GET_MODE (x) == VOIDmode)
2978 {
2979 /* We can use %d if the number is <32 bits and positive. */
2980 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
2981 fprintf (file, "0x%lx%08lx",
2982 (unsigned long) CONST_DOUBLE_HIGH (x),
2983 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 2984 else
f64cecad 2985 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
2986 }
2987 else
2988 /* We can't handle floating point constants;
2989 PRINT_OPERAND must handle them. */
2990 output_operand_lossage ("floating constant misused");
2991 break;
2992
2993 case PLUS:
e9a25f70 2994 /* Some assemblers need integer constants to appear first. */
2a2ab3f9
JVA
2995 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
2996 {
2a2ab3f9 2997 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 2998 putc ('+', file);
e9a25f70 2999 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 3000 }
91bb873f 3001 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 3002 {
2a2ab3f9 3003 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 3004 putc ('+', file);
e9a25f70 3005 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 3006 }
91bb873f
RH
3007 else
3008 abort ();
2a2ab3f9
JVA
3009 break;
3010
3011 case MINUS:
e075ae69 3012 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
2a2ab3f9 3013 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 3014 putc ('-', file);
2a2ab3f9 3015 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 3016 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
2a2ab3f9
JVA
3017 break;
3018
91bb873f
RH
3019 case UNSPEC:
3020 if (XVECLEN (x, 0) != 1)
3021 abort ();
3022 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
3023 switch (XINT (x, 1))
3024 {
3025 case 6:
3026 fputs ("@GOT", file);
3027 break;
3028 case 7:
3029 fputs ("@GOTOFF", file);
3030 break;
3031 case 8:
3032 fputs ("@PLT", file);
3033 break;
3034 default:
3035 output_operand_lossage ("invalid UNSPEC as operand");
3036 break;
3037 }
3038 break;
3039
2a2ab3f9
JVA
3040 default:
3041 output_operand_lossage ("invalid expression as operand");
3042 }
3043}
1865dbb5 3044
0f290768 3045/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
3046 We need to handle our special PIC relocations. */
3047
0f290768 3048void
1865dbb5
JM
3049i386_dwarf_output_addr_const (file, x)
3050 FILE *file;
3051 rtx x;
3052{
f0ca81d2 3053 fprintf (file, "%s", INT_ASM_OP);
1865dbb5
JM
3054 if (flag_pic)
3055 output_pic_addr_const (file, x, '\0');
3056 else
3057 output_addr_const (file, x);
3058 fputc ('\n', file);
3059}
3060
3061/* In the name of slightly smaller debug output, and to cater to
3062 general assembler losage, recognize PIC+GOTOFF and turn it back
3063 into a direct symbol reference. */
3064
3065rtx
3066i386_simplify_dwarf_addr (orig_x)
3067 rtx orig_x;
3068{
3069 rtx x = orig_x;
3070
3071 if (GET_CODE (x) != PLUS
3072 || GET_CODE (XEXP (x, 0)) != REG
3073 || GET_CODE (XEXP (x, 1)) != CONST)
3074 return orig_x;
3075
3076 x = XEXP (XEXP (x, 1), 0);
3077 if (GET_CODE (x) == UNSPEC
3078 && XINT (x, 1) == 7)
3079 return XVECEXP (x, 0, 0);
3080
3081 if (GET_CODE (x) == PLUS
3082 && GET_CODE (XEXP (x, 0)) == UNSPEC
3083 && GET_CODE (XEXP (x, 1)) == CONST_INT
3084 && XINT (XEXP (x, 0), 1) == 7)
3085 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
3086
3087 return orig_x;
3088}
2a2ab3f9 3089\f
a269a03c 3090static void
e075ae69 3091put_condition_code (code, mode, reverse, fp, file)
a269a03c 3092 enum rtx_code code;
e075ae69
RH
3093 enum machine_mode mode;
3094 int reverse, fp;
a269a03c
JC
3095 FILE *file;
3096{
a269a03c
JC
3097 const char *suffix;
3098
a269a03c
JC
3099 if (reverse)
3100 code = reverse_condition (code);
e075ae69 3101
a269a03c
JC
3102 switch (code)
3103 {
3104 case EQ:
3105 suffix = "e";
3106 break;
a269a03c
JC
3107 case NE:
3108 suffix = "ne";
3109 break;
a269a03c 3110 case GT:
7e08e190 3111 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
e075ae69
RH
3112 abort ();
3113 suffix = "g";
a269a03c 3114 break;
a269a03c 3115 case GTU:
e075ae69
RH
3116 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
3117 Those same assemblers have the same but opposite losage on cmov. */
7e08e190 3118 if (mode != CCmode)
0f290768 3119 abort ();
e075ae69 3120 suffix = fp ? "nbe" : "a";
a269a03c 3121 break;
a269a03c 3122 case LT:
9076b9c1 3123 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 3124 suffix = "s";
7e08e190 3125 else if (mode == CCmode || mode == CCGCmode)
e075ae69 3126 suffix = "l";
9076b9c1 3127 else
0f290768 3128 abort ();
a269a03c 3129 break;
a269a03c 3130 case LTU:
9076b9c1 3131 if (mode != CCmode)
0f290768 3132 abort ();
a269a03c
JC
3133 suffix = "b";
3134 break;
a269a03c 3135 case GE:
9076b9c1 3136 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 3137 suffix = "ns";
7e08e190 3138 else if (mode == CCmode || mode == CCGCmode)
e075ae69 3139 suffix = "ge";
9076b9c1 3140 else
0f290768 3141 abort ();
a269a03c 3142 break;
a269a03c 3143 case GEU:
e075ae69 3144 /* ??? As above. */
7e08e190 3145 if (mode != CCmode)
0f290768 3146 abort ();
7e08e190 3147 suffix = fp ? "nb" : "ae";
a269a03c 3148 break;
a269a03c 3149 case LE:
7e08e190 3150 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
e075ae69
RH
3151 abort ();
3152 suffix = "le";
a269a03c 3153 break;
a269a03c 3154 case LEU:
9076b9c1
JH
3155 if (mode != CCmode)
3156 abort ();
7e08e190 3157 suffix = "be";
a269a03c 3158 break;
3a3677ff
RH
3159 case UNORDERED:
3160 suffix = "p";
3161 break;
3162 case ORDERED:
3163 suffix = "np";
3164 break;
a269a03c
JC
3165 default:
3166 abort ();
3167 }
3168 fputs (suffix, file);
3169}
3170
e075ae69
RH
3171void
3172print_reg (x, code, file)
3173 rtx x;
3174 int code;
3175 FILE *file;
e5cb57e8 3176{
e075ae69 3177 if (REGNO (x) == ARG_POINTER_REGNUM
564d80f4 3178 || REGNO (x) == FRAME_POINTER_REGNUM
e075ae69
RH
3179 || REGNO (x) == FLAGS_REG
3180 || REGNO (x) == FPSR_REG)
3181 abort ();
e9a25f70 3182
e075ae69
RH
3183 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3184 putc ('%', file);
3185
3186 if (code == 'w')
3187 code = 2;
3188 else if (code == 'b')
3189 code = 1;
3190 else if (code == 'k')
3191 code = 4;
3192 else if (code == 'y')
3193 code = 3;
3194 else if (code == 'h')
3195 code = 0;
a7180f70
BS
3196 else if (code == 'm' || MMX_REG_P (x))
3197 code = 5;
e075ae69
RH
3198 else
3199 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 3200
e075ae69
RH
3201 switch (code)
3202 {
a7180f70
BS
3203 case 5:
3204 fputs (hi_reg_name[REGNO (x)], file);
3205 break;
e075ae69
RH
3206 case 3:
3207 if (STACK_TOP_P (x))
3208 {
3209 fputs ("st(0)", file);
3210 break;
3211 }
3212 /* FALLTHRU */
3213 case 4:
3214 case 8:
3215 case 12:
3216 if (! FP_REG_P (x))
3217 putc ('e', file);
3218 /* FALLTHRU */
a7180f70 3219 case 16:
e075ae69
RH
3220 case 2:
3221 fputs (hi_reg_name[REGNO (x)], file);
3222 break;
3223 case 1:
3224 fputs (qi_reg_name[REGNO (x)], file);
3225 break;
3226 case 0:
3227 fputs (qi_high_reg_name[REGNO (x)], file);
3228 break;
3229 default:
3230 abort ();
fe25fea3 3231 }
e5cb57e8
SC
3232}
3233
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   m -- print "st(n)" as an mmx register.  */

3253void
3254print_operand (file, x, code)
3255 FILE *file;
3256 rtx x;
3257 int code;
3258{
3259 if (code)
3260 {
3261 switch (code)
3262 {
3263 case '*':
e075ae69 3264 if (ASSEMBLER_DIALECT == 0)
2a2ab3f9
JVA
3265 putc ('*', file);
3266 return;
3267
fb204271
DN
3268 case 'A':
3269 if (ASSEMBLER_DIALECT == 0)
3270 putc ('*', file);
3271 else if (ASSEMBLER_DIALECT == 1)
3272 {
3273 /* Intel syntax. For absolute addresses, registers should not
3274 be surrounded by braces. */
3275 if (GET_CODE (x) != REG)
3276 {
3277 putc ('[', file);
3278 PRINT_OPERAND (file, x, 0);
3279 putc (']', file);
3280 return;
3281 }
3282 }
3283
3284 PRINT_OPERAND (file, x, 0);
3285 return;
3286
3287
2a2ab3f9 3288 case 'L':
e075ae69
RH
3289 if (ASSEMBLER_DIALECT == 0)
3290 putc ('l', file);
2a2ab3f9
JVA
3291 return;
3292
3293 case 'W':
e075ae69
RH
3294 if (ASSEMBLER_DIALECT == 0)
3295 putc ('w', file);
2a2ab3f9
JVA
3296 return;
3297
3298 case 'B':
e075ae69
RH
3299 if (ASSEMBLER_DIALECT == 0)
3300 putc ('b', file);
2a2ab3f9
JVA
3301 return;
3302
3303 case 'Q':
e075ae69
RH
3304 if (ASSEMBLER_DIALECT == 0)
3305 putc ('l', file);
2a2ab3f9
JVA
3306 return;
3307
3308 case 'S':
e075ae69
RH
3309 if (ASSEMBLER_DIALECT == 0)
3310 putc ('s', file);
2a2ab3f9
JVA
3311 return;
3312
5f1ec3e6 3313 case 'T':
e075ae69
RH
3314 if (ASSEMBLER_DIALECT == 0)
3315 putc ('t', file);
5f1ec3e6
JVA
3316 return;
3317
2a2ab3f9
JVA
3318 case 'z':
3319 /* 387 opcodes don't get size suffixes if the operands are
0f290768 3320 registers. */
2a2ab3f9
JVA
3321
3322 if (STACK_REG_P (x))
3323 return;
3324
3325 /* this is the size of op from size of operand */
3326 switch (GET_MODE_SIZE (GET_MODE (x)))
3327 {
2a2ab3f9 3328 case 2:
155d8a47
JW
3329#ifdef HAVE_GAS_FILDS_FISTS
3330 putc ('s', file);
3331#endif
2a2ab3f9
JVA
3332 return;
3333
3334 case 4:
3335 if (GET_MODE (x) == SFmode)
3336 {
e075ae69 3337 putc ('s', file);
2a2ab3f9
JVA
3338 return;
3339 }
3340 else
e075ae69 3341 putc ('l', file);
2a2ab3f9
JVA
3342 return;
3343
5f1ec3e6 3344 case 12:
e075ae69
RH
3345 putc ('t', file);
3346 return;
5f1ec3e6 3347
2a2ab3f9
JVA
3348 case 8:
3349 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
3350 {
3351#ifdef GAS_MNEMONICS
e075ae69 3352 putc ('q', file);
56c0e8fa 3353#else
e075ae69
RH
3354 putc ('l', file);
3355 putc ('l', file);
56c0e8fa
JVA
3356#endif
3357 }
e075ae69
RH
3358 else
3359 putc ('l', file);
2a2ab3f9 3360 return;
155d8a47
JW
3361
3362 default:
3363 abort ();
2a2ab3f9 3364 }
4af3895e
JVA
3365
3366 case 'b':
3367 case 'w':
3368 case 'k':
3369 case 'h':
3370 case 'y':
a7180f70 3371 case 'm':
5cb6195d 3372 case 'X':
e075ae69 3373 case 'P':
4af3895e
JVA
3374 break;
3375
2d49677f
SC
3376 case 's':
3377 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
3378 {
3379 PRINT_OPERAND (file, x, 0);
e075ae69 3380 putc (',', file);
2d49677f 3381 }
a269a03c
JC
3382 return;
3383
1853aadd 3384 case 'C':
e075ae69 3385 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 3386 return;
fe25fea3 3387 case 'F':
e075ae69 3388 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
3389 return;
3390
e9a25f70 3391 /* Like above, but reverse condition */
e075ae69
RH
3392 case 'c':
3393 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
3394 return;
fe25fea3 3395 case 'f':
e075ae69 3396 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 3397 return;
e5cb57e8 3398
4af3895e 3399 default:
68daafd4
JVA
3400 {
3401 char str[50];
68daafd4
JVA
3402 sprintf (str, "invalid operand code `%c'", code);
3403 output_operand_lossage (str);
3404 }
2a2ab3f9
JVA
3405 }
3406 }
e9a25f70 3407
2a2ab3f9
JVA
3408 if (GET_CODE (x) == REG)
3409 {
3410 PRINT_REG (x, code, file);
3411 }
e9a25f70 3412
2a2ab3f9
JVA
3413 else if (GET_CODE (x) == MEM)
3414 {
e075ae69
RH
3415 /* No `byte ptr' prefix for call instructions. */
3416 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
2a2ab3f9 3417 {
69ddee61 3418 const char * size;
e075ae69
RH
3419 switch (GET_MODE_SIZE (GET_MODE (x)))
3420 {
3421 case 1: size = "BYTE"; break;
3422 case 2: size = "WORD"; break;
3423 case 4: size = "DWORD"; break;
3424 case 8: size = "QWORD"; break;
3425 case 12: size = "XWORD"; break;
a7180f70 3426 case 16: size = "XMMWORD"; break;
e075ae69 3427 default:
564d80f4 3428 abort ();
e075ae69 3429 }
fb204271
DN
3430
3431 /* Check for explicit size override (codes 'b', 'w' and 'k') */
3432 if (code == 'b')
3433 size = "BYTE";
3434 else if (code == 'w')
3435 size = "WORD";
3436 else if (code == 'k')
3437 size = "DWORD";
3438
e075ae69
RH
3439 fputs (size, file);
3440 fputs (" PTR ", file);
2a2ab3f9 3441 }
e075ae69
RH
3442
3443 x = XEXP (x, 0);
3444 if (flag_pic && CONSTANT_ADDRESS_P (x))
3445 output_pic_addr_const (file, x, code);
2a2ab3f9 3446 else
e075ae69 3447 output_address (x);
2a2ab3f9 3448 }
e9a25f70 3449
2a2ab3f9
JVA
3450 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
3451 {
e9a25f70
JL
3452 REAL_VALUE_TYPE r;
3453 long l;
3454
5f1ec3e6
JVA
3455 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3456 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69
RH
3457
3458 if (ASSEMBLER_DIALECT == 0)
3459 putc ('$', file);
52267fcb 3460 fprintf (file, "0x%lx", l);
5f1ec3e6 3461 }
e9a25f70 3462
0f290768 3463 /* These float cases don't actually occur as immediate operands. */
5f1ec3e6
JVA
3464 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
3465 {
e9a25f70
JL
3466 REAL_VALUE_TYPE r;
3467 char dstr[30];
3468
5f1ec3e6
JVA
3469 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3470 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3471 fprintf (file, "%s", dstr);
2a2ab3f9 3472 }
e9a25f70 3473
5f1ec3e6 3474 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
2a2ab3f9 3475 {
e9a25f70
JL
3476 REAL_VALUE_TYPE r;
3477 char dstr[30];
3478
5f1ec3e6
JVA
3479 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3480 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3481 fprintf (file, "%s", dstr);
2a2ab3f9 3482 }
79325812 3483 else
2a2ab3f9 3484 {
4af3895e 3485 if (code != 'P')
2a2ab3f9 3486 {
695dac07 3487 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69
RH
3488 {
3489 if (ASSEMBLER_DIALECT == 0)
3490 putc ('$', file);
3491 }
2a2ab3f9
JVA
3492 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
3493 || GET_CODE (x) == LABEL_REF)
e075ae69
RH
3494 {
3495 if (ASSEMBLER_DIALECT == 0)
3496 putc ('$', file);
3497 else
3498 fputs ("OFFSET FLAT:", file);
3499 }
2a2ab3f9 3500 }
e075ae69
RH
3501 if (GET_CODE (x) == CONST_INT)
3502 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3503 else if (flag_pic)
2a2ab3f9
JVA
3504 output_pic_addr_const (file, x, code);
3505 else
3506 output_addr_const (file, x);
3507 }
3508}
3509\f
3510/* Print a memory operand whose address is ADDR. */
3511
3512void
3513print_operand_address (file, addr)
3514 FILE *file;
3515 register rtx addr;
3516{
e075ae69
RH
3517 struct ix86_address parts;
3518 rtx base, index, disp;
3519 int scale;
e9a25f70 3520
e075ae69
RH
3521 if (! ix86_decompose_address (addr, &parts))
3522 abort ();
e9a25f70 3523
e075ae69
RH
3524 base = parts.base;
3525 index = parts.index;
3526 disp = parts.disp;
3527 scale = parts.scale;
e9a25f70 3528
e075ae69
RH
3529 if (!base && !index)
3530 {
3531 /* Displacement only requires special attention. */
e9a25f70 3532
e075ae69 3533 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 3534 {
e075ae69 3535 if (ASSEMBLER_DIALECT != 0)
fb204271
DN
3536 {
3537 if (USER_LABEL_PREFIX[0] == 0)
3538 putc ('%', file);
3539 fputs ("ds:", file);
3540 }
e075ae69 3541 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
2a2ab3f9 3542 }
e075ae69
RH
3543 else if (flag_pic)
3544 output_pic_addr_const (file, addr, 0);
3545 else
3546 output_addr_const (file, addr);
3547 }
3548 else
3549 {
3550 if (ASSEMBLER_DIALECT == 0)
2a2ab3f9 3551 {
e075ae69 3552 if (disp)
2a2ab3f9 3553 {
c399861d 3554 if (flag_pic)
e075ae69
RH
3555 output_pic_addr_const (file, disp, 0);
3556 else if (GET_CODE (disp) == LABEL_REF)
3557 output_asm_label (disp);
2a2ab3f9 3558 else
e075ae69 3559 output_addr_const (file, disp);
2a2ab3f9
JVA
3560 }
3561
e075ae69
RH
3562 putc ('(', file);
3563 if (base)
3564 PRINT_REG (base, 0, file);
3565 if (index)
2a2ab3f9 3566 {
e075ae69
RH
3567 putc (',', file);
3568 PRINT_REG (index, 0, file);
3569 if (scale != 1)
3570 fprintf (file, ",%d", scale);
2a2ab3f9 3571 }
e075ae69 3572 putc (')', file);
2a2ab3f9 3573 }
2a2ab3f9
JVA
3574 else
3575 {
e075ae69 3576 rtx offset = NULL_RTX;
e9a25f70 3577
e075ae69
RH
3578 if (disp)
3579 {
3580 /* Pull out the offset of a symbol; print any symbol itself. */
3581 if (GET_CODE (disp) == CONST
3582 && GET_CODE (XEXP (disp, 0)) == PLUS
3583 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3584 {
3585 offset = XEXP (XEXP (disp, 0), 1);
3586 disp = gen_rtx_CONST (VOIDmode,
3587 XEXP (XEXP (disp, 0), 0));
3588 }
ce193852 3589
e075ae69
RH
3590 if (flag_pic)
3591 output_pic_addr_const (file, disp, 0);
3592 else if (GET_CODE (disp) == LABEL_REF)
3593 output_asm_label (disp);
3594 else if (GET_CODE (disp) == CONST_INT)
3595 offset = disp;
3596 else
3597 output_addr_const (file, disp);
3598 }
e9a25f70 3599
e075ae69
RH
3600 putc ('[', file);
3601 if (base)
a8620236 3602 {
e075ae69
RH
3603 PRINT_REG (base, 0, file);
3604 if (offset)
3605 {
3606 if (INTVAL (offset) >= 0)
3607 putc ('+', file);
3608 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3609 }
a8620236 3610 }
e075ae69
RH
3611 else if (offset)
3612 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 3613 else
e075ae69 3614 putc ('0', file);
e9a25f70 3615
e075ae69
RH
3616 if (index)
3617 {
3618 putc ('+', file);
3619 PRINT_REG (index, 0, file);
3620 if (scale != 1)
3621 fprintf (file, "*%d", scale);
3622 }
3623 putc (']', file);
3624 }
2a2ab3f9
JVA
3625 }
3626}
3627\f
3628/* Split one or more DImode RTL references into pairs of SImode
3629 references. The RTL can be REG, offsettable MEM, integer constant, or
3630 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3631 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 3632 that parallel "operands". */
2a2ab3f9
JVA
3633
3634void
3635split_di (operands, num, lo_half, hi_half)
3636 rtx operands[];
3637 int num;
3638 rtx lo_half[], hi_half[];
3639{
3640 while (num--)
3641 {
57dbca5e 3642 rtx op = operands[num];
e075ae69
RH
3643 if (CONSTANT_P (op))
3644 split_double (op, &lo_half[num], &hi_half[num]);
3645 else if (! reload_completed)
a269a03c
JC
3646 {
3647 lo_half[num] = gen_lowpart (SImode, op);
3648 hi_half[num] = gen_highpart (SImode, op);
3649 }
3650 else if (GET_CODE (op) == REG)
2a2ab3f9 3651 {
57dbca5e
BS
3652 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3653 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
2a2ab3f9 3654 }
57dbca5e 3655 else if (offsettable_memref_p (op))
2a2ab3f9 3656 {
57dbca5e
BS
3657 rtx lo_addr = XEXP (op, 0);
3658 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3659 lo_half[num] = change_address (op, SImode, lo_addr);
3660 hi_half[num] = change_address (op, SImode, hi_addr);
2a2ab3f9
JVA
3661 }
3662 else
564d80f4 3663 abort ();
2a2ab3f9
JVA
3664 }
3665}
3666\f
2a2ab3f9
JVA
3667/* Output code to perform a 387 binary operation in INSN, one of PLUS,
3668 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3669 is the expression of the binary operation. The output may either be
3670 emitted here, or returned to the caller, like all output_* functions.
3671
3672 There is no guarantee that the operands are the same mode, as they
0f290768 3673 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 3674
e3c2afab
AM
3675#ifndef SYSV386_COMPAT
3676/* Set to 1 for compatibility with brain-damaged assemblers. No-one
3677 wants to fix the assemblers because that causes incompatibility
3678 with gcc. No-one wants to fix gcc because that causes
3679 incompatibility with assemblers... You can use the option of
3680 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
3681#define SYSV386_COMPAT 1
3682#endif
3683
69ddee61 3684const char *
2a2ab3f9
JVA
3685output_387_binary_op (insn, operands)
3686 rtx insn;
3687 rtx *operands;
3688{
e3c2afab 3689 static char buf[30];
69ddee61 3690 const char *p;
2a2ab3f9 3691
e3c2afab
AM
3692#ifdef ENABLE_CHECKING
3693 /* Even if we do not want to check the inputs, this documents input
3694 constraints. Which helps in understanding the following code. */
3695 if (STACK_REG_P (operands[0])
3696 && ((REG_P (operands[1])
3697 && REGNO (operands[0]) == REGNO (operands[1])
3698 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
3699 || (REG_P (operands[2])
3700 && REGNO (operands[0]) == REGNO (operands[2])
3701 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
3702 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
3703 ; /* ok */
3704 else
3705 abort ();
3706#endif
3707
2a2ab3f9
JVA
3708 switch (GET_CODE (operands[3]))
3709 {
3710 case PLUS:
e075ae69
RH
3711 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3712 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3713 p = "fiadd";
3714 else
3715 p = "fadd";
2a2ab3f9
JVA
3716 break;
3717
3718 case MINUS:
e075ae69
RH
3719 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3720 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3721 p = "fisub";
3722 else
3723 p = "fsub";
2a2ab3f9
JVA
3724 break;
3725
3726 case MULT:
e075ae69
RH
3727 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3728 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3729 p = "fimul";
3730 else
3731 p = "fmul";
2a2ab3f9
JVA
3732 break;
3733
3734 case DIV:
e075ae69
RH
3735 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3736 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3737 p = "fidiv";
3738 else
3739 p = "fdiv";
2a2ab3f9
JVA
3740 break;
3741
3742 default:
3743 abort ();
3744 }
3745
e075ae69 3746 strcpy (buf, p);
2a2ab3f9
JVA
3747
3748 switch (GET_CODE (operands[3]))
3749 {
3750 case MULT:
3751 case PLUS:
3752 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
3753 {
e3c2afab 3754 rtx temp = operands[2];
2a2ab3f9
JVA
3755 operands[2] = operands[1];
3756 operands[1] = temp;
3757 }
3758
e3c2afab
AM
3759 /* know operands[0] == operands[1]. */
3760
2a2ab3f9 3761 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
3762 {
3763 p = "%z2\t%2";
3764 break;
3765 }
2a2ab3f9
JVA
3766
3767 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
3768 {
3769 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
3770 /* How is it that we are storing to a dead operand[2]?
3771 Well, presumably operands[1] is dead too. We can't
3772 store the result to st(0) as st(0) gets popped on this
3773 instruction. Instead store to operands[2] (which I
3774 think has to be st(1)). st(1) will be popped later.
3775 gcc <= 2.8.1 didn't have this check and generated
3776 assembly code that the Unixware assembler rejected. */
3777 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 3778 else
e3c2afab 3779 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 3780 break;
6b28fd63 3781 }
2a2ab3f9
JVA
3782
3783 if (STACK_TOP_P (operands[0]))
e3c2afab 3784 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 3785 else
e3c2afab 3786 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 3787 break;
2a2ab3f9
JVA
3788
3789 case MINUS:
3790 case DIV:
3791 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
3792 {
3793 p = "r%z1\t%1";
3794 break;
3795 }
2a2ab3f9
JVA
3796
3797 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
3798 {
3799 p = "%z2\t%2";
3800 break;
3801 }
2a2ab3f9 3802
2a2ab3f9 3803 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 3804 {
e3c2afab
AM
3805#if SYSV386_COMPAT
3806 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
3807 derived assemblers, confusingly reverse the direction of
3808 the operation for fsub{r} and fdiv{r} when the
3809 destination register is not st(0). The Intel assembler
3810 doesn't have this brain damage. Read !SYSV386_COMPAT to
3811 figure out what the hardware really does. */
3812 if (STACK_TOP_P (operands[0]))
3813 p = "{p\t%0, %2|rp\t%2, %0}";
3814 else
3815 p = "{rp\t%2, %0|p\t%0, %2}";
3816#else
6b28fd63 3817 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
3818 /* As above for fmul/fadd, we can't store to st(0). */
3819 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 3820 else
e3c2afab
AM
3821 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3822#endif
e075ae69 3823 break;
6b28fd63 3824 }
2a2ab3f9
JVA
3825
3826 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 3827 {
e3c2afab 3828#if SYSV386_COMPAT
6b28fd63 3829 if (STACK_TOP_P (operands[0]))
e3c2afab 3830 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 3831 else
e3c2afab
AM
3832 p = "{p\t%1, %0|rp\t%0, %1}";
3833#else
3834 if (STACK_TOP_P (operands[0]))
3835 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
3836 else
3837 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
3838#endif
e075ae69 3839 break;
6b28fd63 3840 }
2a2ab3f9
JVA
3841
3842 if (STACK_TOP_P (operands[0]))
3843 {
3844 if (STACK_TOP_P (operands[1]))
e3c2afab 3845 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 3846 else
e3c2afab 3847 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 3848 break;
2a2ab3f9
JVA
3849 }
3850 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
3851 {
3852#if SYSV386_COMPAT
3853 p = "{\t%1, %0|r\t%0, %1}";
3854#else
3855 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
3856#endif
3857 }
2a2ab3f9 3858 else
e3c2afab
AM
3859 {
3860#if SYSV386_COMPAT
3861 p = "{r\t%2, %0|\t%0, %2}";
3862#else
3863 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
3864#endif
3865 }
e075ae69 3866 break;
2a2ab3f9
JVA
3867
3868 default:
3869 abort ();
3870 }
e075ae69
RH
3871
3872 strcat (buf, p);
3873 return buf;
2a2ab3f9 3874}
e075ae69 3875
2a2ab3f9 3876/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 3877 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 3878 operand may be [SDX]Fmode. */
2a2ab3f9 3879
69ddee61 3880const char *
2a2ab3f9
JVA
3881output_fix_trunc (insn, operands)
3882 rtx insn;
3883 rtx *operands;
3884{
3885 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69
RH
3886 int dimode_p = GET_MODE (operands[0]) == DImode;
3887 rtx xops[4];
2a2ab3f9 3888
e075ae69
RH
3889 /* Jump through a hoop or two for DImode, since the hardware has no
3890 non-popping instruction. We used to do this a different way, but
3891 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
3892 if (dimode_p && !stack_top_dies)
3893 output_asm_insn ("fld\t%y1", operands);
e075ae69
RH
3894
3895 if (! STACK_TOP_P (operands[1]))
10195bd8
JW
3896 abort ();
3897
e075ae69
RH
3898 xops[0] = GEN_INT (12);
3899 xops[1] = adj_offsettable_operand (operands[2], 1);
3900 xops[1] = change_address (xops[1], QImode, NULL_RTX);
305f097e 3901
e075ae69
RH
3902 xops[2] = operands[0];
3903 if (GET_CODE (operands[0]) != MEM)
3904 xops[2] = operands[3];
2a2ab3f9 3905
e075ae69
RH
3906 output_asm_insn ("fnstcw\t%2", operands);
3907 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
3908 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
3909 output_asm_insn ("fldcw\t%2", operands);
3910 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
e9a25f70 3911
e075ae69
RH
3912 if (stack_top_dies || dimode_p)
3913 output_asm_insn ("fistp%z2\t%2", xops);
10195bd8 3914 else
e075ae69
RH
3915 output_asm_insn ("fist%z2\t%2", xops);
3916
3917 output_asm_insn ("fldcw\t%2", operands);
10195bd8 3918
e075ae69 3919 if (GET_CODE (operands[0]) != MEM)
2a2ab3f9 3920 {
e075ae69 3921 if (dimode_p)
2e14a41b 3922 {
e075ae69
RH
3923 split_di (operands+0, 1, xops+0, xops+1);
3924 split_di (operands+3, 1, xops+2, xops+3);
3925 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3926 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
2e14a41b 3927 }
46d21d2c 3928 else if (GET_MODE (operands[0]) == SImode)
e3c2afab 3929 output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
46d21d2c
JW
3930 else
3931 output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
2a2ab3f9 3932 }
2a2ab3f9 3933
e075ae69 3934 return "";
2a2ab3f9 3935}
cda749b1 3936
e075ae69
RH
3937/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
3938 should be used and 2 when fnstsw should be used. UNORDERED_P is true
3939 when fucom should be used. */
3940
69ddee61 3941const char *
e075ae69 3942output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
3943 rtx insn;
3944 rtx *operands;
e075ae69 3945 int eflags_p, unordered_p;
cda749b1 3946{
e075ae69
RH
3947 int stack_top_dies;
3948 rtx cmp_op0 = operands[0];
3949 rtx cmp_op1 = operands[1];
3950
3951 if (eflags_p == 2)
3952 {
3953 cmp_op0 = cmp_op1;
3954 cmp_op1 = operands[2];
3955 }
cda749b1 3956
e075ae69 3957 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
3958 abort ();
3959
e075ae69 3960 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 3961
e075ae69
RH
3962 if (STACK_REG_P (cmp_op1)
3963 && stack_top_dies
3964 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
3965 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 3966 {
e075ae69
RH
3967 /* If both the top of the 387 stack dies, and the other operand
3968 is also a stack register that dies, then this must be a
3969 `fcompp' float compare */
3970
3971 if (eflags_p == 1)
3972 {
3973 /* There is no double popping fcomi variant. Fortunately,
3974 eflags is immune from the fstp's cc clobbering. */
3975 if (unordered_p)
3976 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
3977 else
3978 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
3979 return "fstp\t%y0";
3980 }
3981 else
cda749b1 3982 {
e075ae69
RH
3983 if (eflags_p == 2)
3984 {
3985 if (unordered_p)
3986 return "fucompp\n\tfnstsw\t%0";
3987 else
3988 return "fcompp\n\tfnstsw\t%0";
3989 }
cda749b1
JW
3990 else
3991 {
e075ae69
RH
3992 if (unordered_p)
3993 return "fucompp";
3994 else
3995 return "fcompp";
cda749b1
JW
3996 }
3997 }
cda749b1
JW
3998 }
3999 else
4000 {
e075ae69 4001 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 4002
0f290768 4003 static const char * const alt[24] =
e075ae69
RH
4004 {
4005 "fcom%z1\t%y1",
4006 "fcomp%z1\t%y1",
4007 "fucom%z1\t%y1",
4008 "fucomp%z1\t%y1",
0f290768 4009
e075ae69
RH
4010 "ficom%z1\t%y1",
4011 "ficomp%z1\t%y1",
4012 NULL,
4013 NULL,
4014
4015 "fcomi\t{%y1, %0|%0, %y1}",
4016 "fcomip\t{%y1, %0|%0, %y1}",
4017 "fucomi\t{%y1, %0|%0, %y1}",
4018 "fucomip\t{%y1, %0|%0, %y1}",
4019
4020 NULL,
4021 NULL,
4022 NULL,
4023 NULL,
4024
4025 "fcom%z2\t%y2\n\tfnstsw\t%0",
4026 "fcomp%z2\t%y2\n\tfnstsw\t%0",
4027 "fucom%z2\t%y2\n\tfnstsw\t%0",
4028 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 4029
e075ae69
RH
4030 "ficom%z2\t%y2\n\tfnstsw\t%0",
4031 "ficomp%z2\t%y2\n\tfnstsw\t%0",
4032 NULL,
4033 NULL
4034 };
4035
4036 int mask;
69ddee61 4037 const char *ret;
e075ae69
RH
4038
4039 mask = eflags_p << 3;
4040 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4041 mask |= unordered_p << 1;
4042 mask |= stack_top_dies;
4043
4044 if (mask >= 24)
4045 abort ();
4046 ret = alt[mask];
4047 if (ret == NULL)
4048 abort ();
cda749b1 4049
e075ae69 4050 return ret;
cda749b1
JW
4051 }
4052}
2a2ab3f9 4053
e075ae69 4054/* Output assembler code to FILE to initialize basic-block profiling.
2a2ab3f9 4055
e075ae69 4056 If profile_block_flag == 2
2a2ab3f9 4057
e075ae69
RH
4058 Output code to call the subroutine `__bb_init_trace_func'
4059 and pass two parameters to it. The first parameter is
4060 the address of a block allocated in the object module.
4061 The second parameter is the number of the first basic block
4062 of the function.
2a2ab3f9 4063
e075ae69 4064 The name of the block is a local symbol made with this statement:
0f290768 4065
e075ae69 4066 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
2a2ab3f9 4067
e075ae69
RH
4068 Of course, since you are writing the definition of
4069 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4070 can take a short cut in the definition of this macro and use the
4071 name that you know will result.
2a2ab3f9 4072
e075ae69
RH
4073 The number of the first basic block of the function is
4074 passed to the macro in BLOCK_OR_LABEL.
2a2ab3f9 4075
e075ae69
RH
4076 If described in a virtual assembler language the code to be
4077 output looks like:
2a2ab3f9 4078
e075ae69
RH
4079 parameter1 <- LPBX0
4080 parameter2 <- BLOCK_OR_LABEL
4081 call __bb_init_trace_func
2a2ab3f9 4082
e075ae69 4083 else if profile_block_flag != 0
e74389ff 4084
e075ae69
RH
4085 Output code to call the subroutine `__bb_init_func'
4086 and pass one single parameter to it, which is the same
4087 as the first parameter to `__bb_init_trace_func'.
e74389ff 4088
e075ae69
RH
4089 The first word of this parameter is a flag which will be nonzero if
4090 the object module has already been initialized. So test this word
4091 first, and do not call `__bb_init_func' if the flag is nonzero.
4092 Note: When profile_block_flag == 2 the test need not be done
4093 but `__bb_init_trace_func' *must* be called.
e74389ff 4094
e075ae69
RH
4095 BLOCK_OR_LABEL may be used to generate a label number as a
4096 branch destination in case `__bb_init_func' will not be called.
e74389ff 4097
e075ae69
RH
4098 If described in a virtual assembler language the code to be
4099 output looks like:
2a2ab3f9 4100
e075ae69
RH
4101 cmp (LPBX0),0
4102 jne local_label
4103 parameter1 <- LPBX0
4104 call __bb_init_func
4105 local_label:
4106*/
c572e5ba 4107
e075ae69
RH
4108void
4109ix86_output_function_block_profiler (file, block_or_label)
4110 FILE *file;
4111 int block_or_label;
c572e5ba 4112{
e075ae69
RH
4113 static int num_func = 0;
4114 rtx xops[8];
4115 char block_table[80], false_label[80];
c572e5ba 4116
e075ae69 4117 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
e9a25f70 4118
e075ae69
RH
4119 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4120 xops[5] = stack_pointer_rtx;
4121 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
2a2ab3f9 4122
e075ae69 4123 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
c572e5ba 4124
e075ae69 4125 switch (profile_block_flag)
c572e5ba 4126 {
e075ae69
RH
4127 case 2:
4128 xops[2] = GEN_INT (block_or_label);
4129 xops[3] = gen_rtx_MEM (Pmode,
4130 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4131 xops[6] = GEN_INT (8);
e9a25f70 4132
e075ae69
RH
4133 output_asm_insn ("push{l}\t%2", xops);
4134 if (!flag_pic)
4135 output_asm_insn ("push{l}\t%1", xops);
e9a25f70 4136 else
870a0c2c 4137 {
e075ae69
RH
4138 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4139 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 4140 }
e075ae69
RH
4141 output_asm_insn ("call\t%P3", xops);
4142 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4143 break;
c572e5ba 4144
e075ae69
RH
4145 default:
4146 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
c572e5ba 4147
e075ae69
RH
4148 xops[0] = const0_rtx;
4149 xops[2] = gen_rtx_MEM (Pmode,
4150 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4151 xops[3] = gen_rtx_MEM (Pmode,
4152 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4153 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4154 xops[6] = GEN_INT (4);
a14003ee 4155
e075ae69 4156 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
446ba526 4157
e075ae69
RH
4158 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4159 output_asm_insn ("jne\t%2", xops);
870a0c2c 4160
e075ae69
RH
4161 if (!flag_pic)
4162 output_asm_insn ("push{l}\t%1", xops);
4163 else
4164 {
4165 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
4166 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 4167 }
e075ae69
RH
4168 output_asm_insn ("call\t%P3", xops);
4169 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4170 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4171 num_func++;
4172 break;
c572e5ba 4173 }
2a2ab3f9 4174}
305f097e 4175
e075ae69
RH
4176/* Output assembler code to FILE to increment a counter associated
4177 with basic block number BLOCKNO.
305f097e 4178
e075ae69 4179 If profile_block_flag == 2
ecbc4695 4180
e075ae69
RH
4181 Output code to initialize the global structure `__bb' and
4182 call the function `__bb_trace_func' which will increment the
4183 counter.
ecbc4695 4184
e075ae69
RH
4185 `__bb' consists of two words. In the first word the number
4186 of the basic block has to be stored. In the second word
0f290768 4187 the address of a block allocated in the object module
e075ae69 4188 has to be stored.
ecbc4695 4189
e075ae69 4190 The basic block number is given by BLOCKNO.
ecbc4695 4191
0f290768 4192 The address of the block is given by the label created with
305f097e 4193
e075ae69 4194 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
305f097e 4195
e075ae69 4196 by FUNCTION_BLOCK_PROFILER.
ecbc4695 4197
e075ae69
RH
4198 Of course, since you are writing the definition of
4199 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4200 can take a short cut in the definition of this macro and use the
4201 name that you know will result.
305f097e 4202
e075ae69
RH
4203 If described in a virtual assembler language the code to be
4204 output looks like:
305f097e 4205
e075ae69
RH
4206 move BLOCKNO -> (__bb)
4207 move LPBX0 -> (__bb+4)
4208 call __bb_trace_func
305f097e 4209
e075ae69
RH
4210 Note that function `__bb_trace_func' must not change the
4211 machine state, especially the flag register. To grant
4212 this, you must output code to save and restore registers
4213 either in this macro or in the macros MACHINE_STATE_SAVE
4214 and MACHINE_STATE_RESTORE. The last two macros will be
4215 used in the function `__bb_trace_func', so you must make
0f290768 4216 sure that the function prologue does not change any
e075ae69 4217 register prior to saving it with MACHINE_STATE_SAVE.
305f097e 4218
e075ae69 4219 else if profile_block_flag != 0
305f097e 4220
e075ae69
RH
4221 Output code to increment the counter directly.
4222 Basic blocks are numbered separately from zero within each
4223 compiled object module. The count associated with block number
0f290768 4224 BLOCKNO is at index BLOCKNO in an array of words; the name of
e075ae69 4225 this array is a local symbol made with this statement:
32b5b1aa 4226
e075ae69 4227 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
32b5b1aa 4228
e075ae69
RH
4229 Of course, since you are writing the definition of
4230 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4231 can take a short cut in the definition of this macro and use the
0f290768 4232 name that you know will result.
32b5b1aa 4233
e075ae69
RH
4234 If described in a virtual assembler language the code to be
4235 output looks like:
32b5b1aa 4236
e075ae69
RH
4237 inc (LPBX2+4*BLOCKNO)
4238*/
32b5b1aa 4239
e075ae69
RH
4240void
4241ix86_output_block_profiler (file, blockno)
4242 FILE *file ATTRIBUTE_UNUSED;
4243 int blockno;
4244{
4245 rtx xops[8], cnt_rtx;
4246 char counts[80];
4247 char *block_table = counts;
4248
4249 switch (profile_block_flag)
4250 {
4251 case 2:
4252 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
32b5b1aa 4253
e075ae69
RH
4254 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4255 xops[2] = GEN_INT (blockno);
4256 xops[3] = gen_rtx_MEM (Pmode,
4257 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
4258 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
4259 xops[5] = plus_constant (xops[4], 4);
4260 xops[0] = gen_rtx_MEM (SImode, xops[4]);
4261 xops[6] = gen_rtx_MEM (SImode, xops[5]);
79325812 4262
e075ae69 4263 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
32b5b1aa 4264
e075ae69
RH
4265 output_asm_insn ("pushf", xops);
4266 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4267 if (flag_pic)
32b5b1aa 4268 {
e075ae69
RH
4269 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4270 output_asm_insn ("push{l}\t%7", xops);
4271 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4272 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
4273 output_asm_insn ("pop{l}\t%7", xops);
4274 }
4275 else
4276 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
4277 output_asm_insn ("call\t%P3", xops);
4278 output_asm_insn ("popf", xops);
32b5b1aa 4279
e075ae69 4280 break;
32b5b1aa 4281
e075ae69
RH
4282 default:
4283 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
4284 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
4285 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
32b5b1aa 4286
e075ae69
RH
4287 if (blockno)
4288 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
32b5b1aa 4289
e075ae69
RH
4290 if (flag_pic)
4291 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
32b5b1aa 4292
e075ae69
RH
4293 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
4294 output_asm_insn ("inc{l}\t%0", xops);
32b5b1aa 4295
e075ae69 4296 break;
32b5b1aa 4297 }
32b5b1aa 4298}
32b5b1aa 4299\f
79325812 4300void
e075ae69
RH
4301ix86_expand_move (mode, operands)
4302 enum machine_mode mode;
4303 rtx operands[];
32b5b1aa 4304{
e075ae69 4305 int strict = (reload_in_progress || reload_completed);
e075ae69 4306 rtx insn;
e9a25f70 4307
e075ae69 4308 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
32b5b1aa 4309 {
e075ae69 4310 /* Emit insns to move operands[1] into operands[0]. */
e9a25f70 4311
e075ae69
RH
4312 if (GET_CODE (operands[0]) == MEM)
4313 operands[1] = force_reg (Pmode, operands[1]);
4314 else
32b5b1aa 4315 {
e075ae69
RH
4316 rtx temp = operands[0];
4317 if (GET_CODE (temp) != REG)
4318 temp = gen_reg_rtx (Pmode);
4319 temp = legitimize_pic_address (operands[1], temp);
4320 if (temp == operands[0])
4321 return;
4322 operands[1] = temp;
32b5b1aa 4323 }
e075ae69
RH
4324 }
4325 else
4326 {
d7a29404
JH
4327 if (GET_CODE (operands[0]) == MEM
4328 && (GET_MODE (operands[0]) == QImode
4329 || !push_operand (operands[0], mode))
4330 && GET_CODE (operands[1]) == MEM)
e075ae69 4331 operands[1] = force_reg (mode, operands[1]);
e9a25f70 4332
2c5a510c
RH
4333 if (push_operand (operands[0], mode)
4334 && ! general_no_elim_operand (operands[1], mode))
4335 operands[1] = copy_to_mode_reg (mode, operands[1]);
4336
e075ae69 4337 if (FLOAT_MODE_P (mode))
32b5b1aa 4338 {
d7a29404
JH
4339 /* If we are loading a floating point constant to a register,
4340 force the value to memory now, since we'll get better code
4341 out the back end. */
e075ae69
RH
4342
4343 if (strict)
4344 ;
e075ae69 4345 else if (GET_CODE (operands[1]) == CONST_DOUBLE
d7a29404 4346 && register_operand (operands[0], mode))
e075ae69 4347 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
32b5b1aa 4348 }
32b5b1aa 4349 }
e9a25f70 4350
e075ae69 4351 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
e9a25f70 4352
e075ae69
RH
4353 emit_insn (insn);
4354}
e9a25f70 4355
e075ae69
RH
4356/* Attempt to expand a binary operator. Make the expansion closer to the
4357 actual machine, then just general_operand, which will allow 3 separate
9d81fc27 4358 memory references (one output, two input) in a single insn. */
e9a25f70 4359
e075ae69
RH
4360void
4361ix86_expand_binary_operator (code, mode, operands)
4362 enum rtx_code code;
4363 enum machine_mode mode;
4364 rtx operands[];
4365{
4366 int matching_memory;
4367 rtx src1, src2, dst, op, clob;
4368
4369 dst = operands[0];
4370 src1 = operands[1];
4371 src2 = operands[2];
4372
4373 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
4374 if (GET_RTX_CLASS (code) == 'c'
4375 && (rtx_equal_p (dst, src2)
4376 || immediate_operand (src1, mode)))
4377 {
4378 rtx temp = src1;
4379 src1 = src2;
4380 src2 = temp;
32b5b1aa 4381 }
e9a25f70 4382
e075ae69
RH
4383 /* If the destination is memory, and we do not have matching source
4384 operands, do things in registers. */
4385 matching_memory = 0;
4386 if (GET_CODE (dst) == MEM)
32b5b1aa 4387 {
e075ae69
RH
4388 if (rtx_equal_p (dst, src1))
4389 matching_memory = 1;
4390 else if (GET_RTX_CLASS (code) == 'c'
4391 && rtx_equal_p (dst, src2))
4392 matching_memory = 2;
4393 else
4394 dst = gen_reg_rtx (mode);
4395 }
0f290768 4396
e075ae69
RH
4397 /* Both source operands cannot be in memory. */
4398 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
4399 {
4400 if (matching_memory != 2)
4401 src2 = force_reg (mode, src2);
4402 else
4403 src1 = force_reg (mode, src1);
32b5b1aa 4404 }
e9a25f70 4405
06a964de
JH
4406 /* If the operation is not commutable, source 1 cannot be a constant
4407 or non-matching memory. */
0f290768 4408 if ((CONSTANT_P (src1)
06a964de
JH
4409 || (!matching_memory && GET_CODE (src1) == MEM))
4410 && GET_RTX_CLASS (code) != 'c')
e075ae69 4411 src1 = force_reg (mode, src1);
0f290768 4412
e075ae69 4413 /* If optimizing, copy to regs to improve CSE */
fe577e58 4414 if (optimize && ! no_new_pseudos)
32b5b1aa 4415 {
e075ae69
RH
4416 if (GET_CODE (dst) == MEM)
4417 dst = gen_reg_rtx (mode);
4418 if (GET_CODE (src1) == MEM)
4419 src1 = force_reg (mode, src1);
4420 if (GET_CODE (src2) == MEM)
4421 src2 = force_reg (mode, src2);
32b5b1aa 4422 }
e9a25f70 4423
e075ae69
RH
4424 /* Emit the instruction. */
4425
4426 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
4427 if (reload_in_progress)
4428 {
4429 /* Reload doesn't know about the flags register, and doesn't know that
4430 it doesn't want to clobber it. We can only do this with PLUS. */
4431 if (code != PLUS)
4432 abort ();
4433 emit_insn (op);
4434 }
4435 else
32b5b1aa 4436 {
e075ae69
RH
4437 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4438 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 4439 }
e9a25f70 4440
e075ae69
RH
4441 /* Fix up the destination if needed. */
4442 if (dst != operands[0])
4443 emit_move_insn (operands[0], dst);
4444}
4445
4446/* Return TRUE or FALSE depending on whether the binary operator meets the
4447 appropriate constraints. */
4448
4449int
4450ix86_binary_operator_ok (code, mode, operands)
4451 enum rtx_code code;
4452 enum machine_mode mode ATTRIBUTE_UNUSED;
4453 rtx operands[3];
4454{
4455 /* Both source operands cannot be in memory. */
4456 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4457 return 0;
4458 /* If the operation is not commutable, source 1 cannot be a constant. */
4459 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4460 return 0;
4461 /* If the destination is memory, we must have a matching source operand. */
4462 if (GET_CODE (operands[0]) == MEM
4463 && ! (rtx_equal_p (operands[0], operands[1])
4464 || (GET_RTX_CLASS (code) == 'c'
4465 && rtx_equal_p (operands[0], operands[2]))))
4466 return 0;
06a964de
JH
4467 /* If the operation is not commutable and the source 1 is memory, we must
4468 have a matching destionation. */
4469 if (GET_CODE (operands[1]) == MEM
4470 && GET_RTX_CLASS (code) != 'c'
4471 && ! rtx_equal_p (operands[0], operands[1]))
4472 return 0;
e075ae69
RH
4473 return 1;
4474}
4475
4476/* Attempt to expand a unary operator. Make the expansion closer to the
4477 actual machine, then just general_operand, which will allow 2 separate
9d81fc27 4478 memory references (one output, one input) in a single insn. */
e075ae69 4479
9d81fc27 4480void
e075ae69
RH
4481ix86_expand_unary_operator (code, mode, operands)
4482 enum rtx_code code;
4483 enum machine_mode mode;
4484 rtx operands[];
4485{
06a964de
JH
4486 int matching_memory;
4487 rtx src, dst, op, clob;
4488
4489 dst = operands[0];
4490 src = operands[1];
e075ae69 4491
06a964de
JH
4492 /* If the destination is memory, and we do not have matching source
4493 operands, do things in registers. */
4494 matching_memory = 0;
4495 if (GET_CODE (dst) == MEM)
32b5b1aa 4496 {
06a964de
JH
4497 if (rtx_equal_p (dst, src))
4498 matching_memory = 1;
e075ae69 4499 else
06a964de 4500 dst = gen_reg_rtx (mode);
32b5b1aa 4501 }
e9a25f70 4502
06a964de
JH
4503 /* When source operand is memory, destination must match. */
4504 if (!matching_memory && GET_CODE (src) == MEM)
4505 src = force_reg (mode, src);
0f290768 4506
06a964de 4507 /* If optimizing, copy to regs to improve CSE */
fe577e58 4508 if (optimize && ! no_new_pseudos)
06a964de
JH
4509 {
4510 if (GET_CODE (dst) == MEM)
4511 dst = gen_reg_rtx (mode);
4512 if (GET_CODE (src) == MEM)
4513 src = force_reg (mode, src);
4514 }
4515
4516 /* Emit the instruction. */
4517
4518 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
4519 if (reload_in_progress || code == NOT)
4520 {
4521 /* Reload doesn't know about the flags register, and doesn't know that
4522 it doesn't want to clobber it. */
4523 if (code != NOT)
4524 abort ();
4525 emit_insn (op);
4526 }
4527 else
4528 {
4529 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4530 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4531 }
4532
4533 /* Fix up the destination if needed. */
4534 if (dst != operands[0])
4535 emit_move_insn (operands[0], dst);
e075ae69
RH
4536}
4537
4538/* Return TRUE or FALSE depending on whether the unary operator meets the
4539 appropriate constraints. */
4540
4541int
4542ix86_unary_operator_ok (code, mode, operands)
4543 enum rtx_code code ATTRIBUTE_UNUSED;
4544 enum machine_mode mode ATTRIBUTE_UNUSED;
4545 rtx operands[2] ATTRIBUTE_UNUSED;
4546{
06a964de
JH
4547 /* If one of operands is memory, source and destination must match. */
4548 if ((GET_CODE (operands[0]) == MEM
4549 || GET_CODE (operands[1]) == MEM)
4550 && ! rtx_equal_p (operands[0], operands[1]))
4551 return FALSE;
e075ae69
RH
4552 return TRUE;
4553}
4554
16189740
RH
4555/* Return TRUE or FALSE depending on whether the first SET in INSN
4556 has source and destination with matching CC modes, and that the
4557 CC mode is at least as constrained as REQ_MODE. */
4558
4559int
4560ix86_match_ccmode (insn, req_mode)
4561 rtx insn;
4562 enum machine_mode req_mode;
4563{
4564 rtx set;
4565 enum machine_mode set_mode;
4566
4567 set = PATTERN (insn);
4568 if (GET_CODE (set) == PARALLEL)
4569 set = XVECEXP (set, 0, 0);
4570 if (GET_CODE (set) != SET)
4571 abort ();
9076b9c1
JH
4572 if (GET_CODE (SET_SRC (set)) != COMPARE)
4573 abort ();
16189740
RH
4574
4575 set_mode = GET_MODE (SET_DEST (set));
4576 switch (set_mode)
4577 {
9076b9c1
JH
4578 case CCNOmode:
4579 if (req_mode != CCNOmode
4580 && (req_mode != CCmode
4581 || XEXP (SET_SRC (set), 1) != const0_rtx))
4582 return 0;
4583 break;
16189740 4584 case CCmode:
9076b9c1 4585 if (req_mode == CCGCmode)
16189740
RH
4586 return 0;
4587 /* FALLTHRU */
9076b9c1
JH
4588 case CCGCmode:
4589 if (req_mode == CCGOCmode || req_mode == CCNOmode)
4590 return 0;
4591 /* FALLTHRU */
4592 case CCGOCmode:
16189740
RH
4593 if (req_mode == CCZmode)
4594 return 0;
4595 /* FALLTHRU */
4596 case CCZmode:
4597 break;
4598
4599 default:
4600 abort ();
4601 }
4602
4603 return (GET_MODE (SET_SRC (set)) == set_mode);
4604}
4605
e075ae69
RH
4606/* Produce an unsigned comparison for a given signed comparison. */
4607
4608static enum rtx_code
4609unsigned_comparison (code)
4610 enum rtx_code code;
4611{
4612 switch (code)
32b5b1aa 4613 {
e075ae69
RH
4614 case GT:
4615 code = GTU;
4616 break;
4617 case LT:
4618 code = LTU;
4619 break;
4620 case GE:
4621 code = GEU;
4622 break;
4623 case LE:
4624 code = LEU;
4625 break;
4626 case EQ:
4627 case NE:
4628 case LEU:
4629 case LTU:
4630 case GEU:
4631 case GTU:
3a3677ff
RH
4632 case UNORDERED:
4633 case ORDERED:
e075ae69
RH
4634 break;
4635 default:
4636 abort ();
4637 }
4638 return code;
4639}
4640
4641/* Generate insn patterns to do an integer compare of OPERANDS. */
4642
4643static rtx
4644ix86_expand_int_compare (code, op0, op1)
4645 enum rtx_code code;
4646 rtx op0, op1;
4647{
4648 enum machine_mode cmpmode;
4649 rtx tmp, flags;
4650
4651 cmpmode = SELECT_CC_MODE (code, op0, op1);
4652 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
4653
4654 /* This is very simple, but making the interface the same as in the
4655 FP case makes the rest of the code easier. */
4656 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
4657 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
4658
4659 /* Return the test that should be put into the flags user, i.e.
4660 the bcc, scc, or cmov instruction. */
4661 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
4662}
4663
3a3677ff
RH
4664/* Figure out whether to use ordered or unordered fp comparisons.
4665 Return the appropriate mode to use. */
e075ae69 4666
b1cdafbb 4667enum machine_mode
3a3677ff 4668ix86_fp_compare_mode (code)
e075ae69 4669 enum rtx_code code;
e075ae69 4670{
3a3677ff 4671 int unordered;
e075ae69 4672
3a3677ff
RH
4673 switch (code)
4674 {
4675 case NE: case EQ:
4676 /* When not doing IEEE compliant compares, fault on NaNs. */
4677 unordered = (TARGET_IEEE_FP != 0);
4678 break;
4679
4680 case LT: case LE: case GT: case GE:
4681 unordered = 0;
4682 break;
4683
4684 case UNORDERED: case ORDERED:
4685 case UNEQ: case UNGE: case UNGT: case UNLE: case UNLT: case LTGT:
4686 unordered = 1;
4687 break;
4688
4689 default:
4690 abort ();
4691 }
e075ae69
RH
4692
4693 /* ??? If we knew whether invalid-operand exceptions were masked,
4694 we could rely on fcom to raise an exception and take care of
3a3677ff 4695 NaNs. But we don't. We could know this from c99 math pragmas. */
e075ae69
RH
4696 if (TARGET_IEEE_FP)
4697 unordered = 1;
4698
3a3677ff
RH
4699 return unordered ? CCFPUmode : CCFPmode;
4700}
4701
9076b9c1
JH
4702enum machine_mode
4703ix86_cc_mode (code, op0, op1)
4704 enum rtx_code code;
4705 rtx op0, op1;
4706{
4707 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4708 return ix86_fp_compare_mode (code);
4709 switch (code)
4710 {
4711 /* Only zero flag is needed. */
4712 case EQ: /* ZF=0 */
4713 case NE: /* ZF!=0 */
4714 return CCZmode;
4715 /* Codes needing carry flag. */
265dab10
JH
4716 case GEU: /* CF=0 */
4717 case GTU: /* CF=0 & ZF=0 */
7e08e190
JH
4718 case LTU: /* CF=1 */
4719 case LEU: /* CF=1 | ZF=1 */
265dab10 4720 return CCmode;
9076b9c1
JH
4721 /* Codes possibly doable only with sign flag when
4722 comparing against zero. */
4723 case GE: /* SF=OF or SF=0 */
7e08e190 4724 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
4725 if (op1 == const0_rtx)
4726 return CCGOCmode;
4727 else
4728 /* For other cases Carry flag is not required. */
4729 return CCGCmode;
4730 /* Codes doable only with sign flag when comparing
4731 against zero, but we miss jump instruction for it
4732 so we need to use relational tests agains overflow
4733 that thus needs to be zero. */
4734 case GT: /* ZF=0 & SF=OF */
4735 case LE: /* ZF=1 | SF<>OF */
4736 if (op1 == const0_rtx)
4737 return CCNOmode;
4738 else
4739 return CCGCmode;
4740 default:
0f290768 4741 abort ();
9076b9c1
JH
4742 }
4743}
4744
3a3677ff
RH
4745/* Return true if we should use an FCOMI instruction for this fp comparison. */
4746
a940d8bd 4747int
3a3677ff
RH
4748ix86_use_fcomi_compare (code)
4749 enum rtx_code code;
4750{
4751 return (TARGET_CMOVE
4752 && (code == ORDERED || code == UNORDERED
4753 /* All other unordered compares require checking
4754 multiple sets of bits. */
4755 || ix86_fp_compare_mode (code) == CCFPmode));
4756}
4757
0f290768 4758/* Swap, force into registers, or otherwise massage the two operands
3a3677ff
RH
4759 to a fp comparison. The operands are updated in place; the new
4760 comparsion code is returned. */
4761
4762static enum rtx_code
4763ix86_prepare_fp_compare_args (code, pop0, pop1)
4764 enum rtx_code code;
4765 rtx *pop0, *pop1;
4766{
4767 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
4768 rtx op0 = *pop0, op1 = *pop1;
4769 enum machine_mode op_mode = GET_MODE (op0);
4770
e075ae69 4771 /* All of the unordered compare instructions only work on registers.
3a3677ff
RH
4772 The same is true of the XFmode compare instructions. The same is
4773 true of the fcomi compare instructions. */
4774
4775 if (fpcmp_mode == CCFPUmode
4776 || op_mode == XFmode
4777 || ix86_use_fcomi_compare (code))
e075ae69 4778 {
3a3677ff
RH
4779 op0 = force_reg (op_mode, op0);
4780 op1 = force_reg (op_mode, op1);
e075ae69
RH
4781 }
4782 else
4783 {
4784 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
4785 things around if they appear profitable, otherwise force op0
4786 into a register. */
4787
4788 if (standard_80387_constant_p (op0) == 0
4789 || (GET_CODE (op0) == MEM
4790 && ! (standard_80387_constant_p (op1) == 0
4791 || GET_CODE (op1) == MEM)))
32b5b1aa 4792 {
e075ae69
RH
4793 rtx tmp;
4794 tmp = op0, op0 = op1, op1 = tmp;
4795 code = swap_condition (code);
4796 }
4797
4798 if (GET_CODE (op0) != REG)
3a3677ff 4799 op0 = force_reg (op_mode, op0);
e075ae69
RH
4800
4801 if (CONSTANT_P (op1))
4802 {
4803 if (standard_80387_constant_p (op1))
3a3677ff 4804 op1 = force_reg (op_mode, op1);
e075ae69 4805 else
3a3677ff 4806 op1 = validize_mem (force_const_mem (op_mode, op1));
32b5b1aa
SC
4807 }
4808 }
e9a25f70 4809
3a3677ff
RH
4810 *pop0 = op0;
4811 *pop1 = op1;
4812 return code;
4813}
4814
4815/* Generate insn patterns to do a floating point compare of OPERANDS. */
4816
4817rtx
4818ix86_expand_fp_compare (code, op0, op1, scratch)
4819 enum rtx_code code;
4820 rtx op0, op1, scratch;
4821{
4822 enum machine_mode fpcmp_mode, intcmp_mode;
4823 rtx tmp;
4824
4825 fpcmp_mode = ix86_fp_compare_mode (code);
4826 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
4827
e075ae69
RH
4828 /* %%% fcomi is probably always faster, even when dealing with memory,
4829 since compare-and-branch would be three insns instead of four. */
3a3677ff 4830 if (ix86_use_fcomi_compare (code))
32b5b1aa 4831 {
e075ae69
RH
4832 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4833 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
4834 emit_insn (tmp);
4835
4836 /* The FP codes work out to act like unsigned. */
4837 code = unsigned_comparison (code);
3a3677ff 4838 intcmp_mode = CCmode;
e075ae69
RH
4839 }
4840 else
4841 {
4842 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e9a25f70 4843
e075ae69
RH
4844 rtx tmp2;
4845 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4846 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
3a3677ff 4847 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 4848
3a3677ff
RH
4849 if (fpcmp_mode == CCFPmode
4850 || code == ORDERED
4851 || code == UNORDERED)
32b5b1aa 4852 {
e075ae69
RH
4853 /* We have two options here -- use sahf, or testing bits of ah
4854 directly. On PPRO, they are equivalent, sahf being one byte
4855 smaller. On Pentium, sahf is non-pairable while test is UV
4856 pairable. */
4857
4858 if (TARGET_USE_SAHF || optimize_size)
32b5b1aa 4859 {
e075ae69 4860 do_sahf:
3a3677ff 4861 emit_insn (gen_x86_sahf_1 (scratch));
e9a25f70 4862
e075ae69
RH
4863 /* The FP codes work out to act like unsigned. */
4864 code = unsigned_comparison (code);
e075ae69 4865 intcmp_mode = CCmode;
32b5b1aa
SC
4866 }
4867 else
4868 {
e075ae69
RH
4869 /*
4870 * The numbers below correspond to the bits of the FPSW in AH.
d22ce03d 4871 * C3, C2, and C0 are in bits 0x40, 0x4, and 0x01 respectively.
e075ae69
RH
4872 *
4873 * cmp C3 C2 C0
4874 * > 0 0 0
4875 * < 0 0 1
4876 * = 1 0 0
4877 * un 1 1 1
4878 */
4879
4880 int mask;
4881
4882 switch (code)
32b5b1aa 4883 {
e075ae69 4884 case GT:
d22ce03d 4885 mask = 0x41;
e075ae69
RH
4886 code = EQ;
4887 break;
4888 case LT:
4889 mask = 0x01;
4890 code = NE;
4891 break;
4892 case GE:
0f290768
KH
4893 /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
4894 faster in all cases to just fall back on sahf. */
e075ae69
RH
4895 goto do_sahf;
4896 case LE:
4897 mask = 0x41;
4898 code = NE;
4899 break;
4900 case EQ:
4901 mask = 0x40;
4902 code = NE;
4903 break;
4904 case NE:
4905 mask = 0x40;
4906 code = EQ;
4907 break;
3a3677ff
RH
4908 case UNORDERED:
4909 mask = 0x04;
4910 code = NE;
4911 break;
4912 case ORDERED:
4913 mask = 0x04;
4914 code = EQ;
4915 break;
4916
e075ae69
RH
4917 default:
4918 abort ();
32b5b1aa 4919 }
e075ae69 4920
3a3677ff 4921 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (mask)));
e075ae69 4922 intcmp_mode = CCNOmode;
32b5b1aa
SC
4923 }
4924 }
4925 else
4926 {
e075ae69
RH
4927 /* In the unordered case, we have to check C2 for NaN's, which
4928 doesn't happen to work out to anything nice combination-wise.
4929 So do some bit twiddling on the value we've got in AH to come
4930 up with an appropriate set of condition codes. */
4931
4932 intcmp_mode = CCNOmode;
4933 switch (code)
32b5b1aa 4934 {
e075ae69 4935 case GT:
3a3677ff 4936 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69
RH
4937 code = EQ;
4938 break;
4939 case LT:
3a3677ff
RH
4940 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4941 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
4942 intcmp_mode = CCmode;
4943 code = EQ;
4944 break;
4945 case GE:
3a3677ff 4946 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69
RH
4947 code = EQ;
4948 break;
4949 case LE:
3a3677ff
RH
4950 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4951 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
4952 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
4953 intcmp_mode = CCmode;
4954 code = LTU;
4955 break;
4956 case EQ:
3a3677ff
RH
4957 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4958 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
4959 intcmp_mode = CCmode;
4960 code = EQ;
4961 break;
4962 case NE:
3a3677ff
RH
4963 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4964 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x40)));
4965 code = NE;
4966 break;
4967
4968 case UNORDERED:
4969 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
4970 code = NE;
4971 break;
4972 case ORDERED:
4973 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
4974 code = EQ;
4975 break;
4976 case UNEQ:
4977 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
4978 code = NE;
4979 break;
4980 case UNGE:
4981 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4982 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x01)));
4983 code = NE;
4984 break;
4985 case UNGT:
4986 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4987 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
4988 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9076b9c1 4989 intcmp_mode = CCmode;
3a3677ff
RH
4990 code = GEU;
4991 break;
4992 case UNLE:
4993 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69
RH
4994 code = NE;
4995 break;
3a3677ff
RH
4996 case UNLT:
4997 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
4998 code = NE;
4999 break;
5000 case LTGT:
5001 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5002 code = EQ;
5003 break;
5004
e075ae69
RH
5005 default:
5006 abort ();
32b5b1aa
SC
5007 }
5008 }
32b5b1aa 5009 }
e075ae69
RH
5010
5011 /* Return the test that should be put into the flags user, i.e.
5012 the bcc, scc, or cmov instruction. */
5013 return gen_rtx_fmt_ee (code, VOIDmode,
5014 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5015 const0_rtx);
5016}
5017
9e3e266c 5018rtx
3a3677ff 5019ix86_expand_compare (code)
e075ae69 5020 enum rtx_code code;
e075ae69
RH
5021{
5022 rtx op0, op1, ret;
5023 op0 = ix86_compare_op0;
5024 op1 = ix86_compare_op1;
5025
5026 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
3a3677ff 5027 ret = ix86_expand_fp_compare (code, op0, op1, gen_reg_rtx (HImode));
32b5b1aa 5028 else
e075ae69
RH
5029 ret = ix86_expand_int_compare (code, op0, op1);
5030
5031 return ret;
5032}
5033
5034void
3a3677ff 5035ix86_expand_branch (code, label)
e075ae69 5036 enum rtx_code code;
e075ae69
RH
5037 rtx label;
5038{
3a3677ff 5039 rtx tmp;
e075ae69 5040
3a3677ff 5041 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 5042 {
3a3677ff
RH
5043 case QImode:
5044 case HImode:
5045 case SImode:
5046 tmp = ix86_expand_compare (code);
e075ae69
RH
5047 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5048 gen_rtx_LABEL_REF (VOIDmode, label),
5049 pc_rtx);
5050 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 5051 return;
e075ae69 5052
3a3677ff
RH
5053 case SFmode:
5054 case DFmode:
0f290768 5055 case XFmode:
3a3677ff
RH
5056 /* Don't expand the comparison early, so that we get better code
5057 when jump or whoever decides to reverse the comparison. */
5058 {
5059 rtvec vec;
5060 int use_fcomi;
5061
5062 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
5063 &ix86_compare_op1);
5064
0b9aaeee 5065 tmp = gen_rtx_fmt_ee (code, VOIDmode,
3a3677ff
RH
5066 ix86_compare_op0, ix86_compare_op1);
5067 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5068 gen_rtx_LABEL_REF (VOIDmode, label),
5069 pc_rtx);
5070 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
5071
5072 use_fcomi = ix86_use_fcomi_compare (code);
5073 vec = rtvec_alloc (3 + !use_fcomi);
5074 RTVEC_ELT (vec, 0) = tmp;
5075 RTVEC_ELT (vec, 1)
5076 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
5077 RTVEC_ELT (vec, 2)
5078 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
5079 if (! use_fcomi)
5080 RTVEC_ELT (vec, 3)
5081 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
5082
5083 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
5084 return;
5085 }
32b5b1aa 5086
3a3677ff
RH
5087 case DImode:
5088 /* Expand DImode branch into multiple compare+branch. */
5089 {
5090 rtx lo[2], hi[2], label2;
5091 enum rtx_code code1, code2, code3;
32b5b1aa 5092
3a3677ff
RH
5093 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
5094 {
5095 tmp = ix86_compare_op0;
5096 ix86_compare_op0 = ix86_compare_op1;
5097 ix86_compare_op1 = tmp;
5098 code = swap_condition (code);
5099 }
5100 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
5101 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 5102
3a3677ff
RH
5103 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
5104 avoid two branches. This costs one extra insn, so disable when
5105 optimizing for size. */
32b5b1aa 5106
3a3677ff
RH
5107 if ((code == EQ || code == NE)
5108 && (!optimize_size
5109 || hi[1] == const0_rtx || lo[1] == const0_rtx))
5110 {
5111 rtx xor0, xor1;
32b5b1aa 5112
3a3677ff
RH
5113 xor1 = hi[0];
5114 if (hi[1] != const0_rtx)
5115 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
5116 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 5117
3a3677ff
RH
5118 xor0 = lo[0];
5119 if (lo[1] != const0_rtx)
5120 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
5121 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 5122
3a3677ff
RH
5123 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
5124 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 5125
3a3677ff
RH
5126 ix86_compare_op0 = tmp;
5127 ix86_compare_op1 = const0_rtx;
5128 ix86_expand_branch (code, label);
5129 return;
5130 }
e075ae69 5131
1f9124e4
JJ
5132 /* Otherwise, if we are doing less-than or greater-or-equal-than,
5133 op1 is a constant and the low word is zero, then we can just
5134 examine the high word. */
32b5b1aa 5135
1f9124e4
JJ
5136 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
5137 switch (code)
5138 {
5139 case LT: case LTU: case GE: case GEU:
5140 ix86_compare_op0 = hi[0];
5141 ix86_compare_op1 = hi[1];
5142 ix86_expand_branch (code, label);
5143 return;
5144 default:
5145 break;
5146 }
e075ae69 5147
3a3677ff 5148 /* Otherwise, we need two or three jumps. */
e075ae69 5149
3a3677ff 5150 label2 = gen_label_rtx ();
e075ae69 5151
3a3677ff
RH
5152 code1 = code;
5153 code2 = swap_condition (code);
5154 code3 = unsigned_condition (code);
e075ae69 5155
3a3677ff
RH
5156 switch (code)
5157 {
5158 case LT: case GT: case LTU: case GTU:
5159 break;
e075ae69 5160
3a3677ff
RH
5161 case LE: code1 = LT; code2 = GT; break;
5162 case GE: code1 = GT; code2 = LT; break;
5163 case LEU: code1 = LTU; code2 = GTU; break;
5164 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 5165
3a3677ff
RH
5166 case EQ: code1 = NIL; code2 = NE; break;
5167 case NE: code2 = NIL; break;
e075ae69 5168
3a3677ff
RH
5169 default:
5170 abort ();
5171 }
e075ae69 5172
3a3677ff
RH
5173 /*
5174 * a < b =>
5175 * if (hi(a) < hi(b)) goto true;
5176 * if (hi(a) > hi(b)) goto false;
5177 * if (lo(a) < lo(b)) goto true;
5178 * false:
5179 */
5180
5181 ix86_compare_op0 = hi[0];
5182 ix86_compare_op1 = hi[1];
5183
5184 if (code1 != NIL)
5185 ix86_expand_branch (code1, label);
5186 if (code2 != NIL)
5187 ix86_expand_branch (code2, label2);
5188
5189 ix86_compare_op0 = lo[0];
5190 ix86_compare_op1 = lo[1];
5191 ix86_expand_branch (code3, label);
5192
5193 if (code2 != NIL)
5194 emit_label (label2);
5195 return;
5196 }
e075ae69 5197
3a3677ff
RH
5198 default:
5199 abort ();
5200 }
32b5b1aa 5201}
e075ae69 5202
32b5b1aa 5203int
3a3677ff 5204ix86_expand_setcc (code, dest)
e075ae69 5205 enum rtx_code code;
e075ae69 5206 rtx dest;
32b5b1aa 5207{
e075ae69
RH
5208 rtx ret, tmp;
5209 int type;
5210
5211 if (GET_MODE (ix86_compare_op0) == DImode)
5212 return 0; /* FAIL */
5213
5214 /* Three modes of generation:
5215 0 -- destination does not overlap compare sources:
5216 clear dest first, emit strict_low_part setcc.
5217 1 -- destination does overlap compare sources:
5218 emit subreg setcc, zero extend.
5219 2 -- destination is in QImode:
5220 emit setcc only.
5221 */
5222
5223 type = 0;
e075ae69
RH
5224
5225 if (GET_MODE (dest) == QImode)
5226 type = 2;
5227 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
a500c31b 5228 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
e075ae69
RH
5229 type = 1;
5230
5231 if (type == 0)
5232 emit_move_insn (dest, const0_rtx);
5233
3a3677ff 5234 ret = ix86_expand_compare (code);
e075ae69
RH
5235 PUT_MODE (ret, QImode);
5236
5237 tmp = dest;
5238 if (type == 0)
32b5b1aa 5239 {
e075ae69
RH
5240 tmp = gen_lowpart (QImode, dest);
5241 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
5242 }
5243 else if (type == 1)
5244 {
5245 if (!cse_not_expected)
5246 tmp = gen_reg_rtx (QImode);
5247 else
5248 tmp = gen_lowpart (QImode, dest);
5249 }
32b5b1aa 5250
e075ae69
RH
5251 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
5252
5253 if (type == 1)
5254 {
5255 rtx clob;
5256
5257 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
5258 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
5259 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5260 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5261 emit_insn (tmp);
32b5b1aa 5262 }
e075ae69
RH
5263
5264 return 1; /* DONE */
32b5b1aa 5265}
e075ae69 5266
32b5b1aa 5267int
e075ae69
RH
5268ix86_expand_int_movcc (operands)
5269 rtx operands[];
32b5b1aa 5270{
e075ae69
RH
5271 enum rtx_code code = GET_CODE (operands[1]), compare_code;
5272 rtx compare_seq, compare_op;
32b5b1aa 5273
36583fea
JH
5274 /* When the compare code is not LTU or GEU, we can not use sbbl case.
5275 In case comparsion is done with immediate, we can convert it to LTU or
5276 GEU by altering the integer. */
5277
5278 if ((code == LEU || code == GTU)
5279 && GET_CODE (ix86_compare_op1) == CONST_INT
5280 && GET_MODE (operands[0]) != HImode
5281 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
0f290768 5282 && GET_CODE (operands[2]) == CONST_INT
36583fea
JH
5283 && GET_CODE (operands[3]) == CONST_INT)
5284 {
5285 if (code == LEU)
5286 code = LTU;
5287 else
5288 code = GEU;
5289 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
5290 }
3a3677ff 5291
e075ae69 5292 start_sequence ();
3a3677ff 5293 compare_op = ix86_expand_compare (code);
e075ae69
RH
5294 compare_seq = gen_sequence ();
5295 end_sequence ();
5296
5297 compare_code = GET_CODE (compare_op);
5298
5299 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
5300 HImode insns, we'd be swallowed in word prefix ops. */
5301
5302 if (GET_MODE (operands[0]) != HImode
0f290768 5303 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
5304 && GET_CODE (operands[3]) == CONST_INT)
5305 {
5306 rtx out = operands[0];
5307 HOST_WIDE_INT ct = INTVAL (operands[2]);
5308 HOST_WIDE_INT cf = INTVAL (operands[3]);
5309 HOST_WIDE_INT diff;
5310
36583fea 5311 if (compare_code == LTU || compare_code == GEU)
e075ae69 5312 {
e075ae69
RH
5313
5314 /* Detect overlap between destination and compare sources. */
5315 rtx tmp = out;
5316
0f290768 5317 /* To simplify rest of code, restrict to the GEU case. */
36583fea
JH
5318 if (compare_code == LTU)
5319 {
5320 int tmp = ct;
5321 ct = cf;
5322 cf = tmp;
5323 compare_code = reverse_condition (compare_code);
5324 code = reverse_condition (code);
5325 }
5326 diff = ct - cf;
5327
e075ae69 5328 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
a500c31b 5329 || reg_overlap_mentioned_p (out, ix86_compare_op1))
e075ae69
RH
5330 tmp = gen_reg_rtx (SImode);
5331
5332 emit_insn (compare_seq);
5333 emit_insn (gen_x86_movsicc_0_m1 (tmp));
5334
36583fea
JH
5335 if (diff == 1)
5336 {
5337 /*
5338 * cmpl op0,op1
5339 * sbbl dest,dest
5340 * [addl dest, ct]
5341 *
5342 * Size 5 - 8.
5343 */
5344 if (ct)
5345 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5346 }
5347 else if (cf == -1)
5348 {
5349 /*
5350 * cmpl op0,op1
5351 * sbbl dest,dest
5352 * orl $ct, dest
5353 *
5354 * Size 8.
5355 */
5356 emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
5357 }
5358 else if (diff == -1 && ct)
5359 {
5360 /*
5361 * cmpl op0,op1
5362 * sbbl dest,dest
5363 * xorl $-1, dest
5364 * [addl dest, cf]
5365 *
5366 * Size 8 - 11.
5367 */
5368 emit_insn (gen_one_cmplsi2 (tmp, tmp));
5369 if (cf)
5370 emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
5371 }
5372 else
5373 {
5374 /*
5375 * cmpl op0,op1
5376 * sbbl dest,dest
5377 * andl cf - ct, dest
5378 * [addl dest, ct]
5379 *
5380 * Size 8 - 11.
5381 */
5382 emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
5383 if (ct)
5384 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5385 }
e075ae69
RH
5386
5387 if (tmp != out)
5388 emit_move_insn (out, tmp);
5389
5390 return 1; /* DONE */
5391 }
5392
5393 diff = ct - cf;
5394 if (diff < 0)
5395 {
5396 HOST_WIDE_INT tmp;
5397 tmp = ct, ct = cf, cf = tmp;
5398 diff = -diff;
5399 compare_code = reverse_condition (compare_code);
5400 code = reverse_condition (code);
5401 }
5402 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
5403 || diff == 3 || diff == 5 || diff == 9)
5404 {
5405 /*
5406 * xorl dest,dest
5407 * cmpl op1,op2
5408 * setcc dest
5409 * lea cf(dest*(ct-cf)),dest
5410 *
5411 * Size 14.
5412 *
5413 * This also catches the degenerate setcc-only case.
5414 */
5415
5416 rtx tmp;
5417 int nops;
5418
5419 out = emit_store_flag (out, code, ix86_compare_op0,
5420 ix86_compare_op1, VOIDmode, 0, 1);
5421
5422 nops = 0;
5423 if (diff == 1)
5424 tmp = out;
5425 else
5426 {
5427 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
5428 nops++;
5429 if (diff & 1)
5430 {
5431 tmp = gen_rtx_PLUS (SImode, tmp, out);
5432 nops++;
5433 }
5434 }
5435 if (cf != 0)
5436 {
5437 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
5438 nops++;
5439 }
5440 if (tmp != out)
5441 {
5442 if (nops == 0)
5443 emit_move_insn (out, tmp);
5444 else if (nops == 1)
5445 {
5446 rtx clob;
5447
5448 clob = gen_rtx_REG (CCmode, FLAGS_REG);
5449 clob = gen_rtx_CLOBBER (VOIDmode, clob);
5450
5451 tmp = gen_rtx_SET (VOIDmode, out, tmp);
5452 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5453 emit_insn (tmp);
5454 }
5455 else
5456 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
5457 }
5458 if (out != operands[0])
5459 emit_move_insn (operands[0], out);
5460
5461 return 1; /* DONE */
5462 }
5463
5464 /*
5465 * General case: Jumpful:
5466 * xorl dest,dest cmpl op1, op2
5467 * cmpl op1, op2 movl ct, dest
5468 * setcc dest jcc 1f
5469 * decl dest movl cf, dest
5470 * andl (cf-ct),dest 1:
5471 * addl ct,dest
0f290768 5472 *
e075ae69
RH
5473 * Size 20. Size 14.
5474 *
5475 * This is reasonably steep, but branch mispredict costs are
5476 * high on modern cpus, so consider failing only if optimizing
5477 * for space.
5478 *
5479 * %%% Parameterize branch_cost on the tuning architecture, then
5480 * use that. The 80386 couldn't care less about mispredicts.
5481 */
5482
5483 if (!optimize_size && !TARGET_CMOVE)
5484 {
5485 if (ct == 0)
5486 {
5487 ct = cf;
5488 cf = 0;
5489 compare_code = reverse_condition (compare_code);
5490 code = reverse_condition (code);
5491 }
5492
5493 out = emit_store_flag (out, code, ix86_compare_op0,
5494 ix86_compare_op1, VOIDmode, 0, 1);
5495
5496 emit_insn (gen_addsi3 (out, out, constm1_rtx));
5497 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
5498 if (ct != 0)
5499 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5500 if (out != operands[0])
5501 emit_move_insn (operands[0], out);
5502
5503 return 1; /* DONE */
5504 }
5505 }
5506
5507 if (!TARGET_CMOVE)
5508 {
5509 /* Try a few things more with specific constants and a variable. */
5510
78a0d70c 5511 optab op;
e075ae69
RH
5512 rtx var, orig_out, out, tmp;
5513
5514 if (optimize_size)
5515 return 0; /* FAIL */
5516
0f290768 5517 /* If one of the two operands is an interesting constant, load a
e075ae69 5518 constant with the above and mask it in with a logical operation. */
0f290768 5519
e075ae69
RH
5520 if (GET_CODE (operands[2]) == CONST_INT)
5521 {
5522 var = operands[3];
5523 if (INTVAL (operands[2]) == 0)
5524 operands[3] = constm1_rtx, op = and_optab;
5525 else if (INTVAL (operands[2]) == -1)
5526 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
5527 else
5528 return 0; /* FAIL */
e075ae69
RH
5529 }
5530 else if (GET_CODE (operands[3]) == CONST_INT)
5531 {
5532 var = operands[2];
5533 if (INTVAL (operands[3]) == 0)
5534 operands[2] = constm1_rtx, op = and_optab;
5535 else if (INTVAL (operands[3]) == -1)
5536 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
5537 else
5538 return 0; /* FAIL */
e075ae69 5539 }
78a0d70c 5540 else
e075ae69
RH
5541 return 0; /* FAIL */
5542
5543 orig_out = operands[0];
5544 tmp = gen_reg_rtx (GET_MODE (orig_out));
5545 operands[0] = tmp;
5546
5547 /* Recurse to get the constant loaded. */
5548 if (ix86_expand_int_movcc (operands) == 0)
5549 return 0; /* FAIL */
5550
5551 /* Mask in the interesting variable. */
5552 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
5553 OPTAB_WIDEN);
5554 if (out != orig_out)
5555 emit_move_insn (orig_out, out);
5556
5557 return 1; /* DONE */
5558 }
5559
5560 /*
5561 * For comparison with above,
5562 *
5563 * movl cf,dest
5564 * movl ct,tmp
5565 * cmpl op1,op2
5566 * cmovcc tmp,dest
5567 *
5568 * Size 15.
5569 */
5570
5571 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
5572 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
5573 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
5574 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
5575
5576 emit_insn (compare_seq);
5577 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5578 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5579 compare_op, operands[2],
5580 operands[3])));
5581
5582 return 1; /* DONE */
e9a25f70 5583}
e075ae69 5584
32b5b1aa 5585int
e075ae69
RH
5586ix86_expand_fp_movcc (operands)
5587 rtx operands[];
32b5b1aa 5588{
e075ae69
RH
5589 enum rtx_code code;
5590 enum machine_mode mode;
5591 rtx tmp;
32b5b1aa 5592
e075ae69 5593 /* The floating point conditional move instructions don't directly
0f290768 5594 support conditions resulting from a signed integer comparison. */
32b5b1aa 5595
e075ae69
RH
5596 code = GET_CODE (operands[1]);
5597 switch (code)
5598 {
5599 case LT:
5600 case LE:
5601 case GE:
5602 case GT:
32ee391b
RH
5603 case UNEQ:
5604 case UNGE:
5605 case UNGT:
5606 case UNLE:
5607 case UNLT:
5608 case LTGT:
e075ae69 5609 tmp = gen_reg_rtx (QImode);
3a3677ff 5610 ix86_expand_setcc (code, tmp);
e075ae69
RH
5611 code = NE;
5612 ix86_compare_op0 = tmp;
5613 ix86_compare_op1 = const0_rtx;
5614 break;
5615
5616 default:
5617 break;
5618 }
e9a25f70 5619
e075ae69
RH
5620 mode = SELECT_CC_MODE (code, ix86_compare_op0, ix86_compare_op1);
5621 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (mode, FLAGS_REG),
5622 gen_rtx_COMPARE (mode,
5623 ix86_compare_op0,
5624 ix86_compare_op1)));
5625 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5626 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5627 gen_rtx_fmt_ee (code, VOIDmode,
5628 gen_rtx_REG (mode, FLAGS_REG),
5629 const0_rtx),
5630 operands[2],
5631 operands[3])));
32b5b1aa 5632
e075ae69 5633 return 1;
32b5b1aa
SC
5634}
5635
2450a057
JH
5636/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
5637 works for floating pointer parameters and nonoffsetable memories.
5638 For pushes, it returns just stack offsets; the values will be saved
5639 in the right order. Maximally three parts are generated. */
5640
5641static void
5642ix86_split_to_parts (operand, parts, mode)
5643 rtx operand;
5644 rtx *parts;
5645 enum machine_mode mode;
32b5b1aa 5646{
2450a057
JH
5647 int size = GET_MODE_SIZE (mode) / 4;
5648
a7180f70
BS
5649 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
5650 abort ();
2450a057
JH
5651 if (size < 2 || size > 3)
5652 abort ();
5653
d7a29404
JH
5654 /* Optimize constant pool reference to immediates. This is used by fp moves,
5655 that force all constants to memory to allow combining. */
5656
5657 if (GET_CODE (operand) == MEM
5658 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
5659 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
5660 operand = get_pool_constant (XEXP (operand, 0));
5661
2450a057 5662 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
e075ae69 5663 {
2450a057
JH
5664 /* The only non-offsetable memories we handle are pushes. */
5665 if (! push_operand (operand, VOIDmode))
5666 abort ();
5667
5668 PUT_MODE (operand, SImode);
5669 parts[0] = parts[1] = parts[2] = operand;
5670 }
5671 else
5672 {
5673 if (mode == DImode)
5674 split_di (&operand, 1, &parts[0], &parts[1]);
5675 else
e075ae69 5676 {
2450a057
JH
5677 if (REG_P (operand))
5678 {
5679 if (!reload_completed)
5680 abort ();
5681 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
5682 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
5683 if (size == 3)
5684 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
5685 }
5686 else if (offsettable_memref_p (operand))
5687 {
5688 PUT_MODE (operand, SImode);
5689 parts[0] = operand;
5690 parts[1] = adj_offsettable_operand (operand, 4);
5691 if (size == 3)
5692 parts[2] = adj_offsettable_operand (operand, 8);
5693 }
5694 else if (GET_CODE (operand) == CONST_DOUBLE)
5695 {
5696 REAL_VALUE_TYPE r;
5697 long l[3];
5698
5699 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
5700 switch (mode)
5701 {
5702 case XFmode:
5703 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
5704 parts[2] = GEN_INT (l[2]);
5705 break;
5706 case DFmode:
5707 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
5708 break;
5709 default:
5710 abort ();
5711 }
5712 parts[1] = GEN_INT (l[1]);
5713 parts[0] = GEN_INT (l[0]);
5714 }
5715 else
5716 abort ();
e075ae69 5717 }
2450a057
JH
5718 }
5719
5720 return;
5721}
5722
5723/* Emit insns to perform a move or push of DI, DF, and XF values.
5724 Return false when normal moves are needed; true when all required
5725 insns have been emitted. Operands 2-4 contain the input values
5726 int the correct order; operands 5-7 contain the output values. */
5727
0f290768 5728int
2450a057
JH
5729ix86_split_long_move (operands1)
5730 rtx operands1[];
5731{
5732 rtx part[2][3];
5733 rtx operands[2];
5734 int size = GET_MODE_SIZE (GET_MODE (operands1[0])) / 4;
5735 int push = 0;
5736 int collisions = 0;
5737
5738 /* Make our own copy to avoid clobbering the operands. */
5739 operands[0] = copy_rtx (operands1[0]);
5740 operands[1] = copy_rtx (operands1[1]);
5741
5742 if (size < 2 || size > 3)
5743 abort ();
5744
5745 /* The only non-offsettable memory we handle is push. */
5746 if (push_operand (operands[0], VOIDmode))
5747 push = 1;
5748 else if (GET_CODE (operands[0]) == MEM
5749 && ! offsettable_memref_p (operands[0]))
5750 abort ();
5751
5752 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
5753 ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
5754
5755 /* When emitting push, take care for source operands on the stack. */
5756 if (push && GET_CODE (operands[1]) == MEM
5757 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
5758 {
5759 if (size == 3)
5760 part[1][1] = part[1][2];
5761 part[1][0] = part[1][1];
5762 }
5763
0f290768 5764 /* We need to do copy in the right order in case an address register
2450a057
JH
5765 of the source overlaps the destination. */
5766 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
5767 {
5768 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
5769 collisions++;
5770 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
5771 collisions++;
5772 if (size == 3
5773 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
5774 collisions++;
5775
5776 /* Collision in the middle part can be handled by reordering. */
5777 if (collisions == 1 && size == 3
5778 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
e075ae69 5779 {
2450a057
JH
5780 rtx tmp;
5781 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
5782 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
5783 }
e075ae69 5784
2450a057
JH
5785 /* If there are more collisions, we can't handle it by reordering.
5786 Do an lea to the last part and use only one colliding move. */
5787 else if (collisions > 1)
5788 {
5789 collisions = 1;
5790 emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
5791 XEXP (part[1][0], 0)));
5792 part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
5793 part[1][1] = adj_offsettable_operand (part[1][0], 4);
5794 if (size == 3)
5795 part[1][2] = adj_offsettable_operand (part[1][0], 8);
5796 }
5797 }
5798
5799 if (push)
5800 {
5801 if (size == 3)
5802 emit_insn (gen_push (part[1][2]));
5803 emit_insn (gen_push (part[1][1]));
5804 emit_insn (gen_push (part[1][0]));
5805 return 1;
5806 }
5807
5808 /* Choose correct order to not overwrite the source before it is copied. */
5809 if ((REG_P (part[0][0])
5810 && REG_P (part[1][1])
5811 && (REGNO (part[0][0]) == REGNO (part[1][1])
5812 || (size == 3
5813 && REGNO (part[0][0]) == REGNO (part[1][2]))))
5814 || (collisions > 0
5815 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
5816 {
5817 if (size == 3)
5818 {
5819 operands1[2] = part[0][2];
5820 operands1[3] = part[0][1];
5821 operands1[4] = part[0][0];
5822 operands1[5] = part[1][2];
5823 operands1[6] = part[1][1];
5824 operands1[7] = part[1][0];
5825 }
5826 else
5827 {
5828 operands1[2] = part[0][1];
5829 operands1[3] = part[0][0];
5830 operands1[5] = part[1][1];
5831 operands1[6] = part[1][0];
5832 }
5833 }
5834 else
5835 {
5836 if (size == 3)
5837 {
5838 operands1[2] = part[0][0];
5839 operands1[3] = part[0][1];
5840 operands1[4] = part[0][2];
5841 operands1[5] = part[1][0];
5842 operands1[6] = part[1][1];
5843 operands1[7] = part[1][2];
5844 }
5845 else
5846 {
5847 operands1[2] = part[0][0];
5848 operands1[3] = part[0][1];
5849 operands1[5] = part[1][0];
5850 operands1[6] = part[1][1];
e075ae69
RH
5851 }
5852 }
32b5b1aa 5853
e9a25f70 5854 return 0;
32b5b1aa 5855}
32b5b1aa 5856
e075ae69
RH
5857void
5858ix86_split_ashldi (operands, scratch)
5859 rtx *operands, scratch;
32b5b1aa 5860{
e075ae69
RH
5861 rtx low[2], high[2];
5862 int count;
b985a30f 5863
e075ae69
RH
5864 if (GET_CODE (operands[2]) == CONST_INT)
5865 {
5866 split_di (operands, 2, low, high);
5867 count = INTVAL (operands[2]) & 63;
32b5b1aa 5868
e075ae69
RH
5869 if (count >= 32)
5870 {
5871 emit_move_insn (high[0], low[1]);
5872 emit_move_insn (low[0], const0_rtx);
b985a30f 5873
e075ae69
RH
5874 if (count > 32)
5875 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
5876 }
5877 else
5878 {
5879 if (!rtx_equal_p (operands[0], operands[1]))
5880 emit_move_insn (operands[0], operands[1]);
5881 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
5882 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
5883 }
5884 }
5885 else
5886 {
5887 if (!rtx_equal_p (operands[0], operands[1]))
5888 emit_move_insn (operands[0], operands[1]);
b985a30f 5889
e075ae69 5890 split_di (operands, 1, low, high);
b985a30f 5891
e075ae69
RH
5892 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
5893 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 5894
fe577e58 5895 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 5896 {
fe577e58 5897 if (! no_new_pseudos)
e075ae69
RH
5898 scratch = force_reg (SImode, const0_rtx);
5899 else
5900 emit_move_insn (scratch, const0_rtx);
5901
5902 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
5903 scratch));
5904 }
5905 else
5906 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
5907 }
e9a25f70 5908}
32b5b1aa 5909
e075ae69
RH
5910void
5911ix86_split_ashrdi (operands, scratch)
5912 rtx *operands, scratch;
32b5b1aa 5913{
e075ae69
RH
5914 rtx low[2], high[2];
5915 int count;
32b5b1aa 5916
e075ae69
RH
5917 if (GET_CODE (operands[2]) == CONST_INT)
5918 {
5919 split_di (operands, 2, low, high);
5920 count = INTVAL (operands[2]) & 63;
32b5b1aa 5921
e075ae69
RH
5922 if (count >= 32)
5923 {
5924 emit_move_insn (low[0], high[1]);
32b5b1aa 5925
e075ae69
RH
5926 if (! reload_completed)
5927 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
5928 else
5929 {
5930 emit_move_insn (high[0], low[0]);
5931 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
5932 }
5933
5934 if (count > 32)
5935 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
5936 }
5937 else
5938 {
5939 if (!rtx_equal_p (operands[0], operands[1]))
5940 emit_move_insn (operands[0], operands[1]);
5941 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5942 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
5943 }
5944 }
5945 else
32b5b1aa 5946 {
e075ae69
RH
5947 if (!rtx_equal_p (operands[0], operands[1]))
5948 emit_move_insn (operands[0], operands[1]);
5949
5950 split_di (operands, 1, low, high);
5951
5952 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5953 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
5954
fe577e58 5955 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 5956 {
fe577e58 5957 if (! no_new_pseudos)
e075ae69
RH
5958 scratch = gen_reg_rtx (SImode);
5959 emit_move_insn (scratch, high[0]);
5960 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
5961 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5962 scratch));
5963 }
5964 else
5965 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 5966 }
e075ae69 5967}
32b5b1aa 5968
e075ae69
RH
5969void
5970ix86_split_lshrdi (operands, scratch)
5971 rtx *operands, scratch;
5972{
5973 rtx low[2], high[2];
5974 int count;
32b5b1aa 5975
e075ae69 5976 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 5977 {
e075ae69
RH
5978 split_di (operands, 2, low, high);
5979 count = INTVAL (operands[2]) & 63;
5980
5981 if (count >= 32)
c7271385 5982 {
e075ae69
RH
5983 emit_move_insn (low[0], high[1]);
5984 emit_move_insn (high[0], const0_rtx);
32b5b1aa 5985
e075ae69
RH
5986 if (count > 32)
5987 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
5988 }
5989 else
5990 {
5991 if (!rtx_equal_p (operands[0], operands[1]))
5992 emit_move_insn (operands[0], operands[1]);
5993 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5994 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
5995 }
32b5b1aa 5996 }
e075ae69
RH
5997 else
5998 {
5999 if (!rtx_equal_p (operands[0], operands[1]))
6000 emit_move_insn (operands[0], operands[1]);
32b5b1aa 6001
e075ae69
RH
6002 split_di (operands, 1, low, high);
6003
6004 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
6005 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
6006
6007 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 6008 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 6009 {
fe577e58 6010 if (! no_new_pseudos)
e075ae69
RH
6011 scratch = force_reg (SImode, const0_rtx);
6012 else
6013 emit_move_insn (scratch, const0_rtx);
6014
6015 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
6016 scratch));
6017 }
6018 else
6019 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
6020 }
32b5b1aa 6021}
3f803cd9 6022
e075ae69
RH
6023/* Expand the appropriate insns for doing strlen if not just doing
6024 repnz; scasb
6025
6026 out = result, initialized with the start address
6027 align_rtx = alignment of the address.
6028 scratch = scratch register, initialized with the startaddress when
6029 not aligned, otherwise undefined
3f803cd9
SC
6030
6031 This is just the body. It needs the initialisations mentioned above and
6032 some address computing at the end. These things are done in i386.md. */
6033
e075ae69
RH
6034void
6035ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
6036 rtx out, align_rtx, scratch;
3f803cd9 6037{
e075ae69
RH
6038 int align;
6039 rtx tmp;
6040 rtx align_2_label = NULL_RTX;
6041 rtx align_3_label = NULL_RTX;
6042 rtx align_4_label = gen_label_rtx ();
6043 rtx end_0_label = gen_label_rtx ();
e075ae69 6044 rtx mem;
e2e52e1b 6045 rtx tmpreg = gen_reg_rtx (SImode);
e075ae69
RH
6046
6047 align = 0;
6048 if (GET_CODE (align_rtx) == CONST_INT)
6049 align = INTVAL (align_rtx);
3f803cd9 6050
e9a25f70 6051 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 6052
e9a25f70 6053 /* Is there a known alignment and is it less than 4? */
e075ae69 6054 if (align < 4)
3f803cd9 6055 {
e9a25f70 6056 /* Is there a known alignment and is it not 2? */
e075ae69 6057 if (align != 2)
3f803cd9 6058 {
e075ae69
RH
6059 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
6060 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
6061
6062 /* Leave just the 3 lower bits. */
6063 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
6064 NULL_RTX, 0, OPTAB_WIDEN);
6065
9076b9c1
JH
6066 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6067 SImode, 1, 0, align_4_label);
6068 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
6069 SImode, 1, 0, align_2_label);
6070 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
6071 SImode, 1, 0, align_3_label);
3f803cd9
SC
6072 }
6073 else
6074 {
e9a25f70
JL
6075 /* Since the alignment is 2, we have to check 2 or 0 bytes;
6076 check if is aligned to 4 - byte. */
e9a25f70 6077
e075ae69
RH
6078 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
6079 NULL_RTX, 0, OPTAB_WIDEN);
6080
9076b9c1
JH
6081 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6082 SImode, 1, 0, align_4_label);
3f803cd9
SC
6083 }
6084
e075ae69 6085 mem = gen_rtx_MEM (QImode, out);
e9a25f70 6086
e075ae69 6087 /* Now compare the bytes. */
e9a25f70 6088
0f290768 6089 /* Compare the first n unaligned byte on a byte per byte basis. */
9076b9c1
JH
6090 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6091 QImode, 1, 0, end_0_label);
3f803cd9 6092
0f290768 6093 /* Increment the address. */
e075ae69 6094 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 6095
e075ae69
RH
6096 /* Not needed with an alignment of 2 */
6097 if (align != 2)
6098 {
6099 emit_label (align_2_label);
3f803cd9 6100
9076b9c1
JH
6101 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6102 QImode, 1, 0, end_0_label);
e075ae69
RH
6103
6104 emit_insn (gen_addsi3 (out, out, const1_rtx));
6105
6106 emit_label (align_3_label);
6107 }
6108
9076b9c1
JH
6109 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6110 QImode, 1, 0, end_0_label);
e075ae69
RH
6111
6112 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
6113 }
6114
e075ae69
RH
6115 /* Generate loop to check 4 bytes at a time. It is not a good idea to
6116 align this loop. It gives only huge programs, but does not help to
6117 speed up. */
6118 emit_label (align_4_label);
3f803cd9 6119
e075ae69
RH
6120 mem = gen_rtx_MEM (SImode, out);
6121 emit_move_insn (scratch, mem);
e075ae69 6122 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 6123
e2e52e1b
JH
6124 /* This formula yields a nonzero result iff one of the bytes is zero.
6125 This saves three branches inside loop and many cycles. */
6126
6127 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
6128 emit_insn (gen_one_cmplsi2 (scratch, scratch));
6129 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
6130 emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
9076b9c1
JH
6131 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
6132 SImode, 1, 0, align_4_label);
e2e52e1b
JH
6133
6134 if (TARGET_CMOVE)
6135 {
6136 rtx reg = gen_reg_rtx (SImode);
6137 emit_move_insn (reg, tmpreg);
6138 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
6139
0f290768 6140 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 6141 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
6142 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6143 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6144 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
6145 gen_rtx_IF_THEN_ELSE (SImode, tmp,
0f290768 6146 reg,
e2e52e1b
JH
6147 tmpreg)));
6148 /* Emit lea manually to avoid clobbering of flags. */
6149 emit_insn (gen_rtx_SET (SImode, reg,
6150 gen_rtx_PLUS (SImode, out, GEN_INT (2))));
6151
6152 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6153 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6154 emit_insn (gen_rtx_SET (VOIDmode, out,
6155 gen_rtx_IF_THEN_ELSE (SImode, tmp,
6156 reg,
6157 out)));
6158
6159 }
6160 else
6161 {
6162 rtx end_2_label = gen_label_rtx ();
6163 /* Is zero in the first two bytes? */
6164
16189740 6165 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
6166 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6167 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
6168 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6169 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
6170 pc_rtx);
6171 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
6172 JUMP_LABEL (tmp) = end_2_label;
6173
0f290768 6174 /* Not in the first two. Move two bytes forward. */
e2e52e1b
JH
6175 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
6176 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
6177
6178 emit_label (end_2_label);
6179
6180 }
6181
0f290768 6182 /* Avoid branch in fixing the byte. */
e2e52e1b 6183 tmpreg = gen_lowpart (QImode, tmpreg);
7e08e190
JH
6184 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
6185 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
e075ae69
RH
6186
6187 emit_label (end_0_label);
6188}
6189\f
e075ae69
RH
6190/* Clear stack slot assignments remembered from previous functions.
6191 This is called from INIT_EXPANDERS once before RTL is emitted for each
6192 function. */
6193
36edd3cc
BS
6194static void
6195ix86_init_machine_status (p)
1526a060 6196 struct function *p;
e075ae69
RH
6197{
6198 enum machine_mode mode;
6199 int n;
36edd3cc
BS
6200 p->machine
6201 = (struct machine_function *) xmalloc (sizeof (struct machine_function));
e075ae69
RH
6202
6203 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6204 mode = (enum machine_mode) ((int) mode + 1))
6205 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6206 ix86_stack_locals[(int) mode][n] = NULL_RTX;
e075ae69
RH
6207}
6208
1526a060
BS
6209/* Mark machine specific bits of P for GC. */
6210static void
6211ix86_mark_machine_status (p)
6212 struct function *p;
6213{
6214 enum machine_mode mode;
6215 int n;
6216
6217 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6218 mode = (enum machine_mode) ((int) mode + 1))
6219 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6220 ggc_mark_rtx (p->machine->stack_locals[(int) mode][n]);
6221}
6222
e075ae69
RH
6223/* Return a MEM corresponding to a stack slot with mode MODE.
6224 Allocate a new slot if necessary.
6225
6226 The RTL for a function can have several slots available: N is
6227 which slot to use. */
6228
6229rtx
6230assign_386_stack_local (mode, n)
6231 enum machine_mode mode;
6232 int n;
6233{
6234 if (n < 0 || n >= MAX_386_STACK_LOCALS)
6235 abort ();
6236
6237 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
6238 ix86_stack_locals[(int) mode][n]
6239 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
6240
6241 return ix86_stack_locals[(int) mode][n];
6242}
6243\f
6244/* Calculate the length of the memory address in the instruction
6245 encoding. Does not include the one-byte modrm, opcode, or prefix. */
6246
6247static int
6248memory_address_length (addr)
6249 rtx addr;
6250{
6251 struct ix86_address parts;
6252 rtx base, index, disp;
6253 int len;
6254
6255 if (GET_CODE (addr) == PRE_DEC
6256 || GET_CODE (addr) == POST_INC)
6257 return 0;
3f803cd9 6258
e075ae69
RH
6259 if (! ix86_decompose_address (addr, &parts))
6260 abort ();
3f803cd9 6261
e075ae69
RH
6262 base = parts.base;
6263 index = parts.index;
6264 disp = parts.disp;
6265 len = 0;
3f803cd9 6266
e075ae69
RH
6267 /* Register Indirect. */
6268 if (base && !index && !disp)
6269 {
6270 /* Special cases: ebp and esp need the two-byte modrm form. */
6271 if (addr == stack_pointer_rtx
6272 || addr == arg_pointer_rtx
564d80f4
JH
6273 || addr == frame_pointer_rtx
6274 || addr == hard_frame_pointer_rtx)
e075ae69 6275 len = 1;
3f803cd9 6276 }
e9a25f70 6277
e075ae69
RH
6278 /* Direct Addressing. */
6279 else if (disp && !base && !index)
6280 len = 4;
6281
3f803cd9
SC
6282 else
6283 {
e075ae69
RH
6284 /* Find the length of the displacement constant. */
6285 if (disp)
6286 {
6287 if (GET_CODE (disp) == CONST_INT
6288 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
6289 len = 1;
6290 else
6291 len = 4;
6292 }
3f803cd9 6293
e075ae69
RH
6294 /* An index requires the two-byte modrm form. */
6295 if (index)
6296 len += 1;
3f803cd9
SC
6297 }
6298
e075ae69
RH
6299 return len;
6300}
79325812 6301
6ef67412
JH
6302/* Compute default value for "length_immediate" attribute. When SHORTFORM is set
6303 expect that insn have 8bit immediate alternative. */
e075ae69 6304int
6ef67412 6305ix86_attr_length_immediate_default (insn, shortform)
e075ae69 6306 rtx insn;
6ef67412 6307 int shortform;
e075ae69 6308{
6ef67412
JH
6309 int len = 0;
6310 int i;
6c698a6d 6311 extract_insn_cached (insn);
6ef67412
JH
6312 for (i = recog_data.n_operands - 1; i >= 0; --i)
6313 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 6314 {
6ef67412 6315 if (len)
3071fab5 6316 abort ();
6ef67412
JH
6317 if (shortform
6318 && GET_CODE (recog_data.operand[i]) == CONST_INT
6319 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
6320 len = 1;
6321 else
6322 {
6323 switch (get_attr_mode (insn))
6324 {
6325 case MODE_QI:
6326 len+=1;
6327 break;
6328 case MODE_HI:
6329 len+=2;
6330 break;
6331 case MODE_SI:
6332 len+=4;
6333 break;
6334 default:
6335 fatal_insn ("Unknown insn mode", insn);
6336 }
6337 }
3071fab5 6338 }
6ef67412
JH
6339 return len;
6340}
6341/* Compute default value for "length_address" attribute. */
6342int
6343ix86_attr_length_address_default (insn)
6344 rtx insn;
6345{
6346 int i;
6c698a6d 6347 extract_insn_cached (insn);
1ccbefce
RH
6348 for (i = recog_data.n_operands - 1; i >= 0; --i)
6349 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 6350 {
6ef67412 6351 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
6352 break;
6353 }
6ef67412 6354 return 0;
3f803cd9 6355}
e075ae69
RH
6356\f
6357/* Return the maximum number of instructions a cpu can issue. */
b657fc39 6358
e075ae69
RH
6359int
6360ix86_issue_rate ()
b657fc39 6361{
e075ae69 6362 switch (ix86_cpu)
b657fc39 6363 {
e075ae69
RH
6364 case PROCESSOR_PENTIUM:
6365 case PROCESSOR_K6:
6366 return 2;
79325812 6367
e075ae69
RH
6368 case PROCESSOR_PENTIUMPRO:
6369 return 3;
b657fc39 6370
b657fc39 6371 default:
e075ae69 6372 return 1;
b657fc39 6373 }
b657fc39
L
6374}
6375
e075ae69
RH
6376/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
6377 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 6378
e075ae69
RH
6379static int
6380ix86_flags_dependant (insn, dep_insn, insn_type)
6381 rtx insn, dep_insn;
6382 enum attr_type insn_type;
6383{
6384 rtx set, set2;
b657fc39 6385
e075ae69
RH
6386 /* Simplify the test for uninteresting insns. */
6387 if (insn_type != TYPE_SETCC
6388 && insn_type != TYPE_ICMOV
6389 && insn_type != TYPE_FCMOV
6390 && insn_type != TYPE_IBR)
6391 return 0;
b657fc39 6392
e075ae69
RH
6393 if ((set = single_set (dep_insn)) != 0)
6394 {
6395 set = SET_DEST (set);
6396 set2 = NULL_RTX;
6397 }
6398 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
6399 && XVECLEN (PATTERN (dep_insn), 0) == 2
6400 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
6401 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
6402 {
6403 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6404 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6405 }
78a0d70c
ZW
6406 else
6407 return 0;
b657fc39 6408
78a0d70c
ZW
6409 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
6410 return 0;
b657fc39 6411
78a0d70c
ZW
6412 /* This test is true if the dependant insn reads the flags but
6413 not any other potentially set register. */
6414 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
6415 return 0;
6416
6417 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
6418 return 0;
6419
6420 return 1;
e075ae69 6421}
b657fc39 6422
e075ae69
RH
6423/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
6424 address with operands set by DEP_INSN. */
6425
6426static int
6427ix86_agi_dependant (insn, dep_insn, insn_type)
6428 rtx insn, dep_insn;
6429 enum attr_type insn_type;
6430{
6431 rtx addr;
6432
6433 if (insn_type == TYPE_LEA)
5fbdde42
RH
6434 {
6435 addr = PATTERN (insn);
6436 if (GET_CODE (addr) == SET)
6437 ;
6438 else if (GET_CODE (addr) == PARALLEL
6439 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
6440 addr = XVECEXP (addr, 0, 0);
6441 else
6442 abort ();
6443 addr = SET_SRC (addr);
6444 }
e075ae69
RH
6445 else
6446 {
6447 int i;
6c698a6d 6448 extract_insn_cached (insn);
1ccbefce
RH
6449 for (i = recog_data.n_operands - 1; i >= 0; --i)
6450 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 6451 {
1ccbefce 6452 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
6453 goto found;
6454 }
6455 return 0;
6456 found:;
b657fc39
L
6457 }
6458
e075ae69 6459 return modified_in_p (addr, dep_insn);
b657fc39 6460}
a269a03c
JC
6461
6462int
e075ae69 6463ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
6464 rtx insn, link, dep_insn;
6465 int cost;
6466{
e075ae69 6467 enum attr_type insn_type, dep_insn_type;
0b5107cf 6468 enum attr_memory memory;
e075ae69 6469 rtx set, set2;
9b00189f 6470 int dep_insn_code_number;
a269a03c 6471
309ada50 6472 /* Anti and output depenancies have zero cost on all CPUs. */
e075ae69 6473 if (REG_NOTE_KIND (link) != 0)
309ada50 6474 return 0;
a269a03c 6475
9b00189f
JH
6476 dep_insn_code_number = recog_memoized (dep_insn);
6477
e075ae69 6478 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 6479 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 6480 return cost;
a269a03c 6481
1c71e60e
JH
6482 insn_type = get_attr_type (insn);
6483 dep_insn_type = get_attr_type (dep_insn);
9b00189f 6484
1c71e60e
JH
6485 /* Prologue and epilogue allocators can have a false dependency on ebp.
6486 This results in one cycle extra stall on Pentium prologue scheduling,
6487 so handle this important case manually. */
6488 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
6489 && dep_insn_type == TYPE_ALU
9b00189f
JH
6490 && !reg_mentioned_p (stack_pointer_rtx, insn))
6491 return 0;
6492
a269a03c
JC
6493 switch (ix86_cpu)
6494 {
6495 case PROCESSOR_PENTIUM:
e075ae69
RH
6496 /* Address Generation Interlock adds a cycle of latency. */
6497 if (ix86_agi_dependant (insn, dep_insn, insn_type))
6498 cost += 1;
6499
6500 /* ??? Compares pair with jump/setcc. */
6501 if (ix86_flags_dependant (insn, dep_insn, insn_type))
6502 cost = 0;
6503
6504 /* Floating point stores require value to be ready one cycle ealier. */
0f290768 6505 if (insn_type == TYPE_FMOV
e075ae69
RH
6506 && get_attr_memory (insn) == MEMORY_STORE
6507 && !ix86_agi_dependant (insn, dep_insn, insn_type))
6508 cost += 1;
6509 break;
a269a03c 6510
e075ae69 6511 case PROCESSOR_PENTIUMPRO:
0f290768 6512 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
6513 increase the cost here for non-imov insns. */
6514 if (dep_insn_type != TYPE_IMOV
6515 && dep_insn_type != TYPE_FMOV
0b5107cf
JH
6516 && ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
6517 || memory == MEMORY_BOTH))
e075ae69
RH
6518 cost += 1;
6519
6520 /* INT->FP conversion is expensive. */
6521 if (get_attr_fp_int_src (dep_insn))
6522 cost += 5;
6523
6524 /* There is one cycle extra latency between an FP op and a store. */
6525 if (insn_type == TYPE_FMOV
6526 && (set = single_set (dep_insn)) != NULL_RTX
6527 && (set2 = single_set (insn)) != NULL_RTX
6528 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
6529 && GET_CODE (SET_DEST (set2)) == MEM)
6530 cost += 1;
6531 break;
a269a03c 6532
e075ae69
RH
6533 case PROCESSOR_K6:
6534 /* The esp dependency is resolved before the instruction is really
6535 finished. */
6536 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
6537 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
6538 return 1;
a269a03c 6539
0f290768 6540 /* Since we can't represent delayed latencies of load+operation,
e075ae69 6541 increase the cost here for non-imov insns. */
0b5107cf
JH
6542 if ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
6543 || memory == MEMORY_BOTH)
e075ae69
RH
6544 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
6545
6546 /* INT->FP conversion is expensive. */
6547 if (get_attr_fp_int_src (dep_insn))
6548 cost += 5;
a14003ee 6549 break;
e075ae69 6550
309ada50 6551 case PROCESSOR_ATHLON:
0b5107cf
JH
6552 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6553 || memory == MEMORY_BOTH)
6554 {
6555 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
6556 cost += 2;
6557 else
6558 cost += 3;
6559 }
309ada50 6560
a269a03c 6561 default:
a269a03c
JC
6562 break;
6563 }
6564
6565 return cost;
6566}
0a726ef1 6567
e075ae69
RH
6568static union
6569{
6570 struct ppro_sched_data
6571 {
6572 rtx decode[3];
6573 int issued_this_cycle;
6574 } ppro;
6575} ix86_sched_data;
0a726ef1 6576
e075ae69
RH
6577static int
6578ix86_safe_length (insn)
6579 rtx insn;
6580{
6581 if (recog_memoized (insn) >= 0)
6582 return get_attr_length(insn);
6583 else
6584 return 128;
6585}
0a726ef1 6586
e075ae69
RH
6587static int
6588ix86_safe_length_prefix (insn)
6589 rtx insn;
6590{
6591 if (recog_memoized (insn) >= 0)
6592 return get_attr_length(insn);
6593 else
6594 return 0;
6595}
6596
6597static enum attr_memory
6598ix86_safe_memory (insn)
6599 rtx insn;
6600{
6601 if (recog_memoized (insn) >= 0)
6602 return get_attr_memory(insn);
6603 else
6604 return MEMORY_UNKNOWN;
6605}
0a726ef1 6606
e075ae69
RH
6607static enum attr_pent_pair
6608ix86_safe_pent_pair (insn)
6609 rtx insn;
6610{
6611 if (recog_memoized (insn) >= 0)
6612 return get_attr_pent_pair(insn);
6613 else
6614 return PENT_PAIR_NP;
6615}
0a726ef1 6616
e075ae69
RH
6617static enum attr_ppro_uops
6618ix86_safe_ppro_uops (insn)
6619 rtx insn;
6620{
6621 if (recog_memoized (insn) >= 0)
6622 return get_attr_ppro_uops (insn);
6623 else
6624 return PPRO_UOPS_MANY;
6625}
0a726ef1 6626
e075ae69
RH
6627static void
6628ix86_dump_ppro_packet (dump)
6629 FILE *dump;
0a726ef1 6630{
e075ae69 6631 if (ix86_sched_data.ppro.decode[0])
0a726ef1 6632 {
e075ae69
RH
6633 fprintf (dump, "PPRO packet: %d",
6634 INSN_UID (ix86_sched_data.ppro.decode[0]));
6635 if (ix86_sched_data.ppro.decode[1])
6636 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
6637 if (ix86_sched_data.ppro.decode[2])
6638 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
6639 fputc ('\n', dump);
6640 }
6641}
0a726ef1 6642
e075ae69 6643/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 6644
e075ae69
RH
6645void
6646ix86_sched_init (dump, sched_verbose)
6647 FILE *dump ATTRIBUTE_UNUSED;
6648 int sched_verbose ATTRIBUTE_UNUSED;
6649{
6650 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
6651}
6652
6653/* Shift INSN to SLOT, and shift everything else down. */
6654
6655static void
6656ix86_reorder_insn (insnp, slot)
6657 rtx *insnp, *slot;
6658{
6659 if (insnp != slot)
6660 {
6661 rtx insn = *insnp;
0f290768 6662 do
e075ae69
RH
6663 insnp[0] = insnp[1];
6664 while (++insnp != slot);
6665 *insnp = insn;
0a726ef1 6666 }
e075ae69
RH
6667}
6668
6669/* Find an instruction with given pairability and minimal amount of cycles
6670 lost by the fact that the CPU waits for both pipelines to finish before
6671 reading next instructions. Also take care that both instructions together
6672 can not exceed 7 bytes. */
6673
6674static rtx *
6675ix86_pent_find_pair (e_ready, ready, type, first)
6676 rtx *e_ready;
6677 rtx *ready;
6678 enum attr_pent_pair type;
6679 rtx first;
6680{
6681 int mincycles, cycles;
6682 enum attr_pent_pair tmp;
6683 enum attr_memory memory;
6684 rtx *insnp, *bestinsnp = NULL;
0a726ef1 6685
e075ae69
RH
6686 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
6687 return NULL;
0a726ef1 6688
e075ae69
RH
6689 memory = ix86_safe_memory (first);
6690 cycles = result_ready_cost (first);
6691 mincycles = INT_MAX;
6692
6693 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
6694 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
6695 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
6ec6d558 6696 {
e075ae69
RH
6697 enum attr_memory second_memory;
6698 int secondcycles, currentcycles;
6699
6700 second_memory = ix86_safe_memory (*insnp);
6701 secondcycles = result_ready_cost (*insnp);
6702 currentcycles = abs (cycles - secondcycles);
6703
6704 if (secondcycles >= 1 && cycles >= 1)
6ec6d558 6705 {
e075ae69
RH
6706 /* Two read/modify/write instructions together takes two
6707 cycles longer. */
6708 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
6709 currentcycles += 2;
0f290768 6710
e075ae69
RH
6711 /* Read modify/write instruction followed by read/modify
6712 takes one cycle longer. */
6713 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
6714 && tmp != PENT_PAIR_UV
6715 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
6716 currentcycles += 1;
6ec6d558 6717 }
e075ae69
RH
6718 if (currentcycles < mincycles)
6719 bestinsnp = insnp, mincycles = currentcycles;
6ec6d558 6720 }
0a726ef1 6721
e075ae69
RH
6722 return bestinsnp;
6723}
6724
78a0d70c 6725/* Subroutines of ix86_sched_reorder. */
e075ae69 6726
c6991660 6727static void
78a0d70c 6728ix86_sched_reorder_pentium (ready, e_ready)
e075ae69 6729 rtx *ready;
78a0d70c 6730 rtx *e_ready;
e075ae69 6731{
78a0d70c 6732 enum attr_pent_pair pair1, pair2;
e075ae69 6733 rtx *insnp;
e075ae69 6734
78a0d70c
ZW
6735 /* This wouldn't be necessary if Haifa knew that static insn ordering
6736 is important to which pipe an insn is issued to. So we have to make
6737 some minor rearrangements. */
e075ae69 6738
78a0d70c
ZW
6739 pair1 = ix86_safe_pent_pair (*e_ready);
6740
6741 /* If the first insn is non-pairable, let it be. */
6742 if (pair1 == PENT_PAIR_NP)
6743 return;
6744
6745 pair2 = PENT_PAIR_NP;
6746 insnp = 0;
6747
6748 /* If the first insn is UV or PV pairable, search for a PU
6749 insn to go with. */
6750 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
e075ae69 6751 {
78a0d70c
ZW
6752 insnp = ix86_pent_find_pair (e_ready-1, ready,
6753 PENT_PAIR_PU, *e_ready);
6754 if (insnp)
6755 pair2 = PENT_PAIR_PU;
6756 }
e075ae69 6757
78a0d70c
ZW
6758 /* If the first insn is PU or UV pairable, search for a PV
6759 insn to go with. */
6760 if (pair2 == PENT_PAIR_NP
6761 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
6762 {
6763 insnp = ix86_pent_find_pair (e_ready-1, ready,
6764 PENT_PAIR_PV, *e_ready);
6765 if (insnp)
6766 pair2 = PENT_PAIR_PV;
6767 }
e075ae69 6768
78a0d70c
ZW
6769 /* If the first insn is pairable, search for a UV
6770 insn to go with. */
6771 if (pair2 == PENT_PAIR_NP)
6772 {
6773 insnp = ix86_pent_find_pair (e_ready-1, ready,
6774 PENT_PAIR_UV, *e_ready);
6775 if (insnp)
6776 pair2 = PENT_PAIR_UV;
6777 }
e075ae69 6778
78a0d70c
ZW
6779 if (pair2 == PENT_PAIR_NP)
6780 return;
e075ae69 6781
78a0d70c
ZW
6782 /* Found something! Decide if we need to swap the order. */
6783 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
6784 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
6785 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
6786 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
6787 ix86_reorder_insn (insnp, e_ready);
6788 else
6789 ix86_reorder_insn (insnp, e_ready - 1);
6790}
e075ae69 6791
c6991660 6792static void
78a0d70c
ZW
6793ix86_sched_reorder_ppro (ready, e_ready)
6794 rtx *ready;
6795 rtx *e_ready;
6796{
6797 rtx decode[3];
6798 enum attr_ppro_uops cur_uops;
6799 int issued_this_cycle;
6800 rtx *insnp;
6801 int i;
e075ae69 6802
0f290768 6803 /* At this point .ppro.decode contains the state of the three
78a0d70c 6804 decoders from last "cycle". That is, those insns that were
0f290768 6805 actually independent. But here we're scheduling for the
78a0d70c
ZW
6806 decoder, and we may find things that are decodable in the
6807 same cycle. */
e075ae69 6808
0f290768 6809 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
78a0d70c 6810 issued_this_cycle = 0;
e075ae69 6811
78a0d70c
ZW
6812 insnp = e_ready;
6813 cur_uops = ix86_safe_ppro_uops (*insnp);
0a726ef1 6814
78a0d70c
ZW
6815 /* If the decoders are empty, and we've a complex insn at the
6816 head of the priority queue, let it issue without complaint. */
6817 if (decode[0] == NULL)
6818 {
6819 if (cur_uops == PPRO_UOPS_MANY)
6820 {
6821 decode[0] = *insnp;
6822 goto ppro_done;
6823 }
6824
6825 /* Otherwise, search for a 2-4 uop unsn to issue. */
6826 while (cur_uops != PPRO_UOPS_FEW)
6827 {
6828 if (insnp == ready)
6829 break;
6830 cur_uops = ix86_safe_ppro_uops (*--insnp);
6831 }
6832
6833 /* If so, move it to the head of the line. */
6834 if (cur_uops == PPRO_UOPS_FEW)
6835 ix86_reorder_insn (insnp, e_ready);
0a726ef1 6836
78a0d70c
ZW
6837 /* Issue the head of the queue. */
6838 issued_this_cycle = 1;
6839 decode[0] = *e_ready--;
6840 }
fb693d44 6841
78a0d70c
ZW
6842 /* Look for simple insns to fill in the other two slots. */
6843 for (i = 1; i < 3; ++i)
6844 if (decode[i] == NULL)
6845 {
6846 if (ready >= e_ready)
6847 goto ppro_done;
fb693d44 6848
e075ae69
RH
6849 insnp = e_ready;
6850 cur_uops = ix86_safe_ppro_uops (*insnp);
78a0d70c
ZW
6851 while (cur_uops != PPRO_UOPS_ONE)
6852 {
6853 if (insnp == ready)
6854 break;
6855 cur_uops = ix86_safe_ppro_uops (*--insnp);
6856 }
fb693d44 6857
78a0d70c
ZW
6858 /* Found one. Move it to the head of the queue and issue it. */
6859 if (cur_uops == PPRO_UOPS_ONE)
e075ae69 6860 {
78a0d70c
ZW
6861 ix86_reorder_insn (insnp, e_ready);
6862 decode[i] = *e_ready--;
6863 issued_this_cycle++;
6864 continue;
6865 }
fb693d44 6866
78a0d70c
ZW
6867 /* ??? Didn't find one. Ideally, here we would do a lazy split
6868 of 2-uop insns, issue one and queue the other. */
6869 }
fb693d44 6870
78a0d70c
ZW
6871 ppro_done:
6872 if (issued_this_cycle == 0)
6873 issued_this_cycle = 1;
6874 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
6875}
fb693d44 6876
0f290768 6877/* We are about to being issuing insns for this clock cycle.
78a0d70c
ZW
6878 Override the default sort algorithm to better slot instructions. */
6879int
6880ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
6881 FILE *dump ATTRIBUTE_UNUSED;
6882 int sched_verbose ATTRIBUTE_UNUSED;
6883 rtx *ready;
6884 int n_ready;
6885 int clock_var ATTRIBUTE_UNUSED;
6886{
6887 rtx *e_ready = ready + n_ready - 1;
fb693d44 6888
78a0d70c
ZW
6889 if (n_ready < 2)
6890 goto out;
e075ae69 6891
78a0d70c
ZW
6892 switch (ix86_cpu)
6893 {
6894 default:
6895 break;
e075ae69 6896
78a0d70c
ZW
6897 case PROCESSOR_PENTIUM:
6898 ix86_sched_reorder_pentium (ready, e_ready);
6899 break;
e075ae69 6900
78a0d70c
ZW
6901 case PROCESSOR_PENTIUMPRO:
6902 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 6903 break;
fb693d44
RH
6904 }
6905
e075ae69
RH
6906out:
6907 return ix86_issue_rate ();
6908}
fb693d44 6909
e075ae69
RH
6910/* We are about to issue INSN. Return the number of insns left on the
6911 ready queue that can be issued this cycle. */
b222082e 6912
e075ae69
RH
6913int
6914ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
6915 FILE *dump;
6916 int sched_verbose;
6917 rtx insn;
6918 int can_issue_more;
6919{
6920 int i;
6921 switch (ix86_cpu)
fb693d44 6922 {
e075ae69
RH
6923 default:
6924 return can_issue_more - 1;
fb693d44 6925
e075ae69
RH
6926 case PROCESSOR_PENTIUMPRO:
6927 {
6928 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
fb693d44 6929
e075ae69
RH
6930 if (uops == PPRO_UOPS_MANY)
6931 {
6932 if (sched_verbose)
6933 ix86_dump_ppro_packet (dump);
6934 ix86_sched_data.ppro.decode[0] = insn;
6935 ix86_sched_data.ppro.decode[1] = NULL;
6936 ix86_sched_data.ppro.decode[2] = NULL;
6937 if (sched_verbose)
6938 ix86_dump_ppro_packet (dump);
6939 ix86_sched_data.ppro.decode[0] = NULL;
6940 }
6941 else if (uops == PPRO_UOPS_FEW)
6942 {
6943 if (sched_verbose)
6944 ix86_dump_ppro_packet (dump);
6945 ix86_sched_data.ppro.decode[0] = insn;
6946 ix86_sched_data.ppro.decode[1] = NULL;
6947 ix86_sched_data.ppro.decode[2] = NULL;
6948 }
6949 else
6950 {
6951 for (i = 0; i < 3; ++i)
6952 if (ix86_sched_data.ppro.decode[i] == NULL)
6953 {
6954 ix86_sched_data.ppro.decode[i] = insn;
6955 break;
6956 }
6957 if (i == 3)
6958 abort ();
6959 if (i == 2)
6960 {
6961 if (sched_verbose)
6962 ix86_dump_ppro_packet (dump);
6963 ix86_sched_data.ppro.decode[0] = NULL;
6964 ix86_sched_data.ppro.decode[1] = NULL;
6965 ix86_sched_data.ppro.decode[2] = NULL;
6966 }
6967 }
6968 }
6969 return --ix86_sched_data.ppro.issued_this_cycle;
6970 }
fb693d44 6971}
a7180f70
BS
6972\f
6973/* Compute the alignment given to a constant that is being placed in memory.
6974 EXP is the constant and ALIGN is the alignment that the object would
6975 ordinarily have.
6976 The value of this function is used instead of that alignment to align
6977 the object. */
6978
6979int
6980ix86_constant_alignment (exp, align)
6981 tree exp;
6982 int align;
6983{
6984 if (TREE_CODE (exp) == REAL_CST)
6985 {
6986 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
6987 return 64;
6988 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
6989 return 128;
6990 }
6991 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
6992 && align < 256)
6993 return 256;
6994
6995 return align;
6996}
6997
6998/* Compute the alignment for a static variable.
6999 TYPE is the data type, and ALIGN is the alignment that
7000 the object would ordinarily have. The value of this function is used
7001 instead of that alignment to align the object. */
7002
7003int
7004ix86_data_alignment (type, align)
7005 tree type;
7006 int align;
7007{
7008 if (AGGREGATE_TYPE_P (type)
7009 && TYPE_SIZE (type)
7010 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
7011 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
7012 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
7013 return 256;
7014
7015 if (TREE_CODE (type) == ARRAY_TYPE)
7016 {
7017 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7018 return 64;
7019 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7020 return 128;
7021 }
7022 else if (TREE_CODE (type) == COMPLEX_TYPE)
7023 {
0f290768 7024
a7180f70
BS
7025 if (TYPE_MODE (type) == DCmode && align < 64)
7026 return 64;
7027 if (TYPE_MODE (type) == XCmode && align < 128)
7028 return 128;
7029 }
7030 else if ((TREE_CODE (type) == RECORD_TYPE
7031 || TREE_CODE (type) == UNION_TYPE
7032 || TREE_CODE (type) == QUAL_UNION_TYPE)
7033 && TYPE_FIELDS (type))
7034 {
7035 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7036 return 64;
7037 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7038 return 128;
7039 }
7040 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7041 || TREE_CODE (type) == INTEGER_TYPE)
7042 {
7043 if (TYPE_MODE (type) == DFmode && align < 64)
7044 return 64;
7045 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7046 return 128;
7047 }
7048
7049 return align;
7050}
7051
7052/* Compute the alignment for a local variable.
7053 TYPE is the data type, and ALIGN is the alignment that
7054 the object would ordinarily have. The value of this macro is used
7055 instead of that alignment to align the object. */
7056
7057int
7058ix86_local_alignment (type, align)
7059 tree type;
7060 int align;
7061{
7062 if (TREE_CODE (type) == ARRAY_TYPE)
7063 {
7064 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7065 return 64;
7066 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7067 return 128;
7068 }
7069 else if (TREE_CODE (type) == COMPLEX_TYPE)
7070 {
7071 if (TYPE_MODE (type) == DCmode && align < 64)
7072 return 64;
7073 if (TYPE_MODE (type) == XCmode && align < 128)
7074 return 128;
7075 }
7076 else if ((TREE_CODE (type) == RECORD_TYPE
7077 || TREE_CODE (type) == UNION_TYPE
7078 || TREE_CODE (type) == QUAL_UNION_TYPE)
7079 && TYPE_FIELDS (type))
7080 {
7081 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7082 return 64;
7083 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7084 return 128;
7085 }
7086 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7087 || TREE_CODE (type) == INTEGER_TYPE)
7088 {
0f290768 7089
a7180f70
BS
7090 if (TYPE_MODE (type) == DFmode && align < 64)
7091 return 64;
7092 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7093 return 128;
7094 }
7095 return align;
7096}
bd793c65
BS
7097
/* Register one target-specific (BUILT_IN_MD) builtin function with the
   given user-visible NAME, function TYPE node and builtin function CODE.  */
#define def_builtin(NAME, TYPE, CODE) \
  builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL_PTR)

/* Describes one MMX/SSE builtin that maps more or less directly onto a
   named insn pattern; tables of these drive ix86_init_builtins.  */
struct builtin_description
{
  enum insn_code icode;		/* Insn pattern implementing the builtin.  */
  const char * name;		/* Builtin name, or 0 if registered
				   separately with a bespoke type.  */
  enum ix86_builtins code;	/* The builtin function code.  */
  enum rtx_code comparison;	/* For compare builtins: comparison used.  */
  unsigned int flag;		/* For compare builtins: nonzero to swap the
				   operands (see ix86_expand_sse_compare).  */
};
7108
/* SSE scalar compare builtins mapping onto the comiss/ucomiss patterns,
   which produce an integer result.  Entries with FLAG set (comigt/comige
   and their ucomi counterparts) use an inverted comparison; presumably the
   operands are swapped at expansion time so GT/GE reduce to LT/LE --
   confirm against the comi expander.  */
static struct builtin_description bdesc_comi[] =
{
  { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
};
7124
/* Two-operand builtins.  ix86_init_builtins registers every entry that has
   a nonzero NAME through the generic loop, deriving the prototype from the
   insn's operand modes; entries with NAME == 0 are registered separately
   with bespoke prototypes.  For the mask-compare entries, COMPARISON gives
   the rtx comparison code and FLAG nonzero means the operands are swapped
   at expansion time (so GT/GE are implemented via LT/LE).  */
static struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  /* Packed and scalar mask compares; the "n" patterns are the negated
     forms.  */
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },

  { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },

  { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  These are registered by hand in ix86_init_builtins (name 0
     makes the generic loop skip them).  */
  { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },

  { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }

};
7257
/* One-operand builtins.  All entries have NAME == 0: they are registered
   by hand in ix86_init_builtins with explicit prototypes, and this table
   is consulted only for their insn codes at expansion time.  */
static struct builtin_description bdesc_1arg[] =
{
  { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }

};
7273
7274/* Expand all the target specific builtins. This is not called if TARGET_MMX
7275 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
7276 builtins. */
7277void
7278ix86_init_builtins ()
7279{
7280 struct builtin_description * d;
7281 int i;
7282 tree endlink = tree_cons (NULL_TREE, void_type_node, NULL_TREE);
7283
7284 tree pchar_type_node = build_pointer_type (char_type_node);
7285 tree pfloat_type_node = build_pointer_type (float_type_node);
7286 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
7287 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
7288
7289 /* Comparisons. */
7290 tree int_ftype_v4sf_v4sf
7291 = build_function_type (integer_type_node,
7292 tree_cons (NULL_TREE, V4SF_type_node,
7293 tree_cons (NULL_TREE,
7294 V4SF_type_node,
7295 endlink)));
7296 tree v4si_ftype_v4sf_v4sf
7297 = build_function_type (V4SI_type_node,
7298 tree_cons (NULL_TREE, V4SF_type_node,
7299 tree_cons (NULL_TREE,
7300 V4SF_type_node,
7301 endlink)));
7302 /* MMX/SSE/integer conversions. */
7303 tree int_ftype_v4sf_int
7304 = build_function_type (integer_type_node,
7305 tree_cons (NULL_TREE, V4SF_type_node,
7306 tree_cons (NULL_TREE,
7307 integer_type_node,
7308 endlink)));
7309 tree int_ftype_v4sf
7310 = build_function_type (integer_type_node,
7311 tree_cons (NULL_TREE, V4SF_type_node,
7312 endlink));
7313 tree int_ftype_v8qi
7314 = build_function_type (integer_type_node,
7315 tree_cons (NULL_TREE, V8QI_type_node,
7316 endlink));
7317 tree int_ftype_v2si
7318 = build_function_type (integer_type_node,
7319 tree_cons (NULL_TREE, V2SI_type_node,
7320 endlink));
7321 tree v2si_ftype_int
7322 = build_function_type (V2SI_type_node,
7323 tree_cons (NULL_TREE, integer_type_node,
7324 endlink));
7325 tree v4sf_ftype_v4sf_int
7326 = build_function_type (integer_type_node,
7327 tree_cons (NULL_TREE, V4SF_type_node,
7328 tree_cons (NULL_TREE, integer_type_node,
7329 endlink)));
7330 tree v4sf_ftype_v4sf_v2si
7331 = build_function_type (V4SF_type_node,
7332 tree_cons (NULL_TREE, V4SF_type_node,
7333 tree_cons (NULL_TREE, V2SI_type_node,
7334 endlink)));
7335 tree int_ftype_v4hi_int
7336 = build_function_type (integer_type_node,
7337 tree_cons (NULL_TREE, V4HI_type_node,
7338 tree_cons (NULL_TREE, integer_type_node,
7339 endlink)));
7340 tree v4hi_ftype_v4hi_int_int
332316cd 7341 = build_function_type (V4HI_type_node,
bd793c65
BS
7342 tree_cons (NULL_TREE, V4HI_type_node,
7343 tree_cons (NULL_TREE, integer_type_node,
7344 tree_cons (NULL_TREE,
7345 integer_type_node,
7346 endlink))));
7347 /* Miscellaneous. */
7348 tree v8qi_ftype_v4hi_v4hi
7349 = build_function_type (V8QI_type_node,
7350 tree_cons (NULL_TREE, V4HI_type_node,
7351 tree_cons (NULL_TREE, V4HI_type_node,
7352 endlink)));
7353 tree v4hi_ftype_v2si_v2si
7354 = build_function_type (V4HI_type_node,
7355 tree_cons (NULL_TREE, V2SI_type_node,
7356 tree_cons (NULL_TREE, V2SI_type_node,
7357 endlink)));
7358 tree v4sf_ftype_v4sf_v4sf_int
7359 = build_function_type (V4SF_type_node,
7360 tree_cons (NULL_TREE, V4SF_type_node,
7361 tree_cons (NULL_TREE, V4SF_type_node,
7362 tree_cons (NULL_TREE,
7363 integer_type_node,
7364 endlink))));
7365 tree v4hi_ftype_v8qi_v8qi
7366 = build_function_type (V4HI_type_node,
7367 tree_cons (NULL_TREE, V8QI_type_node,
7368 tree_cons (NULL_TREE, V8QI_type_node,
7369 endlink)));
7370 tree v2si_ftype_v4hi_v4hi
7371 = build_function_type (V2SI_type_node,
7372 tree_cons (NULL_TREE, V4HI_type_node,
7373 tree_cons (NULL_TREE, V4HI_type_node,
7374 endlink)));
7375 tree v4hi_ftype_v4hi_int
7376 = build_function_type (V4HI_type_node,
7377 tree_cons (NULL_TREE, V4HI_type_node,
7378 tree_cons (NULL_TREE, integer_type_node,
7379 endlink)));
7380 tree di_ftype_di_int
7381 = build_function_type (long_long_unsigned_type_node,
7382 tree_cons (NULL_TREE, long_long_unsigned_type_node,
7383 tree_cons (NULL_TREE, integer_type_node,
7384 endlink)));
7385 tree v8qi_ftype_v8qi_di
7386 = build_function_type (V8QI_type_node,
7387 tree_cons (NULL_TREE, V8QI_type_node,
7388 tree_cons (NULL_TREE,
7389 long_long_integer_type_node,
7390 endlink)));
7391 tree v4hi_ftype_v4hi_di
7392 = build_function_type (V4HI_type_node,
7393 tree_cons (NULL_TREE, V4HI_type_node,
7394 tree_cons (NULL_TREE,
7395 long_long_integer_type_node,
7396 endlink)));
7397 tree v2si_ftype_v2si_di
7398 = build_function_type (V2SI_type_node,
7399 tree_cons (NULL_TREE, V2SI_type_node,
7400 tree_cons (NULL_TREE,
7401 long_long_integer_type_node,
7402 endlink)));
7403 tree void_ftype_void
7404 = build_function_type (void_type_node, endlink);
7405 tree void_ftype_pchar_int
7406 = build_function_type (void_type_node,
7407 tree_cons (NULL_TREE, pchar_type_node,
7408 tree_cons (NULL_TREE, integer_type_node,
7409 endlink)));
7410 tree void_ftype_unsigned
7411 = build_function_type (void_type_node,
7412 tree_cons (NULL_TREE, unsigned_type_node,
7413 endlink));
7414 tree unsigned_ftype_void
7415 = build_function_type (unsigned_type_node, endlink);
7416 tree di_ftype_void
7417 = build_function_type (long_long_unsigned_type_node, endlink);
7418 tree ti_ftype_void
7419 = build_function_type (intTI_type_node, endlink);
7420 tree v2si_ftype_v4sf
7421 = build_function_type (V2SI_type_node,
7422 tree_cons (NULL_TREE, V4SF_type_node,
7423 endlink));
7424 /* Loads/stores. */
7425 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
7426 tree_cons (NULL_TREE, V8QI_type_node,
7427 tree_cons (NULL_TREE,
7428 pchar_type_node,
7429 endlink)));
7430 tree void_ftype_v8qi_v8qi_pchar
7431 = build_function_type (void_type_node, maskmovq_args);
7432 tree v4sf_ftype_pfloat
7433 = build_function_type (V4SF_type_node,
7434 tree_cons (NULL_TREE, pfloat_type_node,
7435 endlink));
7436 tree v4sf_ftype_float
7437 = build_function_type (V4SF_type_node,
7438 tree_cons (NULL_TREE, float_type_node,
7439 endlink));
7440 tree v4sf_ftype_float_float_float_float
7441 = build_function_type (V4SF_type_node,
7442 tree_cons (NULL_TREE, float_type_node,
7443 tree_cons (NULL_TREE, float_type_node,
7444 tree_cons (NULL_TREE,
7445 float_type_node,
7446 tree_cons (NULL_TREE,
7447 float_type_node,
7448 endlink)))));
7449 /* @@@ the type is bogus */
7450 tree v4sf_ftype_v4sf_pv2si
7451 = build_function_type (V4SF_type_node,
7452 tree_cons (NULL_TREE, V4SF_type_node,
7453 tree_cons (NULL_TREE, pv2si_type_node,
7454 endlink)));
7455 tree v4sf_ftype_pv2si_v4sf
7456 = build_function_type (V4SF_type_node,
7457 tree_cons (NULL_TREE, V4SF_type_node,
7458 tree_cons (NULL_TREE, pv2si_type_node,
7459 endlink)));
7460 tree void_ftype_pfloat_v4sf
7461 = build_function_type (void_type_node,
7462 tree_cons (NULL_TREE, pfloat_type_node,
7463 tree_cons (NULL_TREE, V4SF_type_node,
7464 endlink)));
7465 tree void_ftype_pdi_di
7466 = build_function_type (void_type_node,
7467 tree_cons (NULL_TREE, pdi_type_node,
7468 tree_cons (NULL_TREE,
7469 long_long_unsigned_type_node,
7470 endlink)));
7471 /* Normal vector unops. */
7472 tree v4sf_ftype_v4sf
7473 = build_function_type (V4SF_type_node,
7474 tree_cons (NULL_TREE, V4SF_type_node,
7475 endlink));
0f290768 7476
bd793c65
BS
7477 /* Normal vector binops. */
7478 tree v4sf_ftype_v4sf_v4sf
7479 = build_function_type (V4SF_type_node,
7480 tree_cons (NULL_TREE, V4SF_type_node,
7481 tree_cons (NULL_TREE, V4SF_type_node,
7482 endlink)));
7483 tree v8qi_ftype_v8qi_v8qi
7484 = build_function_type (V8QI_type_node,
7485 tree_cons (NULL_TREE, V8QI_type_node,
7486 tree_cons (NULL_TREE, V8QI_type_node,
7487 endlink)));
7488 tree v4hi_ftype_v4hi_v4hi
7489 = build_function_type (V4HI_type_node,
7490 tree_cons (NULL_TREE, V4HI_type_node,
7491 tree_cons (NULL_TREE, V4HI_type_node,
7492 endlink)));
7493 tree v2si_ftype_v2si_v2si
7494 = build_function_type (V2SI_type_node,
7495 tree_cons (NULL_TREE, V2SI_type_node,
7496 tree_cons (NULL_TREE, V2SI_type_node,
7497 endlink)));
7498 tree ti_ftype_ti_ti
7499 = build_function_type (intTI_type_node,
7500 tree_cons (NULL_TREE, intTI_type_node,
7501 tree_cons (NULL_TREE, intTI_type_node,
7502 endlink)));
7503 tree di_ftype_di_di
7504 = build_function_type (long_long_unsigned_type_node,
7505 tree_cons (NULL_TREE, long_long_unsigned_type_node,
7506 tree_cons (NULL_TREE,
7507 long_long_unsigned_type_node,
7508 endlink)));
7509
7510 /* Add all builtins that are more or less simple operations on two
7511 operands. */
7512 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
7513 {
7514 /* Use one of the operands; the target can have a different mode for
7515 mask-generating compares. */
7516 enum machine_mode mode;
7517 tree type;
7518
7519 if (d->name == 0)
7520 continue;
7521 mode = insn_data[d->icode].operand[1].mode;
7522
7523 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
7524 continue;
7525
7526 switch (mode)
7527 {
7528 case V4SFmode:
7529 type = v4sf_ftype_v4sf_v4sf;
7530 break;
7531 case V8QImode:
7532 type = v8qi_ftype_v8qi_v8qi;
7533 break;
7534 case V4HImode:
7535 type = v4hi_ftype_v4hi_v4hi;
7536 break;
7537 case V2SImode:
7538 type = v2si_ftype_v2si_v2si;
7539 break;
7540 case TImode:
7541 type = ti_ftype_ti_ti;
7542 break;
7543 case DImode:
7544 type = di_ftype_di_di;
7545 break;
7546
7547 default:
7548 abort ();
7549 }
0f290768 7550
bd793c65
BS
7551 /* Override for comparisons. */
7552 if (d->icode == CODE_FOR_maskcmpv4sf3
7553 || d->icode == CODE_FOR_maskncmpv4sf3
7554 || d->icode == CODE_FOR_vmmaskcmpv4sf3
7555 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
7556 type = v4si_ftype_v4sf_v4sf;
7557
7558 def_builtin (d->name, type, d->code);
7559 }
7560
7561 /* Add the remaining MMX insns with somewhat more complicated types. */
7562 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
7563 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
7564 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
7565 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
7566 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
7567 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
7568 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
7569 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
7570 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
7571
7572 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
7573 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
7574 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
7575
7576 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
7577 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
7578
7579 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
7580 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
7581
7582 /* Everything beyond this point is SSE only. */
7583 if (! TARGET_SSE)
7584 return;
0f290768 7585
bd793c65
BS
7586 /* comi/ucomi insns. */
7587 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
7588 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
7589
7590 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
7591 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
7592 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
7593
7594 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
7595 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
7596 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
7597 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
7598 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
7599 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
7600
7601 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
7602 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
7603
7604 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
7605
7606 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
7607 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
7608 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
7609 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
7610 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
7611 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
7612
7613 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
7614 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
7615 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
7616 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
7617
7618 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
7619 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
7620 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
7621 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
7622
7623 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
7624 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
7625
7626 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
7627
7628 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
7629 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
7630 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
7631 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
7632 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
7633 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
7634
7635 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
7636
7637 /* Composite intrinsics. */
7638 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
7639 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
7640 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
7641 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
7642 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
7643 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
7644 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
7645}
7646
7647/* Errors in the source file can cause expand_expr to return const0_rtx
7648 where we expect a vector. To avoid crashing, use one of the vector
7649 clear instructions. */
7650static rtx
7651safe_vector_operand (x, mode)
7652 rtx x;
7653 enum machine_mode mode;
7654{
7655 if (x != const0_rtx)
7656 return x;
7657 x = gen_reg_rtx (mode);
7658
7659 if (VALID_MMX_REG_MODE (mode))
7660 emit_insn (gen_mmx_clrdi (mode == DImode ? x
7661 : gen_rtx_SUBREG (DImode, x, 0)));
7662 else
7663 emit_insn (gen_sse_clrti (mode == TImode ? x
7664 : gen_rtx_SUBREG (TImode, x, 0)));
7665 return x;
7666}
7667
7668/* Subroutine of ix86_expand_builtin to take care of binop insns. */
7669
7670static rtx
7671ix86_expand_binop_builtin (icode, arglist, target)
7672 enum insn_code icode;
7673 tree arglist;
7674 rtx target;
7675{
7676 rtx pat;
7677 tree arg0 = TREE_VALUE (arglist);
7678 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7679 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7680 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7681 enum machine_mode tmode = insn_data[icode].operand[0].mode;
7682 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
7683 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
7684
7685 if (VECTOR_MODE_P (mode0))
7686 op0 = safe_vector_operand (op0, mode0);
7687 if (VECTOR_MODE_P (mode1))
7688 op1 = safe_vector_operand (op1, mode1);
7689
7690 if (! target
7691 || GET_MODE (target) != tmode
7692 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7693 target = gen_reg_rtx (tmode);
7694
7695 /* In case the insn wants input operands in modes different from
7696 the result, abort. */
7697 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
7698 abort ();
7699
7700 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7701 op0 = copy_to_mode_reg (mode0, op0);
7702 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
7703 op1 = copy_to_mode_reg (mode1, op1);
7704
7705 pat = GEN_FCN (icode) (target, op0, op1);
7706 if (! pat)
7707 return 0;
7708 emit_insn (pat);
7709 return target;
7710}
7711
7712/* Subroutine of ix86_expand_builtin to take care of stores. */
7713
7714static rtx
7715ix86_expand_store_builtin (icode, arglist, shuffle)
7716 enum insn_code icode;
7717 tree arglist;
7718 int shuffle;
7719{
7720 rtx pat;
7721 tree arg0 = TREE_VALUE (arglist);
7722 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7723 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7724 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7725 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
7726 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
7727
7728 if (VECTOR_MODE_P (mode1))
7729 op1 = safe_vector_operand (op1, mode1);
7730
7731 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
7732 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
7733 op1 = copy_to_mode_reg (mode1, op1);
7734 if (shuffle >= 0)
7735 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
7736 pat = GEN_FCN (icode) (op0, op1);
7737 if (pat)
7738 emit_insn (pat);
7739 return 0;
7740}
7741
7742/* Subroutine of ix86_expand_builtin to take care of unop insns. */
7743
7744static rtx
7745ix86_expand_unop_builtin (icode, arglist, target, do_load)
7746 enum insn_code icode;
7747 tree arglist;
7748 rtx target;
7749 int do_load;
7750{
7751 rtx pat;
7752 tree arg0 = TREE_VALUE (arglist);
7753 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7754 enum machine_mode tmode = insn_data[icode].operand[0].mode;
7755 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
7756
7757 if (! target
7758 || GET_MODE (target) != tmode
7759 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7760 target = gen_reg_rtx (tmode);
7761 if (do_load)
7762 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
7763 else
7764 {
7765 if (VECTOR_MODE_P (mode0))
7766 op0 = safe_vector_operand (op0, mode0);
7767
7768 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7769 op0 = copy_to_mode_reg (mode0, op0);
7770 }
7771
7772 pat = GEN_FCN (icode) (target, op0);
7773 if (! pat)
7774 return 0;
7775 emit_insn (pat);
7776 return target;
7777}
7778
7779/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
7780 sqrtss, rsqrtss, rcpss. */
7781
7782static rtx
7783ix86_expand_unop1_builtin (icode, arglist, target)
7784 enum insn_code icode;
7785 tree arglist;
7786 rtx target;
7787{
7788 rtx pat;
7789 tree arg0 = TREE_VALUE (arglist);
7790 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7791 enum machine_mode tmode = insn_data[icode].operand[0].mode;
7792 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
7793
7794 if (! target
7795 || GET_MODE (target) != tmode
7796 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7797 target = gen_reg_rtx (tmode);
7798
7799 if (VECTOR_MODE_P (mode0))
7800 op0 = safe_vector_operand (op0, mode0);
7801
7802 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7803 op0 = copy_to_mode_reg (mode0, op0);
7804
7805 pat = GEN_FCN (icode) (target, op0, op0);
7806 if (! pat)
7807 return 0;
7808 emit_insn (pat);
7809 return target;
7810}
7811
7812/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
7813
7814static rtx
7815ix86_expand_sse_compare (d, arglist, target)
7816 struct builtin_description *d;
7817 tree arglist;
7818 rtx target;
7819{
7820 rtx pat;
7821 tree arg0 = TREE_VALUE (arglist);
7822 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7823 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7824 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7825 rtx op2;
7826 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
7827 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
7828 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
7829 enum rtx_code comparison = d->comparison;
7830
7831 if (VECTOR_MODE_P (mode0))
7832 op0 = safe_vector_operand (op0, mode0);
7833 if (VECTOR_MODE_P (mode1))
7834 op1 = safe_vector_operand (op1, mode1);
7835
7836 /* Swap operands if we have a comparison that isn't available in
7837 hardware. */
7838 if (d->flag)
7839 {
7840 target = gen_reg_rtx (tmode);
7841 emit_move_insn (target, op1);
7842 op1 = op0;
7843 op0 = target;
7844 comparison = swap_condition (comparison);
7845 }
7846 else if (! target
7847 || GET_MODE (target) != tmode
7848 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
7849 target = gen_reg_rtx (tmode);
7850
7851 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
7852 op0 = copy_to_mode_reg (mode0, op0);
7853 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
7854 op1 = copy_to_mode_reg (mode1, op1);
7855
7856 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
7857 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
7858 if (! pat)
7859 return 0;
7860 emit_insn (pat);
7861 return target;
7862}
7863
7864/* Subroutine of ix86_expand_builtin to take care of comi insns. */
7865
7866static rtx
7867ix86_expand_sse_comi (d, arglist, target)
7868 struct builtin_description *d;
7869 tree arglist;
7870 rtx target;
7871{
7872 rtx pat;
7873 tree arg0 = TREE_VALUE (arglist);
7874 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7875 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7876 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7877 rtx op2;
7878 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
7879 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
7880 enum rtx_code comparison = d->comparison;
7881
7882 if (VECTOR_MODE_P (mode0))
7883 op0 = safe_vector_operand (op0, mode0);
7884 if (VECTOR_MODE_P (mode1))
7885 op1 = safe_vector_operand (op1, mode1);
7886
7887 /* Swap operands if we have a comparison that isn't available in
7888 hardware. */
7889 if (d->flag)
7890 {
7891 rtx tmp = op1;
7892 op1 = op0;
7893 op0 = tmp;
7894 comparison = swap_condition (comparison);
7895 }
7896
7897 target = gen_reg_rtx (SImode);
7898 emit_move_insn (target, const0_rtx);
7899 target = gen_rtx_SUBREG (QImode, target, 0);
7900
7901 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
7902 op0 = copy_to_mode_reg (mode0, op0);
7903 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
7904 op1 = copy_to_mode_reg (mode1, op1);
7905
7906 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
7907 pat = GEN_FCN (d->icode) (op0, op1, op2);
7908 if (! pat)
7909 return 0;
7910 emit_insn (pat);
7911 emit_insn (gen_setcc_2 (target, op2));
7912
7913 return target;
7914}
7915
7916/* Expand an expression EXP that calls a built-in function,
7917 with result going to TARGET if that's convenient
7918 (and in mode MODE if that's convenient).
7919 SUBTARGET may be used as the target for computing one of EXP's operands.
7920 IGNORE is nonzero if the value is to be ignored. */
7921
7922rtx
7923ix86_expand_builtin (exp, target, subtarget, mode, ignore)
7924 tree exp;
7925 rtx target;
7926 rtx subtarget ATTRIBUTE_UNUSED;
7927 enum machine_mode mode ATTRIBUTE_UNUSED;
7928 int ignore ATTRIBUTE_UNUSED;
7929{
7930 struct builtin_description *d;
7931 int i;
7932 enum insn_code icode;
7933 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7934 tree arglist = TREE_OPERAND (exp, 1);
7935 tree arg0, arg1, arg2, arg3;
7936 rtx op0, op1, op2, pat;
7937 enum machine_mode tmode, mode0, mode1, mode2;
7938 int fcode = DECL_FUNCTION_CODE (fndecl);
7939
7940 switch (fcode)
7941 {
7942 case IX86_BUILTIN_EMMS:
7943 emit_insn (gen_emms ());
7944 return 0;
7945
7946 case IX86_BUILTIN_SFENCE:
7947 emit_insn (gen_sfence ());
7948 return 0;
7949
7950 case IX86_BUILTIN_M_FROM_INT:
7951 target = gen_reg_rtx (DImode);
7952 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
7953 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
7954 return target;
7955
7956 case IX86_BUILTIN_M_TO_INT:
7957 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
7958 op0 = copy_to_mode_reg (DImode, op0);
7959 target = gen_reg_rtx (SImode);
7960 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
7961 return target;
7962
7963 case IX86_BUILTIN_PEXTRW:
7964 icode = CODE_FOR_mmx_pextrw;
7965 arg0 = TREE_VALUE (arglist);
7966 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7967 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7968 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7969 tmode = insn_data[icode].operand[0].mode;
7970 mode0 = insn_data[icode].operand[1].mode;
7971 mode1 = insn_data[icode].operand[2].mode;
7972
7973 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7974 op0 = copy_to_mode_reg (mode0, op0);
7975 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
7976 {
7977 /* @@@ better error message */
7978 error ("selector must be an immediate");
7979 return const0_rtx;
7980 }
7981 if (target == 0
7982 || GET_MODE (target) != tmode
7983 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7984 target = gen_reg_rtx (tmode);
7985 pat = GEN_FCN (icode) (target, op0, op1);
7986 if (! pat)
7987 return 0;
7988 emit_insn (pat);
7989 return target;
7990
7991 case IX86_BUILTIN_PINSRW:
7992 icode = CODE_FOR_mmx_pinsrw;
7993 arg0 = TREE_VALUE (arglist);
7994 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7995 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
7996 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7997 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7998 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
7999 tmode = insn_data[icode].operand[0].mode;
8000 mode0 = insn_data[icode].operand[1].mode;
8001 mode1 = insn_data[icode].operand[2].mode;
8002 mode2 = insn_data[icode].operand[3].mode;
8003
8004 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8005 op0 = copy_to_mode_reg (mode0, op0);
8006 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8007 op1 = copy_to_mode_reg (mode1, op1);
8008 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8009 {
8010 /* @@@ better error message */
8011 error ("selector must be an immediate");
8012 return const0_rtx;
8013 }
8014 if (target == 0
8015 || GET_MODE (target) != tmode
8016 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8017 target = gen_reg_rtx (tmode);
8018 pat = GEN_FCN (icode) (target, op0, op1, op2);
8019 if (! pat)
8020 return 0;
8021 emit_insn (pat);
8022 return target;
8023
8024 case IX86_BUILTIN_MASKMOVQ:
8025 icode = CODE_FOR_mmx_maskmovq;
8026 /* Note the arg order is different from the operand order. */
8027 arg1 = TREE_VALUE (arglist);
8028 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
8029 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8030 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8031 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8032 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8033 mode0 = insn_data[icode].operand[0].mode;
8034 mode1 = insn_data[icode].operand[1].mode;
8035 mode2 = insn_data[icode].operand[2].mode;
8036
8037 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8038 op0 = copy_to_mode_reg (mode0, op0);
8039 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8040 op1 = copy_to_mode_reg (mode1, op1);
8041 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
8042 op2 = copy_to_mode_reg (mode2, op2);
8043 pat = GEN_FCN (icode) (op0, op1, op2);
8044 if (! pat)
8045 return 0;
8046 emit_insn (pat);
8047 return 0;
8048
8049 case IX86_BUILTIN_SQRTSS:
8050 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
8051 case IX86_BUILTIN_RSQRTSS:
8052 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
8053 case IX86_BUILTIN_RCPSS:
8054 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
8055
8056 case IX86_BUILTIN_LOADAPS:
8057 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
8058
8059 case IX86_BUILTIN_LOADUPS:
8060 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
8061
8062 case IX86_BUILTIN_STOREAPS:
8063 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
8064 case IX86_BUILTIN_STOREUPS:
8065 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
8066
8067 case IX86_BUILTIN_LOADSS:
8068 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
8069
8070 case IX86_BUILTIN_STORESS:
8071 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
8072
0f290768 8073 case IX86_BUILTIN_LOADHPS:
bd793c65
BS
8074 case IX86_BUILTIN_LOADLPS:
8075 icode = (fcode == IX86_BUILTIN_LOADHPS
8076 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8077 arg0 = TREE_VALUE (arglist);
8078 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8079 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8080 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8081 tmode = insn_data[icode].operand[0].mode;
8082 mode0 = insn_data[icode].operand[1].mode;
8083 mode1 = insn_data[icode].operand[2].mode;
8084
8085 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8086 op0 = copy_to_mode_reg (mode0, op0);
8087 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
8088 if (target == 0
8089 || GET_MODE (target) != tmode
8090 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8091 target = gen_reg_rtx (tmode);
8092 pat = GEN_FCN (icode) (target, op0, op1);
8093 if (! pat)
8094 return 0;
8095 emit_insn (pat);
8096 return target;
0f290768 8097
bd793c65
BS
8098 case IX86_BUILTIN_STOREHPS:
8099 case IX86_BUILTIN_STORELPS:
8100 icode = (fcode == IX86_BUILTIN_STOREHPS
8101 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8102 arg0 = TREE_VALUE (arglist);
8103 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8104 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8105 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8106 mode0 = insn_data[icode].operand[1].mode;
8107 mode1 = insn_data[icode].operand[2].mode;
8108
8109 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8110 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8111 op1 = copy_to_mode_reg (mode1, op1);
8112
8113 pat = GEN_FCN (icode) (op0, op0, op1);
8114 if (! pat)
8115 return 0;
8116 emit_insn (pat);
8117 return 0;
8118
8119 case IX86_BUILTIN_MOVNTPS:
8120 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
8121 case IX86_BUILTIN_MOVNTQ:
8122 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
8123
8124 case IX86_BUILTIN_LDMXCSR:
8125 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8126 target = assign_386_stack_local (SImode, 0);
8127 emit_move_insn (target, op0);
8128 emit_insn (gen_ldmxcsr (target));
8129 return 0;
8130
8131 case IX86_BUILTIN_STMXCSR:
8132 target = assign_386_stack_local (SImode, 0);
8133 emit_insn (gen_stmxcsr (target));
8134 return copy_to_mode_reg (SImode, target);
8135
8136 case IX86_BUILTIN_PREFETCH:
8137 icode = CODE_FOR_prefetch;
8138 arg0 = TREE_VALUE (arglist);
8139 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8140 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8141 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
332316cd
BS
8142 mode0 = insn_data[icode].operand[0].mode;
8143 mode1 = insn_data[icode].operand[1].mode;
bd793c65 8144
332316cd 8145 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
bd793c65
BS
8146 {
8147 /* @@@ better error message */
8148 error ("selector must be an immediate");
8149 return const0_rtx;
8150 }
8151
332316cd 8152 op0 = copy_to_mode_reg (Pmode, op0);
bd793c65
BS
8153 pat = GEN_FCN (icode) (op0, op1);
8154 if (! pat)
8155 return 0;
8156 emit_insn (pat);
8157 return target;
0f290768 8158
bd793c65
BS
8159 case IX86_BUILTIN_SHUFPS:
8160 icode = CODE_FOR_sse_shufps;
8161 arg0 = TREE_VALUE (arglist);
8162 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8163 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8164 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8165 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8166 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8167 tmode = insn_data[icode].operand[0].mode;
8168 mode0 = insn_data[icode].operand[1].mode;
8169 mode1 = insn_data[icode].operand[2].mode;
8170 mode2 = insn_data[icode].operand[3].mode;
8171
8172 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8173 op0 = copy_to_mode_reg (mode0, op0);
8174 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8175 op1 = copy_to_mode_reg (mode1, op1);
8176 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8177 {
8178 /* @@@ better error message */
8179 error ("mask must be an immediate");
8180 return const0_rtx;
8181 }
8182 if (target == 0
8183 || GET_MODE (target) != tmode
8184 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8185 target = gen_reg_rtx (tmode);
8186 pat = GEN_FCN (icode) (target, op0, op1, op2);
8187 if (! pat)
8188 return 0;
8189 emit_insn (pat);
8190 return target;
8191
8192 case IX86_BUILTIN_PSHUFW:
8193 icode = CODE_FOR_mmx_pshufw;
8194 arg0 = TREE_VALUE (arglist);
8195 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8196 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8197 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8198 tmode = insn_data[icode].operand[0].mode;
8199 mode0 = insn_data[icode].operand[2].mode;
8200 mode1 = insn_data[icode].operand[3].mode;
8201
8202 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8203 op0 = copy_to_mode_reg (mode0, op0);
8204 if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
8205 {
8206 /* @@@ better error message */
8207 error ("mask must be an immediate");
8208 return const0_rtx;
8209 }
8210 if (target == 0
8211 || GET_MODE (target) != tmode
8212 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8213 target = gen_reg_rtx (tmode);
8214 pat = GEN_FCN (icode) (target, target, op0, op1);
8215 if (! pat)
8216 return 0;
8217 emit_insn (pat);
8218 return target;
8219
8220 /* Composite intrinsics. */
8221 case IX86_BUILTIN_SETPS1:
8222 target = assign_386_stack_local (SFmode, 0);
8223 arg0 = TREE_VALUE (arglist);
8224 emit_move_insn (change_address (target, SFmode, XEXP (target, 0)),
8225 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8226 op0 = gen_reg_rtx (V4SFmode);
8227 emit_insn (gen_sse_loadss (op0, change_address (target, V4SFmode,
8228 XEXP (target, 0))));
8229 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
8230 return op0;
0f290768 8231
bd793c65
BS
8232 case IX86_BUILTIN_SETPS:
8233 target = assign_386_stack_local (V4SFmode, 0);
8234 op0 = change_address (target, SFmode, XEXP (target, 0));
8235 arg0 = TREE_VALUE (arglist);
8236 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8237 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8238 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
8239 emit_move_insn (op0,
8240 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8241 emit_move_insn (adj_offsettable_operand (op0, 4),
8242 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
8243 emit_move_insn (adj_offsettable_operand (op0, 8),
8244 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
8245 emit_move_insn (adj_offsettable_operand (op0, 12),
8246 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
8247 op0 = gen_reg_rtx (V4SFmode);
8248 emit_insn (gen_sse_movaps (op0, target));
8249 return op0;
8250
8251 case IX86_BUILTIN_CLRPS:
8252 target = gen_reg_rtx (TImode);
8253 emit_insn (gen_sse_clrti (target));
8254 return target;
8255
8256 case IX86_BUILTIN_LOADRPS:
8257 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
8258 gen_reg_rtx (V4SFmode), 1);
8259 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
8260 return target;
8261
8262 case IX86_BUILTIN_LOADPS1:
8263 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
8264 gen_reg_rtx (V4SFmode), 1);
8265 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
8266 return target;
8267
8268 case IX86_BUILTIN_STOREPS1:
8269 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
8270 case IX86_BUILTIN_STORERPS:
8271 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
8272
8273 case IX86_BUILTIN_MMX_ZERO:
8274 target = gen_reg_rtx (DImode);
8275 emit_insn (gen_mmx_clrdi (target));
8276 return target;
8277
8278 default:
8279 break;
8280 }
8281
8282 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
8283 if (d->code == fcode)
8284 {
8285 /* Compares are treated specially. */
8286 if (d->icode == CODE_FOR_maskcmpv4sf3
8287 || d->icode == CODE_FOR_vmmaskcmpv4sf3
8288 || d->icode == CODE_FOR_maskncmpv4sf3
8289 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
8290 return ix86_expand_sse_compare (d, arglist, target);
8291
8292 return ix86_expand_binop_builtin (d->icode, arglist, target);
8293 }
8294
8295 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
8296 if (d->code == fcode)
8297 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
0f290768 8298
bd793c65
BS
8299 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
8300 if (d->code == fcode)
8301 return ix86_expand_sse_comi (d, arglist, target);
0f290768 8302
bd793c65
BS
8303 fail:
8304 /* @@@ Should really do something sensible here. */
8305 return 0;
bd793c65 8306}
This page took 2.007036 seconds and 5 git commands to generate.