/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
   Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
2a2ab3f9 22#include "config.h"
1fba7553 23#include <setjmp.h>
bb5177ac 24#include "system.h"
2a2ab3f9 25#include "rtl.h"
6baf1cc8
BS
26#include "tree.h"
27#include "tm_p.h"
2a2ab3f9
JVA
28#include "regs.h"
29#include "hard-reg-set.h"
30#include "real.h"
31#include "insn-config.h"
32#include "conditions.h"
33#include "insn-flags.h"
34#include "output.h"
35#include "insn-attr.h"
2a2ab3f9 36#include "flags.h"
a8ffcc81 37#include "except.h"
ecbc4695 38#include "function.h"
00c79232 39#include "recog.h"
ced8dd8c 40#include "expr.h"
f103890b 41#include "toplev.h"
e075ae69 42#include "basic-block.h"
1526a060 43#include "ggc.h"
2a2ab3f9 44
/* Fallback for targets that do not define a stack-probe limit.
   NOTE(review): the -1 sentinel's meaning is defined by the users of
   this macro elsewhere in the port -- confirm against i386.h.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif
32b5b1aa
SC
49/* Processor costs (relative to an add) */
50struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 51 1, /* cost of an add instruction */
32b5b1aa
SC
52 1, /* cost of a lea instruction */
53 3, /* variable shift costs */
54 2, /* constant shift costs */
55 6, /* cost of starting a multiply */
56 1, /* cost of multiply per each bit set */
e075ae69 57 23, /* cost of a divide/mod */
96e7ae40 58 15, /* "large" insn */
e2e52e1b 59 3, /* MOVE_RATIO */
7c6b971d 60 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
61 {2, 4, 2}, /* cost of loading integer registers
62 in QImode, HImode and SImode.
0f290768 63 Relative to reg-reg move (2). */
96e7ae40
JH
64 {2, 4, 2}, /* cost of storing integer registers */
65 2, /* cost of reg,reg fld/fst */
66 {8, 8, 8}, /* cost of loading fp registers
67 in SFmode, DFmode and XFmode */
68 {8, 8, 8} /* cost of loading integer registers */
32b5b1aa
SC
69};
70
71struct processor_costs i486_cost = { /* 486 specific costs */
72 1, /* cost of an add instruction */
73 1, /* cost of a lea instruction */
74 3, /* variable shift costs */
75 2, /* constant shift costs */
76 12, /* cost of starting a multiply */
77 1, /* cost of multiply per each bit set */
e075ae69 78 40, /* cost of a divide/mod */
96e7ae40 79 15, /* "large" insn */
e2e52e1b 80 3, /* MOVE_RATIO */
7c6b971d 81 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
82 {2, 4, 2}, /* cost of loading integer registers
83 in QImode, HImode and SImode.
0f290768 84 Relative to reg-reg move (2). */
96e7ae40
JH
85 {2, 4, 2}, /* cost of storing integer registers */
86 2, /* cost of reg,reg fld/fst */
87 {8, 8, 8}, /* cost of loading fp registers
88 in SFmode, DFmode and XFmode */
89 {8, 8, 8} /* cost of loading integer registers */
32b5b1aa
SC
90};
91
e5cb57e8 92struct processor_costs pentium_cost = {
32b5b1aa
SC
93 1, /* cost of an add instruction */
94 1, /* cost of a lea instruction */
856b07a1 95 4, /* variable shift costs */
e5cb57e8 96 1, /* constant shift costs */
856b07a1
SC
97 11, /* cost of starting a multiply */
98 0, /* cost of multiply per each bit set */
e075ae69 99 25, /* cost of a divide/mod */
96e7ae40 100 8, /* "large" insn */
e2e52e1b 101 6, /* MOVE_RATIO */
7c6b971d 102 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
0f290768 105 Relative to reg-reg move (2). */
96e7ae40
JH
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {2, 2, 6}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {4, 4, 6} /* cost of loading integer registers */
32b5b1aa
SC
111};
112
856b07a1
SC
113struct processor_costs pentiumpro_cost = {
114 1, /* cost of an add instruction */
115 1, /* cost of a lea instruction */
e075ae69 116 1, /* variable shift costs */
856b07a1 117 1, /* constant shift costs */
369e59b1 118 4, /* cost of starting a multiply */
856b07a1 119 0, /* cost of multiply per each bit set */
e075ae69 120 17, /* cost of a divide/mod */
96e7ae40 121 8, /* "large" insn */
e2e52e1b 122 6, /* MOVE_RATIO */
7c6b971d 123 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
124 {4, 4, 4}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
0f290768 126 Relative to reg-reg move (2). */
96e7ae40
JH
127 {2, 2, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {2, 2, 6}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {4, 4, 6} /* cost of loading integer registers */
856b07a1
SC
132};
133
a269a03c
JC
134struct processor_costs k6_cost = {
135 1, /* cost of an add instruction */
e075ae69 136 2, /* cost of a lea instruction */
a269a03c
JC
137 1, /* variable shift costs */
138 1, /* constant shift costs */
73fe76e4 139 3, /* cost of starting a multiply */
a269a03c 140 0, /* cost of multiply per each bit set */
e075ae69 141 18, /* cost of a divide/mod */
96e7ae40 142 8, /* "large" insn */
e2e52e1b 143 4, /* MOVE_RATIO */
7c6b971d 144 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
145 {4, 5, 4}, /* cost of loading integer registers
146 in QImode, HImode and SImode.
0f290768 147 Relative to reg-reg move (2). */
96e7ae40
JH
148 {2, 3, 2}, /* cost of storing integer registers */
149 4, /* cost of reg,reg fld/fst */
150 {6, 6, 6}, /* cost of loading fp registers
151 in SFmode, DFmode and XFmode */
152 {4, 4, 4} /* cost of loading integer registers */
a269a03c
JC
153};
154
309ada50
JH
155struct processor_costs athlon_cost = {
156 1, /* cost of an add instruction */
0b5107cf 157 2, /* cost of a lea instruction */
309ada50
JH
158 1, /* variable shift costs */
159 1, /* constant shift costs */
160 5, /* cost of starting a multiply */
161 0, /* cost of multiply per each bit set */
0b5107cf 162 42, /* cost of a divide/mod */
309ada50 163 8, /* "large" insn */
e2e52e1b 164 9, /* MOVE_RATIO */
309ada50
JH
165 4, /* cost for loading QImode using movzbl */
166 {4, 5, 4}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
0f290768 168 Relative to reg-reg move (2). */
309ada50
JH
169 {2, 3, 2}, /* cost of storing integer registers */
170 4, /* cost of reg,reg fld/fst */
0b5107cf 171 {6, 6, 20}, /* cost of loading fp registers
309ada50 172 in SFmode, DFmode and XFmode */
0b5107cf 173 {4, 4, 16} /* cost of loading integer registers */
309ada50
JH
174};
175
32b5b1aa
SC
176struct processor_costs *ix86_cost = &pentium_cost;
177
a269a03c
JC
178/* Processor feature/optimization bitmasks. */
179#define m_386 (1<<PROCESSOR_I386)
180#define m_486 (1<<PROCESSOR_I486)
181#define m_PENT (1<<PROCESSOR_PENTIUM)
182#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
183#define m_K6 (1<<PROCESSOR_K6)
309ada50 184#define m_ATHLON (1<<PROCESSOR_ATHLON)
a269a03c 185
309ada50
JH
186const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
187const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
a269a03c 188const int x86_zero_extend_with_and = m_486 | m_PENT;
369e59b1 189const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */;
e075ae69 190const int x86_double_with_add = ~m_386;
a269a03c 191const int x86_use_bit_test = m_386;
e2e52e1b 192const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
a269a03c
JC
193const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
194const int x86_use_any_reg = m_486;
309ada50
JH
195const int x86_cmove = m_PPRO | m_ATHLON;
196const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
c0c102a9 197const int x86_use_sahf = m_PPRO | m_K6;
e075ae69
RH
198const int x86_partial_reg_stall = m_PPRO;
199const int x86_use_loop = m_K6;
309ada50 200const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
e075ae69
RH
201const int x86_use_mov0 = m_K6;
202const int x86_use_cltd = ~(m_PENT | m_K6);
203const int x86_read_modify_write = ~m_PENT;
204const int x86_read_modify = ~(m_PENT | m_PPRO);
205const int x86_split_long_moves = m_PPRO;
e9e80858 206const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
f90800f8 207const int x86_single_stringop = m_386;
d9f32422
JH
208const int x86_qimode_math = ~(0);
209const int x86_promote_qi_regs = 0;
210const int x86_himode_math = ~(m_PPRO);
211const int x86_promote_hi_regs = m_PPRO;
bdeb029c
JH
212const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
213const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
214const int x86_add_esp_4 = m_ATHLON | m_K6;
215const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
0b5107cf
JH
216const int x86_integer_DFmode_moves = ~m_ATHLON;
217const int x86_partial_reg_dependency = m_ATHLON;
218const int x86_memory_mismatch_stall = m_ATHLON;
a269a03c 219
564d80f4 220#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
2a2ab3f9 221
e075ae69
RH
222const char * const hi_reg_name[] = HI_REGISTER_NAMES;
223const char * const qi_reg_name[] = QI_REGISTER_NAMES;
224const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
4c0d89b5
RS
225
226/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 227 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 228
e075ae69 229enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
230{
231 /* ax, dx, cx, bx */
ab408a86 232 AREG, DREG, CREG, BREG,
4c0d89b5 233 /* si, di, bp, sp */
e075ae69 234 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
235 /* FP registers */
236 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 237 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 238 /* arg pointer */
83774849 239 NON_Q_REGS,
564d80f4 240 /* flags, fpsr, dirflag, frame */
a7180f70
BS
241 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
242 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
243 SSE_REGS, SSE_REGS,
244 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
245 MMX_REGS, MMX_REGS
4c0d89b5 246};
c572e5ba 247
83774849
RH
248/* The "default" register map. */
249
0f290768 250int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
251{
252 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
253 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
254 -1, -1, -1, -1, /* arg, flags, fpsr, dir */
a7180f70
BS
255 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
256 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
83774849
RH
257};
258
259/* Define the register numbers to be used in Dwarf debugging information.
260 The SVR4 reference port C compiler uses the following register numbers
261 in its Dwarf output code:
262 0 for %eax (gcc regno = 0)
263 1 for %ecx (gcc regno = 2)
264 2 for %edx (gcc regno = 1)
265 3 for %ebx (gcc regno = 3)
266 4 for %esp (gcc regno = 7)
267 5 for %ebp (gcc regno = 6)
268 6 for %esi (gcc regno = 4)
269 7 for %edi (gcc regno = 5)
270 The following three DWARF register numbers are never generated by
271 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
272 believes these numbers have these meanings.
273 8 for %eip (no gcc equivalent)
274 9 for %eflags (gcc regno = 17)
275 10 for %trapno (no gcc equivalent)
276 It is not at all clear how we should number the FP stack registers
277 for the x86 architecture. If the version of SDB on x86/svr4 were
278 a bit less brain dead with respect to floating-point then we would
279 have a precedent to follow with respect to DWARF register numbers
280 for x86 FP registers, but the SDB on x86/svr4 is so completely
281 broken with respect to FP registers that it is hardly worth thinking
282 of it as something to strive for compatibility with.
283 The version of x86/svr4 SDB I have at the moment does (partially)
284 seem to believe that DWARF register number 11 is associated with
285 the x86 register %st(0), but that's about all. Higher DWARF
286 register numbers don't seem to be associated with anything in
287 particular, and even for DWARF regno 11, SDB only seems to under-
288 stand that it should say that a variable lives in %st(0) (when
289 asked via an `=' command) if we said it was in DWARF regno 11,
290 but SDB still prints garbage when asked for the value of the
291 variable in question (via a `/' command).
292 (Also note that the labels SDB prints for various FP stack regs
293 when doing an `x' command are all wrong.)
294 Note that these problems generally don't affect the native SVR4
295 C compiler because it doesn't allow the use of -O with -g and
296 because when it is *not* optimizing, it allocates a memory
297 location for each floating-point variable, and the memory
298 location is what gets described in the DWARF AT_location
299 attribute for the variable in question.
300 Regardless of the severe mental illness of the x86/svr4 SDB, we
301 do something sensible here and we use the following DWARF
302 register numbers. Note that these are all stack-top-relative
303 numbers.
304 11 for %st(0) (gcc regno = 8)
305 12 for %st(1) (gcc regno = 9)
306 13 for %st(2) (gcc regno = 10)
307 14 for %st(3) (gcc regno = 11)
308 15 for %st(4) (gcc regno = 12)
309 16 for %st(5) (gcc regno = 13)
310 17 for %st(6) (gcc regno = 14)
311 18 for %st(7) (gcc regno = 15)
312*/
0f290768 313int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
314{
315 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
316 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
317 -1, 9, -1, -1, /* arg, flags, fpsr, dir */
a7180f70
BS
318 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
319 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
83774849
RH
320};
321
c572e5ba
JVA
322/* Test and compare insns in i386.md store the information needed to
323 generate branch and scc insns here. */
324
e075ae69
RH
325struct rtx_def *ix86_compare_op0 = NULL_RTX;
326struct rtx_def *ix86_compare_op1 = NULL_RTX;
f5316dfe 327
36edd3cc
BS
328#define MAX_386_STACK_LOCALS 2
329
330/* Define the structure for the machine field in struct function. */
331struct machine_function
332{
333 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
334};
335
01d939e8 336#define ix86_stack_locals (cfun->machine->stack_locals)
36edd3cc 337
c8c5cb99 338/* which cpu are we scheduling for */
e42ea7f9 339enum processor_type ix86_cpu;
c8c5cb99
SC
340
341/* which instruction set architecture to use. */
c942177e 342int ix86_arch;
c8c5cb99
SC
343
344/* Strings to hold which cpu and instruction set architecture to use. */
9c23aa47
ZW
345const char *ix86_cpu_string; /* for -mcpu=<xxx> */
346const char *ix86_arch_string; /* for -march=<xxx> */
c8c5cb99 347
f5316dfe 348/* Register allocation order */
e075ae69 349const char *ix86_reg_alloc_order;
f5316dfe
MM
350static char regs_allocated[FIRST_PSEUDO_REGISTER];
351
0f290768 352/* # of registers to use to pass arguments. */
e075ae69 353const char *ix86_regparm_string;
e9a25f70 354
e075ae69
RH
355/* ix86_regparm_string as a number */
356int ix86_regparm;
e9a25f70
JL
357
358/* Alignment to use for loops and jumps: */
359
0f290768 360/* Power of two alignment for loops. */
e075ae69 361const char *ix86_align_loops_string;
e9a25f70 362
0f290768 363/* Power of two alignment for non-loop jumps. */
e075ae69 364const char *ix86_align_jumps_string;
e9a25f70 365
3af4bd89 366/* Power of two alignment for stack boundary in bytes. */
e075ae69 367const char *ix86_preferred_stack_boundary_string;
3af4bd89
JH
368
369/* Preferred alignment for stack boundary in bits. */
e075ae69 370int ix86_preferred_stack_boundary;
3af4bd89 371
e9a25f70 372/* Values 1-5: see jump.c */
e075ae69
RH
373int ix86_branch_cost;
374const char *ix86_branch_cost_string;
e9a25f70 375
0f290768 376/* Power of two alignment for functions. */
e075ae69
RH
377int ix86_align_funcs;
378const char *ix86_align_funcs_string;
b08de47e 379
0f290768 380/* Power of two alignment for loops. */
e075ae69 381int ix86_align_loops;
b08de47e 382
0f290768 383/* Power of two alignment for non-loop jumps. */
e075ae69
RH
384int ix86_align_jumps;
385\f
f6da8bc3
KG
386static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
387static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
e075ae69 388 int, int, FILE *));
f6da8bc3 389static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
3a3677ff
RH
390static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
391 rtx *, rtx *));
f6da8bc3
KG
392static rtx gen_push PARAMS ((rtx));
393static int memory_address_length PARAMS ((rtx addr));
394static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
395static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
396static int ix86_safe_length PARAMS ((rtx));
397static enum attr_memory ix86_safe_memory PARAMS ((rtx));
398static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
399static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
400static void ix86_dump_ppro_packet PARAMS ((FILE *));
401static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
402static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
e075ae69 403 rtx));
f6da8bc3
KG
404static void ix86_init_machine_status PARAMS ((struct function *));
405static void ix86_mark_machine_status PARAMS ((struct function *));
37b15744 406static void ix86_free_machine_status PARAMS ((struct function *));
2b589241 407static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
f6da8bc3 408static int ix86_safe_length_prefix PARAMS ((rtx));
564d80f4
JH
409static HOST_WIDE_INT ix86_compute_frame_size PARAMS((HOST_WIDE_INT,
410 int *, int *, int *));
0903fcab
JH
411static int ix86_nsaved_regs PARAMS((void));
412static void ix86_emit_save_regs PARAMS((void));
da2d1d3a 413static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
0903fcab 414static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
c6991660
KG
415static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
416static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
55efb413 417static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
e075ae69
RH
418
419struct ix86_address
420{
421 rtx base, index, disp;
422 HOST_WIDE_INT scale;
423};
b08de47e 424
e075ae69 425static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
bd793c65
BS
426
427struct builtin_description;
428static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
429 rtx));
430static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
431 rtx));
432static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
433static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
434static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
435static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
436static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
c0c102a9
JH
437static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
438static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
439 enum rtx_code *,
440 enum rtx_code *,
441 enum rtx_code *));
9e7adcb3
JH
442static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
443 rtx *, rtx *));
444static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
445static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
446static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
447static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
e075ae69 448\f
f5316dfe
MM
449/* Sometimes certain combinations of command options do not make
450 sense on a particular target machine. You can define a macro
451 `OVERRIDE_OPTIONS' to take account of this. This macro, if
452 defined, is executed once just after all the command options have
453 been parsed.
454
455 Don't use this macro to turn on various extra optimizations for
456 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
457
458void
459override_options ()
460{
e075ae69
RH
461 /* Comes from final.c -- no real reason to change it. */
462#define MAX_CODE_ALIGN 16
f5316dfe 463
c8c5cb99
SC
464 static struct ptt
465 {
e075ae69
RH
466 struct processor_costs *cost; /* Processor costs */
467 int target_enable; /* Target flags to enable. */
468 int target_disable; /* Target flags to disable. */
469 int align_loop; /* Default alignments. */
470 int align_jump;
471 int align_func;
472 int branch_cost;
473 }
0f290768 474 const processor_target_table[PROCESSOR_max] =
e075ae69
RH
475 {
476 {&i386_cost, 0, 0, 2, 2, 2, 1},
477 {&i486_cost, 0, 0, 4, 4, 4, 1},
478 {&pentium_cost, 0, 0, -4, -4, -4, 1},
479 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
309ada50
JH
480 {&k6_cost, 0, 0, -5, -5, 4, 1},
481 {&athlon_cost, 0, 0, 4, -4, 4, 1}
e075ae69
RH
482 };
483
484 static struct pta
485 {
0f290768 486 const char *name; /* processor name or nickname. */
e075ae69
RH
487 enum processor_type processor;
488 }
0f290768 489 const processor_alias_table[] =
e075ae69
RH
490 {
491 {"i386", PROCESSOR_I386},
492 {"i486", PROCESSOR_I486},
493 {"i586", PROCESSOR_PENTIUM},
494 {"pentium", PROCESSOR_PENTIUM},
495 {"i686", PROCESSOR_PENTIUMPRO},
496 {"pentiumpro", PROCESSOR_PENTIUMPRO},
e075ae69 497 {"k6", PROCESSOR_K6},
309ada50 498 {"athlon", PROCESSOR_ATHLON},
3af4bd89 499 };
c8c5cb99 500
0f290768 501 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
c8c5cb99 502
f5316dfe
MM
503#ifdef SUBTARGET_OVERRIDE_OPTIONS
504 SUBTARGET_OVERRIDE_OPTIONS;
505#endif
506
5a6ee819 507 ix86_arch = PROCESSOR_I386;
e075ae69
RH
508 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
509
510 if (ix86_arch_string != 0)
511 {
512 int i;
513 for (i = 0; i < pta_size; i++)
514 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
515 {
516 ix86_arch = processor_alias_table[i].processor;
517 /* Default cpu tuning to the architecture. */
518 ix86_cpu = ix86_arch;
519 break;
520 }
521 if (i == pta_size)
522 error ("bad value (%s) for -march= switch", ix86_arch_string);
523 }
524
525 if (ix86_cpu_string != 0)
526 {
527 int i;
528 for (i = 0; i < pta_size; i++)
529 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
530 {
531 ix86_cpu = processor_alias_table[i].processor;
532 break;
533 }
534 if (i == pta_size)
535 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
536 }
537
538 ix86_cost = processor_target_table[ix86_cpu].cost;
539 target_flags |= processor_target_table[ix86_cpu].target_enable;
540 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
541
36edd3cc
BS
542 /* Arrange to set up i386_stack_locals for all functions. */
543 init_machine_status = ix86_init_machine_status;
1526a060 544 mark_machine_status = ix86_mark_machine_status;
37b15744 545 free_machine_status = ix86_free_machine_status;
36edd3cc 546
e9a25f70 547 /* Validate registers in register allocation order. */
e075ae69 548 if (ix86_reg_alloc_order)
f5316dfe 549 {
e075ae69
RH
550 int i, ch;
551 for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
f5316dfe 552 {
00c79232 553 int regno = 0;
79325812 554
f5316dfe
MM
555 switch (ch)
556 {
557 case 'a': regno = 0; break;
558 case 'd': regno = 1; break;
559 case 'c': regno = 2; break;
560 case 'b': regno = 3; break;
561 case 'S': regno = 4; break;
562 case 'D': regno = 5; break;
563 case 'B': regno = 6; break;
564
565 default: fatal ("Register '%c' is unknown", ch);
566 }
567
568 if (regs_allocated[regno])
e9a25f70 569 fatal ("Register '%c' already specified in allocation order", ch);
f5316dfe
MM
570
571 regs_allocated[regno] = 1;
572 }
573 }
b08de47e 574
0f290768 575 /* Validate -mregparm= value. */
e075ae69 576 if (ix86_regparm_string)
b08de47e 577 {
e075ae69
RH
578 ix86_regparm = atoi (ix86_regparm_string);
579 if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
e9a25f70 580 fatal ("-mregparm=%d is not between 0 and %d",
e075ae69 581 ix86_regparm, REGPARM_MAX);
b08de47e
MM
582 }
583
e9a25f70 584 /* Validate -malign-loops= value, or provide default. */
e075ae69
RH
585 ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
586 if (ix86_align_loops_string)
b08de47e 587 {
e075ae69
RH
588 ix86_align_loops = atoi (ix86_align_loops_string);
589 if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
b08de47e 590 fatal ("-malign-loops=%d is not between 0 and %d",
e075ae69 591 ix86_align_loops, MAX_CODE_ALIGN);
b08de47e 592 }
3af4bd89
JH
593
594 /* Validate -malign-jumps= value, or provide default. */
e075ae69
RH
595 ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
596 if (ix86_align_jumps_string)
b08de47e 597 {
e075ae69
RH
598 ix86_align_jumps = atoi (ix86_align_jumps_string);
599 if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
b08de47e 600 fatal ("-malign-jumps=%d is not between 0 and %d",
e075ae69 601 ix86_align_jumps, MAX_CODE_ALIGN);
b08de47e 602 }
b08de47e 603
0f290768 604 /* Validate -malign-functions= value, or provide default. */
e075ae69
RH
605 ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
606 if (ix86_align_funcs_string)
b08de47e 607 {
e075ae69
RH
608 ix86_align_funcs = atoi (ix86_align_funcs_string);
609 if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
b08de47e 610 fatal ("-malign-functions=%d is not between 0 and %d",
e075ae69 611 ix86_align_funcs, MAX_CODE_ALIGN);
b08de47e 612 }
3af4bd89 613
e4c0478d 614 /* Validate -mpreferred-stack-boundary= value, or provide default.
3af4bd89 615 The default of 128 bits is for Pentium III's SSE __m128. */
e075ae69
RH
616 ix86_preferred_stack_boundary = 128;
617 if (ix86_preferred_stack_boundary_string)
3af4bd89 618 {
e075ae69 619 int i = atoi (ix86_preferred_stack_boundary_string);
3af4bd89 620 if (i < 2 || i > 31)
e4c0478d 621 fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
e075ae69 622 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 623 }
77a989d1 624
0f290768 625 /* Validate -mbranch-cost= value, or provide default. */
e075ae69
RH
626 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
627 if (ix86_branch_cost_string)
804a8ee0 628 {
e075ae69
RH
629 ix86_branch_cost = atoi (ix86_branch_cost_string);
630 if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
631 fatal ("-mbranch-cost=%d is not between 0 and 5",
632 ix86_branch_cost);
804a8ee0 633 }
804a8ee0 634
e9a25f70
JL
635 /* Keep nonleaf frame pointers. */
636 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 637 flag_omit_frame_pointer = 1;
e075ae69
RH
638
639 /* If we're doing fast math, we don't care about comparison order
640 wrt NaNs. This lets us use a shorter comparison sequence. */
641 if (flag_fast_math)
642 target_flags &= ~MASK_IEEE_FP;
643
a7180f70
BS
644 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
645 on by -msse. */
646 if (TARGET_SSE)
647 target_flags |= MASK_MMX;
f5316dfe
MM
648}
649\f
650/* A C statement (sans semicolon) to choose the order in which to
651 allocate hard registers for pseudo-registers local to a basic
652 block.
653
654 Store the desired register order in the array `reg_alloc_order'.
655 Element 0 should be the register to allocate first; element 1, the
656 next register; and so on.
657
658 The macro body should not assume anything about the contents of
659 `reg_alloc_order' before execution of the macro.
660
661 On most machines, it is not necessary to define this macro. */
662
663void
664order_regs_for_local_alloc ()
665{
00c79232 666 int i, ch, order;
f5316dfe 667
e9a25f70
JL
668 /* User specified the register allocation order. */
669
e075ae69 670 if (ix86_reg_alloc_order)
f5316dfe 671 {
e075ae69 672 for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
f5316dfe 673 {
00c79232 674 int regno = 0;
79325812 675
f5316dfe
MM
676 switch (ch)
677 {
678 case 'a': regno = 0; break;
679 case 'd': regno = 1; break;
680 case 'c': regno = 2; break;
681 case 'b': regno = 3; break;
682 case 'S': regno = 4; break;
683 case 'D': regno = 5; break;
684 case 'B': regno = 6; break;
685 }
686
687 reg_alloc_order[order++] = regno;
688 }
689
690 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
691 {
e9a25f70 692 if (! regs_allocated[i])
f5316dfe
MM
693 reg_alloc_order[order++] = i;
694 }
695 }
696
0f290768 697 /* If user did not specify a register allocation order, use natural order. */
f5316dfe
MM
698 else
699 {
700 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
701 reg_alloc_order[i] = i;
f5316dfe
MM
702 }
703}
32b5b1aa
SC
704\f
705void
c6aded7c 706optimization_options (level, size)
32b5b1aa 707 int level;
bb5177ac 708 int size ATTRIBUTE_UNUSED;
32b5b1aa 709{
e9a25f70
JL
710 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
711 make the problem with not enough registers even worse. */
32b5b1aa
SC
712#ifdef INSN_SCHEDULING
713 if (level > 1)
714 flag_schedule_insns = 0;
715#endif
716}
b08de47e
MM
717\f
718/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
719 attribute for DECL. The attributes in ATTRIBUTES have previously been
720 assigned to DECL. */
721
722int
e075ae69 723ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
bb5177ac
RL
724 tree decl ATTRIBUTE_UNUSED;
725 tree attributes ATTRIBUTE_UNUSED;
726 tree identifier ATTRIBUTE_UNUSED;
727 tree args ATTRIBUTE_UNUSED;
b08de47e
MM
728{
729 return 0;
730}
731
732/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
733 attribute for TYPE. The attributes in ATTRIBUTES have previously been
734 assigned to TYPE. */
735
736int
e075ae69 737ix86_valid_type_attribute_p (type, attributes, identifier, args)
b08de47e 738 tree type;
bb5177ac 739 tree attributes ATTRIBUTE_UNUSED;
b08de47e
MM
740 tree identifier;
741 tree args;
742{
743 if (TREE_CODE (type) != FUNCTION_TYPE
ac478ac0 744 && TREE_CODE (type) != METHOD_TYPE
b08de47e
MM
745 && TREE_CODE (type) != FIELD_DECL
746 && TREE_CODE (type) != TYPE_DECL)
747 return 0;
748
749 /* Stdcall attribute says callee is responsible for popping arguments
750 if they are not variable. */
751 if (is_attribute_p ("stdcall", identifier))
752 return (args == NULL_TREE);
753
0f290768 754 /* Cdecl attribute says the callee is a normal C declaration. */
b08de47e
MM
755 if (is_attribute_p ("cdecl", identifier))
756 return (args == NULL_TREE);
757
758 /* Regparm attribute specifies how many integer arguments are to be
0f290768 759 passed in registers. */
b08de47e
MM
760 if (is_attribute_p ("regparm", identifier))
761 {
762 tree cst;
763
e9a25f70 764 if (! args || TREE_CODE (args) != TREE_LIST
b08de47e
MM
765 || TREE_CHAIN (args) != NULL_TREE
766 || TREE_VALUE (args) == NULL_TREE)
767 return 0;
768
769 cst = TREE_VALUE (args);
770 if (TREE_CODE (cst) != INTEGER_CST)
771 return 0;
772
cce097f1 773 if (compare_tree_int (cst, REGPARM_MAX) > 0)
b08de47e
MM
774 return 0;
775
776 return 1;
777 }
778
779 return 0;
780}
781
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  With -mrtd
     stdcall is the default, so a "cdecl" marking is the exception;
     without it, "stdcall" is.  */
  const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  /* Only function types carry calling-convention attributes.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return types (cdecl vs stdcall).  The '!'
     operators normalize each lookup result to 0/1 so that only the
     presence vs. absence of the attribute is compared, not the
     particular TREE_LIST nodes returned.  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}
b08de47e
MM
803\f
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  /* -mrtd applies except to library calls (identified by an
     IDENTIFIER_NODE instead of a function decl).  */
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall functions will pop the stack if not variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    /* A NULL arg list means an unprototyped (possibly variadic)
       function; only pop when the prototype ends in void.  */
    if (rtd
	&& (TYPE_ARG_TYPES (funtype) == NULL_TREE
	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		== void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument.  The callee pops the
     hidden return-pointer word.  */
  if (aggregate_value_p (TREE_TYPE (funtype)))
    return GET_MODE_SIZE (Pmode);

  return 0;
}
b08de47e
MM
849\f
/* Argument support functions.  */

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  /* Start from an all-zero state (zero_cum is static, hence
     zero-initialized).  */
  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.
     The global -mregparm value may be overridden per-function by a
     regparm attribute.  */
  cum->nregs = ix86_regparm;
  if (fntype)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_mode' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    cum->nregs = 0;
	}
    }

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
912
/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* whether or not the argument was named */
{
  /* Size of this argument in bytes, then rounded up to whole words.  */
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

  cum->words += words;
  cum->nregs -= words;
  cum->regno += words;

  /* Once the register budget is exhausted (or overrun), all further
     arguments go on the stack.  */
  if (cum->nregs <= 0)
    {
      cum->nregs = 0;
      cum->regno = 0;
    }

  return;
}
945
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */

struct rtx_def *
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  switch (mode)
    {
      /* For now, pass fp/complex values on the stack.  */
    default:
      break;

    case BLKmode:
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      /* Only use a register if the whole argument still fits in the
	 remaining register budget.  */
      if (words <= cum->nregs)
	ret = gen_rtx_REG (mode, cum->regno);
      break;
    }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
e075ae69 1003\f
8bad7136
JL
1004
1005/* Return nonzero if OP is (const_int 1), else return zero. */
1006
1007int
1008const_int_1_operand (op, mode)
1009 rtx op;
1010 enum machine_mode mode ATTRIBUTE_UNUSED;
1011{
1012 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
1013}
1014
e075ae69
RH
/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      /* UNSPEC numbers 6 and 7 — presumably the @GOT/@GOTOFF wrappers
	 emitted by the PIC machinery; TODO confirm against i386.md.  */
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && XINT (op, 1) >= 6
	      && XINT (op, 1) <= 7))
	return 1;
      /* Otherwise only "something + const_int" is acceptable.  */
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != 7)
	return 0;

      /* Look inside the UNSPEC for the underlying symbol.  */
      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}
2a2ab3f9 1060
e075ae69 1061/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 1062
e075ae69
RH
1063int
1064pic_symbolic_operand (op, mode)
1065 register rtx op;
1066 enum machine_mode mode ATTRIBUTE_UNUSED;
1067{
1068 if (GET_CODE (op) == CONST)
2a2ab3f9 1069 {
e075ae69
RH
1070 op = XEXP (op, 0);
1071 if (GET_CODE (op) == UNSPEC)
1072 return 1;
1073 if (GET_CODE (op) != PLUS
1074 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1075 return 0;
1076 op = XEXP (op, 0);
1077 if (GET_CODE (op) == UNSPEC)
1078 return 1;
2a2ab3f9 1079 }
e075ae69 1080 return 0;
2a2ab3f9 1081}
2a2ab3f9 1082
28d52ffb
RH
/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Half-pic doesn't allow anything but registers and constants.
     We've just taken care of the later.  */
  if (HALF_PIC_P ())
    return register_operand (op, Pmode);

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}
79325812 1118
e075ae69
RH
1119int
1120constant_call_address_operand (op, mode)
1121 rtx op;
1122 enum machine_mode mode ATTRIBUTE_UNUSED;
1123{
eaf19aba
JJ
1124 if (GET_CODE (op) == CONST
1125 && GET_CODE (XEXP (op, 0)) == PLUS
1126 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1127 op = XEXP (XEXP (op, 0), 0);
e1ff012c 1128 return GET_CODE (op) == SYMBOL_REF;
e075ae69 1129}
2a2ab3f9 1130
e075ae69 1131/* Match exactly zero and one. */
e9a25f70 1132
0f290768 1133int
e075ae69
RH
1134const0_operand (op, mode)
1135 register rtx op;
1136 enum machine_mode mode;
1137{
1138 return op == CONST0_RTX (mode);
1139}
e9a25f70 1140
0f290768 1141int
e075ae69
RH
1142const1_operand (op, mode)
1143 register rtx op;
1144 enum machine_mode mode ATTRIBUTE_UNUSED;
1145{
1146 return op == const1_rtx;
1147}
2a2ab3f9 1148
e075ae69 1149/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 1150
e075ae69
RH
1151int
1152const248_operand (op, mode)
1153 register rtx op;
1154 enum machine_mode mode ATTRIBUTE_UNUSED;
1155{
1156 return (GET_CODE (op) == CONST_INT
1157 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1158}
e9a25f70 1159
e075ae69 1160/* True if this is a constant appropriate for an increment or decremenmt. */
81fd0956 1161
e075ae69
RH
1162int
1163incdec_operand (op, mode)
1164 register rtx op;
1165 enum machine_mode mode;
1166{
1167 if (op == const1_rtx || op == constm1_rtx)
1168 return 1;
1169 if (GET_CODE (op) != CONST_INT)
1170 return 0;
1171 if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
1172 return 1;
1173 if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
1174 return 1;
1175 if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
1176 return 1;
1177 return 0;
1178}
2a2ab3f9 1179
0f290768 1180/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
1181 register eliminable to the stack pointer. Otherwise, this is
1182 a register operand.
2a2ab3f9 1183
e075ae69
RH
1184 This is used to prevent esp from being used as an index reg.
1185 Which would only happen in pathological cases. */
5f1ec3e6 1186
e075ae69
RH
1187int
1188reg_no_sp_operand (op, mode)
1189 register rtx op;
1190 enum machine_mode mode;
1191{
1192 rtx t = op;
1193 if (GET_CODE (t) == SUBREG)
1194 t = SUBREG_REG (t);
564d80f4 1195 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 1196 return 0;
2a2ab3f9 1197
e075ae69 1198 return register_operand (op, mode);
2a2ab3f9 1199}
b840bfb0 1200
915119a5
BS
1201int
1202mmx_reg_operand (op, mode)
1203 register rtx op;
bd793c65 1204 enum machine_mode mode ATTRIBUTE_UNUSED;
915119a5
BS
1205{
1206 return MMX_REG_P (op);
1207}
1208
2c5a510c
RH
1209/* Return false if this is any eliminable register. Otherwise
1210 general_operand. */
1211
1212int
1213general_no_elim_operand (op, mode)
1214 register rtx op;
1215 enum machine_mode mode;
1216{
1217 rtx t = op;
1218 if (GET_CODE (t) == SUBREG)
1219 t = SUBREG_REG (t);
1220 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1221 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1222 || t == virtual_stack_dynamic_rtx)
1223 return 0;
1224
1225 return general_operand (op, mode);
1226}
1227
1228/* Return false if this is any eliminable register. Otherwise
1229 register_operand or const_int. */
1230
1231int
1232nonmemory_no_elim_operand (op, mode)
1233 register rtx op;
1234 enum machine_mode mode;
1235{
1236 rtx t = op;
1237 if (GET_CODE (t) == SUBREG)
1238 t = SUBREG_REG (t);
1239 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1240 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1241 || t == virtual_stack_dynamic_rtx)
1242 return 0;
1243
1244 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1245}
1246
e075ae69 1247/* Return true if op is a Q_REGS class register. */
b840bfb0 1248
e075ae69
RH
1249int
1250q_regs_operand (op, mode)
1251 register rtx op;
1252 enum machine_mode mode;
b840bfb0 1253{
e075ae69
RH
1254 if (mode != VOIDmode && GET_MODE (op) != mode)
1255 return 0;
1256 if (GET_CODE (op) == SUBREG)
1257 op = SUBREG_REG (op);
1258 return QI_REG_P (op);
0f290768 1259}
b840bfb0 1260
e075ae69 1261/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 1262
e075ae69
RH
1263int
1264non_q_regs_operand (op, mode)
1265 register rtx op;
1266 enum machine_mode mode;
1267{
1268 if (mode != VOIDmode && GET_MODE (op) != mode)
1269 return 0;
1270 if (GET_CODE (op) == SUBREG)
1271 op = SUBREG_REG (op);
1272 return NON_QI_REG_P (op);
0f290768 1273}
b840bfb0 1274
915119a5
BS
1275/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
1276 insns. */
1277int
1278sse_comparison_operator (op, mode)
1279 rtx op;
1280 enum machine_mode mode ATTRIBUTE_UNUSED;
1281{
1282 enum rtx_code code = GET_CODE (op);
1283 return code == EQ || code == LT || code == LE || code == UNORDERED;
1284}
/* Return 1 if OP is a valid comparison operator in valid mode.
   The comparison code must be representable given the mode of the
   flags value it consumes (CC*mode of its first operand).  */
int
ix86_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));

  /* FP comparisons: valid only when they need neither a bypass nor a
     second comparison to be expressed.  */
  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      return (bypass_code == NIL && second_code == NIL);
    }
  /* Integer comparisons: which codes are usable depends on which
     flags the producing instruction actually set (the CC mode).  */
  switch (code)
    {
    case EQ: case NE:
      return 1;
    case LT: case GE:
      if (inmode == CCmode || inmode == CCGCmode
	  || inmode == CCGOCmode || inmode == CCNOmode)
	return 1;
      return 0;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      /* Unsigned codes need the carry flag, only valid in full CCmode.  */
      if (inmode == CCmode)
	return 1;
      return 0;
    case GT: case LE:
      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}
1326
/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  /* NOTE(review): XEXP (op, 0) is read before the '<'-class check
     below confirms OP is a comparison — relies on callers passing
     comparison-shaped rtx; the value is recomputed after the check.  */
  enum machine_mode inmode = GET_MODE (XEXP (op, 0));
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));
  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      /* FP comparisons must collapse to a single integer condition
	 code, then be checked against fcmov's repertoire below.  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	return 0;
      code = ix86_fp_compare_code_to_integer (code);
    }
  /* i387 supports just limited amount of conditional codes.  */
  switch (code)
    {
    case LTU: case GTU: case LEU: case GEU:
      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
	return 1;
      return 0;
    case ORDERED: case UNORDERED:
    case EQ: case NE:
      return 1;
    default:
      return 0;
    }
}
b840bfb0 1363
e9e80858
JH
1364/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1365
1366int
1367promotable_binary_operator (op, mode)
1368 register rtx op;
1369 enum machine_mode mode ATTRIBUTE_UNUSED;
1370{
1371 switch (GET_CODE (op))
1372 {
1373 case MULT:
1374 /* Modern CPUs have same latency for HImode and SImode multiply,
1375 but 386 and 486 do HImode multiply faster. */
1376 return ix86_cpu > PROCESSOR_I486;
1377 case PLUS:
1378 case AND:
1379 case IOR:
1380 case XOR:
1381 case ASHIFT:
1382 return 1;
1383 default:
1384 return 0;
1385 }
1386}
1387
e075ae69
RH
1388/* Nearly general operand, but accept any const_double, since we wish
1389 to be able to drop them into memory rather than have them get pulled
1390 into registers. */
b840bfb0 1391
2a2ab3f9 1392int
e075ae69
RH
1393cmp_fp_expander_operand (op, mode)
1394 register rtx op;
1395 enum machine_mode mode;
2a2ab3f9 1396{
e075ae69 1397 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 1398 return 0;
e075ae69 1399 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 1400 return 1;
e075ae69 1401 return general_operand (op, mode);
2a2ab3f9
JVA
1402}
1403
e075ae69 1404/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
1405
1406int
e075ae69 1407ext_register_operand (op, mode)
2a2ab3f9 1408 register rtx op;
bb5177ac 1409 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1410{
e075ae69
RH
1411 if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
1412 return 0;
1413 return register_operand (op, VOIDmode);
1414}
1415
1416/* Return 1 if this is a valid binary floating-point operation.
0f290768 1417 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
1418
1419int
1420binary_fp_operator (op, mode)
1421 register rtx op;
1422 enum machine_mode mode;
1423{
1424 if (mode != VOIDmode && mode != GET_MODE (op))
1425 return 0;
1426
2a2ab3f9
JVA
1427 switch (GET_CODE (op))
1428 {
e075ae69
RH
1429 case PLUS:
1430 case MINUS:
1431 case MULT:
1432 case DIV:
1433 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 1434
2a2ab3f9
JVA
1435 default:
1436 return 0;
1437 }
1438}
fee2770d 1439
e075ae69
RH
1440int
1441mult_operator(op, mode)
1442 register rtx op;
1443 enum machine_mode mode ATTRIBUTE_UNUSED;
1444{
1445 return GET_CODE (op) == MULT;
1446}
1447
1448int
1449div_operator(op, mode)
1450 register rtx op;
1451 enum machine_mode mode ATTRIBUTE_UNUSED;
1452{
1453 return GET_CODE (op) == DIV;
1454}
0a726ef1
JL
1455
1456int
e075ae69
RH
1457arith_or_logical_operator (op, mode)
1458 rtx op;
1459 enum machine_mode mode;
0a726ef1 1460{
e075ae69
RH
1461 return ((mode == VOIDmode || GET_MODE (op) == mode)
1462 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1463 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
1464}
1465
e075ae69 1466/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
1467
1468int
e075ae69
RH
1469memory_displacement_operand (op, mode)
1470 register rtx op;
1471 enum machine_mode mode;
4f2c8ebb 1472{
e075ae69 1473 struct ix86_address parts;
e9a25f70 1474
e075ae69
RH
1475 if (! memory_operand (op, mode))
1476 return 0;
1477
1478 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1479 abort ();
1480
1481 return parts.disp != NULL_RTX;
4f2c8ebb
RS
1482}
1483
16189740 1484/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
e075ae69
RH
1485 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1486
1487 ??? It seems likely that this will only work because cmpsi is an
1488 expander, and no actual insns use this. */
4f2c8ebb
RS
1489
1490int
e075ae69
RH
1491cmpsi_operand (op, mode)
1492 rtx op;
1493 enum machine_mode mode;
fee2770d 1494{
e075ae69
RH
1495 if (general_operand (op, mode))
1496 return 1;
1497
1498 if (GET_CODE (op) == AND
1499 && GET_MODE (op) == SImode
1500 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
1501 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
1502 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
1503 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
1504 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
1505 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 1506 return 1;
e9a25f70 1507
fee2770d
RS
1508 return 0;
1509}
d784886d 1510
e075ae69
RH
1511/* Returns 1 if OP is memory operand that can not be represented by the
1512 modRM array. */
d784886d
RK
1513
1514int
e075ae69 1515long_memory_operand (op, mode)
d784886d
RK
1516 register rtx op;
1517 enum machine_mode mode;
1518{
e075ae69 1519 if (! memory_operand (op, mode))
d784886d
RK
1520 return 0;
1521
e075ae69 1522 return memory_address_length (op) != 0;
d784886d 1523}
2247f6ed
JH
1524
/* Return nonzero if the rtx is known aligned (to a 4-byte boundary).  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address into base, index, scale, displacement.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      /* A scale of 4 or more makes the index contribution a multiple
	 of 4 regardless of the index register's own alignment.  */
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      /* The displacement must be a constant multiple of 4.  */
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
e075ae69
RH
1578\f
/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.

   Returns -1 if X is not a CONST_DOUBLE at all, 1 for +0.0 (fldz),
   2 for 1.0 (fld1), and 0 for anything else (including -0.0).  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE)
    return -1;

#if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
  {
    REAL_VALUE_TYPE d;
    jmp_buf handler;
    int is0, is1;

    /* Guard the REAL_VALUE examination with a float trap handler;
       if a floating exception fires we land back here and give up.  */
    if (setjmp (handler))
      return 0;

    set_float_handler (handler);
    REAL_VALUE_FROM_CONST_DOUBLE (d, x);
    is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
    is1 = REAL_VALUES_EQUAL (d, dconst1);
    set_float_handler (NULL_PTR);

    if (is0)
      return 1;

    if (is1)
      return 2;

    /* Note that on the 80387, other constants, such as pi,
       are much slower to load as standard constants
       than to load from doubles in memory!  */
    /* ??? Not true on K6: all constants are equal cost.  */
  }
#endif

  return 0;
}
1620
2a2ab3f9
JVA
1621/* Returns 1 if OP contains a symbol reference */
1622
1623int
1624symbolic_reference_mentioned_p (op)
1625 rtx op;
1626{
6f7d635c 1627 register const char *fmt;
2a2ab3f9
JVA
1628 register int i;
1629
1630 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
1631 return 1;
1632
1633 fmt = GET_RTX_FORMAT (GET_CODE (op));
1634 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
1635 {
1636 if (fmt[i] == 'E')
1637 {
1638 register int j;
1639
1640 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
1641 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
1642 return 1;
1643 }
e9a25f70 1644
2a2ab3f9
JVA
1645 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
1646 return 1;
1647 }
1648
1649 return 0;
1650}
e075ae69
RH
1651
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  HOST_WIDE_INT tsize;
  int nregs;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif
#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32 pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  /* Simple epilogue = no local frame and no saved registers.  */
  tsize = ix86_compute_frame_size (get_frame_size (), &nregs, NULL, NULL);
  return tsize == 0 && nregs == 0;
}
1691\f
520a57c8 1692static const char *pic_label_name;
e075ae69 1693static int pic_label_output;
e9a25f70 1694
e075ae69
RH
1695/* This function generates code for -fpic that loads %ebx with
1696 the return address of the caller and then returns. */
1697
1698void
1699asm_output_function_prefix (file, name)
1700 FILE *file;
3cce094d 1701 const char *name ATTRIBUTE_UNUSED;
e075ae69
RH
1702{
1703 rtx xops[2];
1704 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1705 || current_function_uses_const_pool);
1706 xops[0] = pic_offset_table_rtx;
1707 xops[1] = stack_pointer_rtx;
32b5b1aa 1708
0f290768 1709 /* Deep branch prediction favors having a return for every call. */
e075ae69 1710 if (pic_reg_used && TARGET_DEEP_BRANCH_PREDICTION)
32b5b1aa 1711 {
e075ae69
RH
1712 if (!pic_label_output)
1713 {
1714 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
1715 internal (non-global) label that's being emitted, it didn't make
1716 sense to have .type information for local labels. This caused
1717 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
77ebd435 1718 me debug info for a label that you're declaring non-global?) this
0f290768 1719 was changed to call ASM_OUTPUT_LABEL() instead. */
32b5b1aa 1720
0f290768 1721 ASM_OUTPUT_LABEL (file, pic_label_name);
e9a25f70 1722
e075ae69
RH
1723 xops[1] = gen_rtx_MEM (SImode, xops[1]);
1724 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
1725 output_asm_insn ("ret", xops);
0afeb08a 1726
e075ae69 1727 pic_label_output = 1;
32b5b1aa 1728 }
32b5b1aa 1729 }
32b5b1aa 1730}
32b5b1aa 1731
e075ae69
RH
/* Emit insns to load the PIC register (%ebx) with the address of the
   global offset table.  With deep-branch-prediction tuning, the PC is
   fetched by calling the thunk emitted in asm_output_function_prefix;
   otherwise a call/pop sequence is used.  */

void
load_pic_register ()
{
  rtx gotsym, pclab;

  gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");

  if (TARGET_DEEP_BRANCH_PREDICTION)
    {
      /* Create the thunk's label once per file; asm_output_function_prefix
	 emits the body under this same name.  */
      if (pic_label_name == NULL)
	{
	  char buf[32];
	  ASM_GENERATE_INTERNAL_LABEL (buf, "LPR", 0);
	  pic_label_name = ggc_strdup (buf);
	}
      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
    }
  else
    {
      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
    }

  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));

  /* Without the thunk, the call pushed the PC; pop it into %ebx.  */
  if (! TARGET_DEEP_BRANCH_PREDICTION)
    emit_insn (gen_popsi1 (pic_offset_table_rtx));

  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
}
8dfe5673 1761
e075ae69 1762/* Generate an SImode "push" pattern for input ARG. */
e9a25f70 1763
e075ae69
RH
1764static rtx
1765gen_push (arg)
1766 rtx arg;
e9a25f70 1767{
c5c76735
JL
1768 return gen_rtx_SET (VOIDmode,
1769 gen_rtx_MEM (SImode,
1770 gen_rtx_PRE_DEC (SImode,
1771 stack_pointer_rtx)),
1772 arg);
e9a25f70
JL
1773}
1774
0903fcab
JH
1775/* Return number of registers to be saved on the stack. */
1776
1777static int
1778ix86_nsaved_regs ()
1779{
1780 int nregs = 0;
1781 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1782 || current_function_uses_const_pool);
1783 int limit = (frame_pointer_needed
1784 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
1785 int regno;
1786
1787 for (regno = limit - 1; regno >= 0; regno--)
1788 if ((regs_ever_live[regno] && ! call_used_regs[regno])
1789 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1790 {
1791 nregs ++;
1792 }
1793 return nregs;
1794}
1795
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;
     int to;
{
  int padding1;
  int nregs;

  /* Stack grows downward:

     [arguments]
					<- ARG_POINTER
     saved pc

     saved frame pointer if frame_pointer_needed
					<- HARD_FRAME_POINTER
     [saved regs]

     [padding1]   \
		   |		<- FRAME_POINTER
     [frame]	   > tsize
		   |
     [padding2]   /
   */

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    /* Skip saved PC and previous frame pointer.
       Executed only when frame_pointer_needed.  */
    return 8;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    {
      /* FRAME_POINTER lies below the saved regs and pre-frame padding;
	 the offset is negative (stack grows downward).  */
      ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *) 0);
      padding1 += nregs * UNITS_PER_WORD;
      return -padding1;
    }
  else
    {
      /* ARG_POINTER or FRAME_POINTER to STACK_POINTER elimination.  */
      int frame_size = frame_pointer_needed ? 8 : 4;
      HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (),
						     &nregs, &padding1, (int *) 0);

      if (to != STACK_POINTER_REGNUM)
	abort ();
      else if (from == ARG_POINTER_REGNUM)
	return tsize + nregs * UNITS_PER_WORD + frame_size;
      else if (from != FRAME_POINTER_REGNUM)
	abort ();
      else
	return tsize - padding1;
    }
}
1852
/* Compute the size of local storage taking into consideration the
   desired stack alignment which is to be maintained.  Also determine
   the number of registers saved below the local storage.

   PADDING1 returns padding before stack frame and PADDING2 returns
   padding after stack frame;

   SIZE is the raw frame size from get_frame_size ().  Any of the
   three output pointers may be null when the caller does not need
   that value.  Returns SIZE plus both paddings (the "tsize" used by
   the prologue/epilogue); note the register save area itself is NOT
   included in the return value.  */

static HOST_WIDE_INT
ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2)
     HOST_WIDE_INT size;
     int *nregs_on_stack;
     int *rpadding1;
     int *rpadding2;
{
  int nregs;
  int padding1 = 0;
  int padding2 = 0;
  HOST_WIDE_INT total_size;
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  int offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;

  nregs = ix86_nsaved_regs ();
  total_size = size;

  /* Bytes pushed before the register save area: saved PC, plus saved
     frame pointer when one is in use.  */
  offset = frame_pointer_needed ? 8 : 4;

  /* Do some sanity checking of stack_alignment_needed and preferred_alignment,
     since i386 port is the only using those features that may break easilly.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (!size && stack_alignment_needed != STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  /* Never align the frame to less than a word.  */
  if (stack_alignment_needed < 4)
    stack_alignment_needed = 4;

  offset += nregs * UNITS_PER_WORD;

  if (ACCUMULATE_OUTGOING_ARGS)
    total_size += current_function_outgoing_args_size;

  total_size += offset;

  /* Align start of frame for local function.  */
  padding1 = ((offset + stack_alignment_needed - 1)
	      & -stack_alignment_needed) - offset;
  total_size += padding1;

  /* Align stack boundary.  */
  padding2 = ((total_size + preferred_alignment - 1)
	      & -preferred_alignment) - total_size;

  /* Outgoing argument space is counted into PADDING2 so that the
     single esp adjustment in the prologue covers it too.  */
  if (ACCUMULATE_OUTGOING_ARGS)
    padding2 += current_function_outgoing_args_size;

  if (nregs_on_stack)
    *nregs_on_stack = nregs;
  if (rpadding1)
    *rpadding1 = padding1;
  if (rpadding2)
    *rpadding2 = padding2;

  return size + padding1 + padding2;
}
1926
/* Emit code to save registers in the prologue.

   The save predicate here must stay in lock-step with
   ix86_nsaved_regs, which counts the registers, and with the restore
   loops in the epilogue code.  */

static void
ix86_emit_save_regs ()
{
  register int regno;
  int limit;
  rtx insn;
  /* The PIC register is saved when -fpic code references either the
     GOT or the constant pool.  */
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  limit = (frame_pointer_needed
	   ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);

  /* Push in decreasing regno order; the epilogue pop loop walks regnos
     in increasing order, so the two match up.  */
  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && !call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
	/* Mark the push so frame-related (unwind) info is emitted.  */
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
1948
/* Expand the prologue into a bunch of separate insns.

   Order of emission: save %ebp and set up the frame pointer (when
   needed), push the call-saved registers, allocate the frame, then
   load the PIC register.  Frame-establishing insns are marked
   RTX_FRAME_RELATED_P for unwind info.  */

void
ix86_expand_prologue ()
{
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *) 0, (int *) 0,
						 (int *) 0);
  rtx insn;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  ix86_emit_save_regs ();

  if (tsize == 0)
    ;
  else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
    {
      /* Small (or unprobed) frame: a single esp adjustment.  When a
	 frame pointer exists, use the pattern that ties esp to ebp so
	 the scheduler cannot reorder it across frame accesses.  */
      if (frame_pointer_needed)
	insn = emit_insn (gen_pro_epilogue_adjust_stack
			  (stack_pointer_rtx, stack_pointer_rtx,
			   GEN_INT (-tsize), hard_frame_pointer_rtx));
      else
	insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				      GEN_INT (-tsize)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* Large frame with stack probing: allocate by calling _alloca
	 with the size in %eax.
	 ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (tsize));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx));

      /* Record that the call uses %eax so it is not deleted as dead.  */
      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    load_pic_register ();

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  However, if -fpic, the above call will have
     done that.  */
  if ((profile_flag || profile_block_flag) && ! pic_reg_used)
    emit_insn (gen_blockage ());
}
2018
0903fcab
JH
2019/* Emit code to add TSIZE to esp value. Use POP instruction when
2020 profitable. */
2021
2022static void
2023ix86_emit_epilogue_esp_adjustment (tsize)
2024 int tsize;
2025{
bdeb029c
JH
2026 /* If a frame pointer is present, we must be sure to tie the sp
2027 to the fp so that we don't mis-schedule. */
2028 if (frame_pointer_needed)
2029 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2030 stack_pointer_rtx,
2031 GEN_INT (tsize),
2032 hard_frame_pointer_rtx));
0903fcab 2033 else
bdeb029c
JH
2034 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2035 GEN_INT (tsize)));
0903fcab
JH
2036}
2037
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.

   Walks regnos in increasing order, mirroring the decreasing-order
   pushes in ix86_emit_save_regs; the predicate must stay identical to
   the one used there and in ix86_nsaved_regs.
   NOTE(review): the stride is a hard-coded 4 rather than
   UNITS_PER_WORD used by the size computations — fine on 32-bit i386,
   but worth confirming they are meant to agree.  */
static void
ix86_emit_restore_regs_using_mov (pointer, offset)
     rtx pointer;
     int offset;
{
  int regno;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int limit = (frame_pointer_needed
	       ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);

  for (regno = 0; regno < limit; regno++)
    if ((regs_ever_live[regno] && !call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      {
	emit_move_insn (gen_rtx_REG (SImode, regno),
			adj_offsettable_operand (gen_rtx_MEM (SImode,
							      pointer),
						 offset));
	offset += 4;
      }
}
2062
/* Restore function stack, frame, and registers.

   EMIT_RETURN is false for sibcall epilogues, which must not end in a
   return insn.  Two restore strategies are used: MOV-based restores
   (possibly followed by "leave"), or an esp adjustment followed by
   POPs; see the heuristic comment below.  */

void
ix86_expand_epilogue (emit_return)
     int emit_return;
{
  int nregs;
  int regno;

  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  /* sp_valid: %esp still points where the prologue left it, so it can
     address the saved registers.  */
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
						 (int *) 0, (int *) 0);

  /* Calculate start of saved registers relative to ebp.  */
  offset = -nregs * UNITS_PER_WORD;

#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    {
      FUNCTION_BLOCK_PROFILER_EXIT;
    }
#endif

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.

     The default code results in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && nregs <= 1)
      || (frame_pointer_needed && !nregs && tsize)
      || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
	  && nregs == 1))
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !tsize))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx, tsize);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);

      if (!frame_pointer_needed)
	ix86_emit_epilogue_esp_adjustment (tsize + nregs * UNITS_PER_WORD);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size)
	emit_insn (gen_leave ());
      else
	{
	  /* Discrete equivalent of "leave": mov %ebp -> %esp, pop %ebp.  */
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx,
						    hard_frame_pointer_rtx));
	  emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  /* Point %esp at the start of the register save area.  */
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset),
						    hard_frame_pointer_rtx));
	}
      else if (tsize)
	ix86_emit_epilogue_esp_adjustment (tsize);

      /* Pop in increasing regno order, matching the decreasing-order
	 pushes of the prologue.  */
      for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
	if ((regs_ever_live[regno] && !call_used_regs[regno])
	    || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
	  emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (! emit_return)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
2177\f
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return false if the structure of the address is
   grossly off.

   On success fills OUT with base, index, displacement and scale; the
   fix-up special cases at the bottom massage the parts into something
   the i386 addressing modes can actually encode.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;			/* index*scale + base */
	  else
	    disp = op1;			/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return FALSE;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return FALSE;
      scale = INTVAL (tmp);
      /* Only shifts by 0..3 map onto scale factors 1, 2, 4, 8.  */
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return FALSE;
      scale = 1 << scale;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return FALSE;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is not scaling */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
	  || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return TRUE;
}
01329426
JH
2303\f
2304/* Return cost of the memory address x.
2305 For i386, it is better to use a complex address than let gcc copy
2306 the address into a reg and make a new pseudo. But not if the address
2307 requires to two regs - that would mean more pseudos with longer
2308 lifetimes. */
2309int
2310ix86_address_cost (x)
2311 rtx x;
2312{
2313 struct ix86_address parts;
2314 int cost = 1;
3b3c6a3f 2315
01329426
JH
2316 if (!ix86_decompose_address (x, &parts))
2317 abort ();
2318
2319 /* More complex memory references are better. */
2320 if (parts.disp && parts.disp != const0_rtx)
2321 cost--;
2322
2323 /* Attempt to minimize number of registers in the address. */
2324 if ((parts.base
2325 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2326 || (parts.index
2327 && (!REG_P (parts.index)
2328 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2329 cost++;
2330
2331 if (parts.base
2332 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2333 && parts.index
2334 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2335 && parts.base != parts.index)
2336 cost++;
2337
2338 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
2339 since it's predecode logic can't detect the length of instructions
2340 and it degenerates to vector decoded. Increase cost of such
2341 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 2342 to split such addresses or even refuse such addresses at all.
01329426
JH
2343
2344 Following addressing modes are affected:
2345 [base+scale*index]
2346 [scale*index+disp]
2347 [base+index]
0f290768 2348
01329426
JH
2349 The first and last case may be avoidable by explicitly coding the zero in
2350 memory address, but I don't have AMD-K6 machine handy to check this
2351 theory. */
2352
2353 if (TARGET_K6
2354 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2355 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2356 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2357 cost += 10;
0f290768 2358
01329426
JH
2359 return cost;
2360}
2361\f
b949ea8b
JW
2362/* If X is a machine specific address (i.e. a symbol or label being
2363 referenced as a displacement from the GOT implemented using an
2364 UNSPEC), then return the base term. Otherwise return X. */
2365
2366rtx
2367ix86_find_base_term (x)
2368 rtx x;
2369{
2370 rtx term;
2371
2372 if (GET_CODE (x) != PLUS
2373 || XEXP (x, 0) != pic_offset_table_rtx
2374 || GET_CODE (XEXP (x, 1)) != CONST)
2375 return x;
2376
2377 term = XEXP (XEXP (x, 1), 0);
2378
2379 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
2380 term = XEXP (term, 0);
2381
2382 if (GET_CODE (term) != UNSPEC
2383 || XVECLEN (term, 0) != 1
2384 || XINT (term, 1) != 7)
2385 return x;
2386
2387 term = XVECEXP (term, 0, 0);
2388
2389 if (GET_CODE (term) != SYMBOL_REF
2390 && GET_CODE (term) != LABEL_REF)
2391 return x;
2392
2393 return term;
2394}
2395\f
e075ae69
RH
2396/* Determine if a given CONST RTX is a valid memory displacement
2397 in PIC mode. */
0f290768 2398
59be65f6 2399int
91bb873f
RH
2400legitimate_pic_address_disp_p (disp)
2401 register rtx disp;
2402{
2403 if (GET_CODE (disp) != CONST)
2404 return 0;
2405 disp = XEXP (disp, 0);
2406
2407 if (GET_CODE (disp) == PLUS)
2408 {
2409 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2410 return 0;
2411 disp = XEXP (disp, 0);
2412 }
2413
2414 if (GET_CODE (disp) != UNSPEC
2415 || XVECLEN (disp, 0) != 1)
2416 return 0;
2417
2418 /* Must be @GOT or @GOTOFF. */
2419 if (XINT (disp, 1) != 6
2420 && XINT (disp, 1) != 7)
2421 return 0;
2422
2423 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2424 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2425 return 0;
2426
2427 return 1;
2428}
2429
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.

   Returns TRUE/FALSE; failures jump to report_error with a human
   readable REASON that is printed under TARGET_DEBUG_ADDR.  */

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (! ix86_decompose_address (addr, &parts))
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (!CONSTANT_ADDRESS_P (disp))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}

      if (GET_CODE (disp) == CONST_DOUBLE)
	{
	  reason = "displacement is a const_double";
	  goto report_error;
	}

      if (flag_pic && SYMBOLIC_CONST (disp))
	{
	  if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easilly, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (HALF_PIC_P ())
	{
	  /* Half-pic references must stand alone, without base/index.  */
	  if (! HALF_PIC_ADDRESS_P (disp)
	      || (base != NULL_RTX || index != NULL_RTX))
	    {
	      reason = "displacement is an invalid half-pic reference";
	      goto report_error;
	    }
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
3b3c6a3f 2617\f
55efb413
JW
2618/* Return an unique alias set for the GOT. */
2619
0f290768 2620static HOST_WIDE_INT
55efb413
JW
2621ix86_GOT_alias_set ()
2622{
2623 static HOST_WIDE_INT set = -1;
2624 if (set == -1)
2625 set = new_alias_set ();
2626 return set;
0f290768 2627}
55efb413 2628
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

  if (GET_CODE (addr) == LABEL_REF
      || (GET_CODE (addr) == SYMBOL_REF
	  && (CONSTANT_POOL_ADDRESS_P (addr)
	      || SYMBOL_REF_FLAG (addr))))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      current_function_uses_pic_offset_table = 1;
      /* Unspec 7 is @GOTOFF.  */
      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      /* This symbol must be referenced via a load from the
	 Global Offset Table (@GOT).  */

      current_function_uses_pic_offset_table = 1;
      /* Unspec 6 is @GOT.  */
      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
      new = gen_rtx_MEM (Pmode, new);
      /* GOT entries are constant once relocated, and live in their
	 own alias set.  */
      RTX_UNCHANGING_P (new) = 1;
      MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();

      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      emit_move_insn (reg, new);
      new = reg;
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == UNSPEC)
	    {
	      /* Check that the unspec is one of the ones we generate?  */
	    }
	  else if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if ((GET_CODE (op0) == LABEL_REF
	       || (GET_CODE (op0) == SYMBOL_REF
		   && (CONSTANT_POOL_ADDRESS_P (op0)
		       || SYMBOL_REF_FLAG (op0))))
	      && GET_CODE (op1) == CONST_INT)
	    {
	      current_function_uses_pic_offset_table = 1;
	      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
	      new = gen_rtx_PLUS (Pmode, new, op1);
	      new = gen_rtx_CONST (Pmode, new);
	      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

	      if (reg != 0)
		{
		  emit_move_insn (reg, new);
		  new = reg;
		}
	    }
	  else
	    {
	      /* Legitimize both operands recursively and re-combine.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new  = legitimize_pic_address (XEXP (addr, 1),
					     base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  /* Keep the constant part outermost.  */
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}
2751\f
3b3c6a3f
MM
2752/* Try machine-dependent ways of modifying an illegitimate address
2753 to be legitimate. If we find one, return the new, valid address.
2754 This macro is used in only one place: `memory_address' in explow.c.
2755
2756 OLDX is the address as it was before break_out_memory_refs was called.
2757 In some cases it is useful to look at this to decide what needs to be done.
2758
2759 MODE and WIN are passed so that this macro can use
2760 GO_IF_LEGITIMATE_ADDRESS.
2761
2762 It is always safe for this macro to do nothing. It exists to recognize
2763 opportunities to optimize the output.
2764
2765 For the 80386, we handle X+REG by loading X into a register R and
2766 using R+REG. R will go in a general reg and indexing will be used.
2767 However, if REG is a broken-out memory address or multiplication,
2768 nothing needs to be done because REG can certainly go in a general reg.
2769
2770 When -fpic is used, special handling is needed for symbolic references.
2771 See comments by legitimize_pic_address in i386.c for details. */
2772
2773rtx
2774legitimize_address (x, oldx, mode)
2775 register rtx x;
bb5177ac 2776 register rtx oldx ATTRIBUTE_UNUSED;
3b3c6a3f
MM
2777 enum machine_mode mode;
2778{
2779 int changed = 0;
2780 unsigned log;
2781
2782 if (TARGET_DEBUG_ADDR)
2783 {
e9a25f70
JL
2784 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2785 GET_MODE_NAME (mode));
3b3c6a3f
MM
2786 debug_rtx (x);
2787 }
2788
2789 if (flag_pic && SYMBOLIC_CONST (x))
2790 return legitimize_pic_address (x, 0);
2791
2792 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
2793 if (GET_CODE (x) == ASHIFT
2794 && GET_CODE (XEXP (x, 1)) == CONST_INT
2795 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
2796 {
2797 changed = 1;
a269a03c
JC
2798 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
2799 GEN_INT (1 << log));
3b3c6a3f
MM
2800 }
2801
2802 if (GET_CODE (x) == PLUS)
2803 {
0f290768 2804 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 2805
3b3c6a3f
MM
2806 if (GET_CODE (XEXP (x, 0)) == ASHIFT
2807 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2808 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
2809 {
2810 changed = 1;
c5c76735
JL
2811 XEXP (x, 0) = gen_rtx_MULT (Pmode,
2812 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
2813 GEN_INT (1 << log));
3b3c6a3f
MM
2814 }
2815
2816 if (GET_CODE (XEXP (x, 1)) == ASHIFT
2817 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
2818 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
2819 {
2820 changed = 1;
c5c76735
JL
2821 XEXP (x, 1) = gen_rtx_MULT (Pmode,
2822 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
2823 GEN_INT (1 << log));
3b3c6a3f
MM
2824 }
2825
0f290768 2826 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
2827 if (GET_CODE (XEXP (x, 1)) == MULT)
2828 {
2829 rtx tmp = XEXP (x, 0);
2830 XEXP (x, 0) = XEXP (x, 1);
2831 XEXP (x, 1) = tmp;
2832 changed = 1;
2833 }
2834
2835 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2836 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2837 created by virtual register instantiation, register elimination, and
2838 similar optimizations. */
2839 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
2840 {
2841 changed = 1;
c5c76735
JL
2842 x = gen_rtx_PLUS (Pmode,
2843 gen_rtx_PLUS (Pmode, XEXP (x, 0),
2844 XEXP (XEXP (x, 1), 0)),
2845 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
2846 }
2847
e9a25f70
JL
2848 /* Canonicalize
2849 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
2850 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2851 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
2852 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
2853 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
2854 && CONSTANT_P (XEXP (x, 1)))
2855 {
00c79232
ML
2856 rtx constant;
2857 rtx other = NULL_RTX;
3b3c6a3f
MM
2858
2859 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2860 {
2861 constant = XEXP (x, 1);
2862 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
2863 }
2864 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
2865 {
2866 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
2867 other = XEXP (x, 1);
2868 }
2869 else
2870 constant = 0;
2871
2872 if (constant)
2873 {
2874 changed = 1;
c5c76735
JL
2875 x = gen_rtx_PLUS (Pmode,
2876 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
2877 XEXP (XEXP (XEXP (x, 0), 1), 0)),
2878 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
2879 }
2880 }
2881
2882 if (changed && legitimate_address_p (mode, x, FALSE))
2883 return x;
2884
2885 if (GET_CODE (XEXP (x, 0)) == MULT)
2886 {
2887 changed = 1;
2888 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
2889 }
2890
2891 if (GET_CODE (XEXP (x, 1)) == MULT)
2892 {
2893 changed = 1;
2894 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
2895 }
2896
2897 if (changed
2898 && GET_CODE (XEXP (x, 1)) == REG
2899 && GET_CODE (XEXP (x, 0)) == REG)
2900 return x;
2901
2902 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
2903 {
2904 changed = 1;
2905 x = legitimize_pic_address (x, 0);
2906 }
2907
2908 if (changed && legitimate_address_p (mode, x, FALSE))
2909 return x;
2910
2911 if (GET_CODE (XEXP (x, 0)) == REG)
2912 {
2913 register rtx temp = gen_reg_rtx (Pmode);
2914 register rtx val = force_operand (XEXP (x, 1), temp);
2915 if (val != temp)
2916 emit_move_insn (temp, val);
2917
2918 XEXP (x, 1) = temp;
2919 return x;
2920 }
2921
2922 else if (GET_CODE (XEXP (x, 1)) == REG)
2923 {
2924 register rtx temp = gen_reg_rtx (Pmode);
2925 register rtx val = force_operand (XEXP (x, 0), temp);
2926 if (val != temp)
2927 emit_move_insn (temp, val);
2928
2929 XEXP (x, 0) = temp;
2930 return x;
2931 }
2932 }
2933
2934 return x;
2935}
2a2ab3f9
JVA
2936\f
2937/* Print an integer constant expression in assembler syntax. Addition
2938 and subtraction are the only arithmetic that may appear in these
2939 expressions. FILE is the stdio stream to write to, X is the rtx, and
2940 CODE is the operand print code from the output string. */
2941
2942static void
2943output_pic_addr_const (file, x, code)
2944 FILE *file;
2945 rtx x;
2946 int code;
2947{
2948 char buf[256];
2949
2950 switch (GET_CODE (x))
2951 {
2952 case PC:
2953 if (flag_pic)
2954 putc ('.', file);
2955 else
2956 abort ();
2957 break;
2958
2959 case SYMBOL_REF:
91bb873f
RH
2960 assemble_name (file, XSTR (x, 0));
2961 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
2962 fputs ("@PLT", file);
2a2ab3f9
JVA
2963 break;
2964
91bb873f
RH
2965 case LABEL_REF:
2966 x = XEXP (x, 0);
2967 /* FALLTHRU */
2a2ab3f9
JVA
2968 case CODE_LABEL:
2969 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
2970 assemble_name (asm_out_file, buf);
2971 break;
2972
2973 case CONST_INT:
f64cecad 2974 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
2975 break;
2976
2977 case CONST:
2978 /* This used to output parentheses around the expression,
2979 but that does not work on the 386 (either ATT or BSD assembler). */
2980 output_pic_addr_const (file, XEXP (x, 0), code);
2981 break;
2982
2983 case CONST_DOUBLE:
2984 if (GET_MODE (x) == VOIDmode)
2985 {
2986 /* We can use %d if the number is <32 bits and positive. */
2987 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
2988 fprintf (file, "0x%lx%08lx",
2989 (unsigned long) CONST_DOUBLE_HIGH (x),
2990 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 2991 else
f64cecad 2992 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
2993 }
2994 else
2995 /* We can't handle floating point constants;
2996 PRINT_OPERAND must handle them. */
2997 output_operand_lossage ("floating constant misused");
2998 break;
2999
3000 case PLUS:
e9a25f70 3001 /* Some assemblers need integer constants to appear first. */
2a2ab3f9
JVA
3002 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
3003 {
2a2ab3f9 3004 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 3005 putc ('+', file);
e9a25f70 3006 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 3007 }
91bb873f 3008 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 3009 {
2a2ab3f9 3010 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 3011 putc ('+', file);
e9a25f70 3012 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 3013 }
91bb873f
RH
3014 else
3015 abort ();
2a2ab3f9
JVA
3016 break;
3017
3018 case MINUS:
e075ae69 3019 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
2a2ab3f9 3020 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 3021 putc ('-', file);
2a2ab3f9 3022 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 3023 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
2a2ab3f9
JVA
3024 break;
3025
91bb873f
RH
3026 case UNSPEC:
3027 if (XVECLEN (x, 0) != 1)
77ebd435 3028 abort ();
91bb873f
RH
3029 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
3030 switch (XINT (x, 1))
77ebd435
AJ
3031 {
3032 case 6:
3033 fputs ("@GOT", file);
3034 break;
3035 case 7:
3036 fputs ("@GOTOFF", file);
3037 break;
3038 case 8:
3039 fputs ("@PLT", file);
3040 break;
3041 default:
3042 output_operand_lossage ("invalid UNSPEC as operand");
3043 break;
3044 }
91bb873f
RH
3045 break;
3046
2a2ab3f9
JVA
3047 default:
3048 output_operand_lossage ("invalid expression as operand");
3049 }
3050}
1865dbb5 3051
0f290768 3052/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
3053 We need to handle our special PIC relocations. */
3054
0f290768 3055void
1865dbb5
JM
3056i386_dwarf_output_addr_const (file, x)
3057 FILE *file;
3058 rtx x;
3059{
f0ca81d2 3060 fprintf (file, "%s", INT_ASM_OP);
1865dbb5
JM
3061 if (flag_pic)
3062 output_pic_addr_const (file, x, '\0');
3063 else
3064 output_addr_const (file, x);
3065 fputc ('\n', file);
3066}
3067
3068/* In the name of slightly smaller debug output, and to cater to
3069 general assembler losage, recognize PIC+GOTOFF and turn it back
3070 into a direct symbol reference. */
3071
3072rtx
3073i386_simplify_dwarf_addr (orig_x)
3074 rtx orig_x;
3075{
3076 rtx x = orig_x;
3077
3078 if (GET_CODE (x) != PLUS
3079 || GET_CODE (XEXP (x, 0)) != REG
3080 || GET_CODE (XEXP (x, 1)) != CONST)
3081 return orig_x;
3082
3083 x = XEXP (XEXP (x, 1), 0);
3084 if (GET_CODE (x) == UNSPEC
3adbce3d
RH
3085 && (XINT (x, 1) == 6
3086 || XINT (x, 1) == 7))
1865dbb5
JM
3087 return XVECEXP (x, 0, 0);
3088
3089 if (GET_CODE (x) == PLUS
3090 && GET_CODE (XEXP (x, 0)) == UNSPEC
3091 && GET_CODE (XEXP (x, 1)) == CONST_INT
3adbce3d
RH
3092 && (XINT (XEXP (x, 0), 1) == 6
3093 || XINT (XEXP (x, 0), 1) == 7))
1865dbb5
JM
3094 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
3095
3096 return orig_x;
3097}
2a2ab3f9 3098\f
a269a03c 3099static void
e075ae69 3100put_condition_code (code, mode, reverse, fp, file)
a269a03c 3101 enum rtx_code code;
e075ae69
RH
3102 enum machine_mode mode;
3103 int reverse, fp;
a269a03c
JC
3104 FILE *file;
3105{
a269a03c
JC
3106 const char *suffix;
3107
9a915772
JH
3108 if (mode == CCFPmode || mode == CCFPUmode)
3109 {
3110 enum rtx_code second_code, bypass_code;
3111 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3112 if (bypass_code != NIL || second_code != NIL)
3113 abort();
3114 code = ix86_fp_compare_code_to_integer (code);
3115 mode = CCmode;
3116 }
a269a03c
JC
3117 if (reverse)
3118 code = reverse_condition (code);
e075ae69 3119
a269a03c
JC
3120 switch (code)
3121 {
3122 case EQ:
3123 suffix = "e";
3124 break;
a269a03c
JC
3125 case NE:
3126 suffix = "ne";
3127 break;
a269a03c 3128 case GT:
7e08e190 3129 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
e075ae69
RH
3130 abort ();
3131 suffix = "g";
a269a03c 3132 break;
a269a03c 3133 case GTU:
e075ae69
RH
3134 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
3135 Those same assemblers have the same but opposite losage on cmov. */
7e08e190 3136 if (mode != CCmode)
0f290768 3137 abort ();
e075ae69 3138 suffix = fp ? "nbe" : "a";
a269a03c 3139 break;
a269a03c 3140 case LT:
9076b9c1 3141 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 3142 suffix = "s";
7e08e190 3143 else if (mode == CCmode || mode == CCGCmode)
e075ae69 3144 suffix = "l";
9076b9c1 3145 else
0f290768 3146 abort ();
a269a03c 3147 break;
a269a03c 3148 case LTU:
9076b9c1 3149 if (mode != CCmode)
0f290768 3150 abort ();
a269a03c
JC
3151 suffix = "b";
3152 break;
a269a03c 3153 case GE:
9076b9c1 3154 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 3155 suffix = "ns";
7e08e190 3156 else if (mode == CCmode || mode == CCGCmode)
e075ae69 3157 suffix = "ge";
9076b9c1 3158 else
0f290768 3159 abort ();
a269a03c 3160 break;
a269a03c 3161 case GEU:
e075ae69 3162 /* ??? As above. */
7e08e190 3163 if (mode != CCmode)
0f290768 3164 abort ();
7e08e190 3165 suffix = fp ? "nb" : "ae";
a269a03c 3166 break;
a269a03c 3167 case LE:
7e08e190 3168 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
e075ae69
RH
3169 abort ();
3170 suffix = "le";
a269a03c 3171 break;
a269a03c 3172 case LEU:
9076b9c1
JH
3173 if (mode != CCmode)
3174 abort ();
7e08e190 3175 suffix = "be";
a269a03c 3176 break;
3a3677ff 3177 case UNORDERED:
9e7adcb3 3178 suffix = fp ? "u" : "p";
3a3677ff
RH
3179 break;
3180 case ORDERED:
9e7adcb3 3181 suffix = fp ? "nu" : "np";
3a3677ff 3182 break;
a269a03c
JC
3183 default:
3184 abort ();
3185 }
3186 fputs (suffix, file);
3187}
3188
e075ae69
RH
3189void
3190print_reg (x, code, file)
3191 rtx x;
3192 int code;
3193 FILE *file;
e5cb57e8 3194{
e075ae69 3195 if (REGNO (x) == ARG_POINTER_REGNUM
564d80f4 3196 || REGNO (x) == FRAME_POINTER_REGNUM
e075ae69
RH
3197 || REGNO (x) == FLAGS_REG
3198 || REGNO (x) == FPSR_REG)
3199 abort ();
e9a25f70 3200
e075ae69
RH
3201 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3202 putc ('%', file);
3203
3204 if (code == 'w')
3205 code = 2;
3206 else if (code == 'b')
3207 code = 1;
3208 else if (code == 'k')
3209 code = 4;
3210 else if (code == 'y')
3211 code = 3;
3212 else if (code == 'h')
3213 code = 0;
a7180f70
BS
3214 else if (code == 'm' || MMX_REG_P (x))
3215 code = 5;
e075ae69
RH
3216 else
3217 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 3218
e075ae69
RH
3219 switch (code)
3220 {
a7180f70
BS
3221 case 5:
3222 fputs (hi_reg_name[REGNO (x)], file);
3223 break;
e075ae69
RH
3224 case 3:
3225 if (STACK_TOP_P (x))
3226 {
3227 fputs ("st(0)", file);
3228 break;
3229 }
3230 /* FALLTHRU */
3231 case 4:
3232 case 8:
3233 case 12:
3234 if (! FP_REG_P (x))
3235 putc ('e', file);
3236 /* FALLTHRU */
a7180f70 3237 case 16:
e075ae69
RH
3238 case 2:
3239 fputs (hi_reg_name[REGNO (x)], file);
3240 break;
3241 case 1:
3242 fputs (qi_reg_name[REGNO (x)], file);
3243 break;
3244 case 0:
3245 fputs (qi_high_reg_name[REGNO (x)], file);
3246 break;
3247 default:
3248 abort ();
fe25fea3 3249 }
e5cb57e8
SC
3250}
3251
2a2ab3f9 3252/* Meaning of CODE:
fe25fea3 3253 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 3254 C -- print opcode suffix for set/cmov insn.
fe25fea3 3255 c -- like C, but print reversed condition
2a2ab3f9
JVA
3256 R -- print the prefix for register names.
3257 z -- print the opcode suffix for the size of the current operand.
3258 * -- print a star (in certain assembler syntax)
fb204271 3259 A -- print an absolute memory reference.
2a2ab3f9 3260 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
3261 s -- print a shift double count, followed by the assemblers argument
3262 delimiter.
fe25fea3
SC
3263 b -- print the QImode name of the register for the indicated operand.
3264 %b0 would print %al if operands[0] is reg 0.
3265 w -- likewise, print the HImode name of the register.
3266 k -- likewise, print the SImode name of the register.
3267 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
a7180f70
BS
3268 y -- print "st(0)" instead of "st" as a register.
3269 m -- print "st(n)" as an mmx register. */
2a2ab3f9
JVA
3270
3271void
3272print_operand (file, x, code)
3273 FILE *file;
3274 rtx x;
3275 int code;
3276{
3277 if (code)
3278 {
3279 switch (code)
3280 {
3281 case '*':
e075ae69 3282 if (ASSEMBLER_DIALECT == 0)
2a2ab3f9
JVA
3283 putc ('*', file);
3284 return;
3285
fb204271
DN
3286 case 'A':
3287 if (ASSEMBLER_DIALECT == 0)
3288 putc ('*', file);
3289 else if (ASSEMBLER_DIALECT == 1)
3290 {
3291 /* Intel syntax. For absolute addresses, registers should not
3292 be surrounded by braces. */
3293 if (GET_CODE (x) != REG)
3294 {
3295 putc ('[', file);
3296 PRINT_OPERAND (file, x, 0);
3297 putc (']', file);
3298 return;
3299 }
3300 }
3301
3302 PRINT_OPERAND (file, x, 0);
3303 return;
3304
3305
2a2ab3f9 3306 case 'L':
e075ae69
RH
3307 if (ASSEMBLER_DIALECT == 0)
3308 putc ('l', file);
2a2ab3f9
JVA
3309 return;
3310
3311 case 'W':
e075ae69
RH
3312 if (ASSEMBLER_DIALECT == 0)
3313 putc ('w', file);
2a2ab3f9
JVA
3314 return;
3315
3316 case 'B':
e075ae69
RH
3317 if (ASSEMBLER_DIALECT == 0)
3318 putc ('b', file);
2a2ab3f9
JVA
3319 return;
3320
3321 case 'Q':
e075ae69
RH
3322 if (ASSEMBLER_DIALECT == 0)
3323 putc ('l', file);
2a2ab3f9
JVA
3324 return;
3325
3326 case 'S':
e075ae69
RH
3327 if (ASSEMBLER_DIALECT == 0)
3328 putc ('s', file);
2a2ab3f9
JVA
3329 return;
3330
5f1ec3e6 3331 case 'T':
e075ae69
RH
3332 if (ASSEMBLER_DIALECT == 0)
3333 putc ('t', file);
5f1ec3e6
JVA
3334 return;
3335
2a2ab3f9
JVA
3336 case 'z':
3337 /* 387 opcodes don't get size suffixes if the operands are
0f290768 3338 registers. */
2a2ab3f9
JVA
3339
3340 if (STACK_REG_P (x))
3341 return;
3342
3343 /* this is the size of op from size of operand */
3344 switch (GET_MODE_SIZE (GET_MODE (x)))
3345 {
2a2ab3f9 3346 case 2:
155d8a47
JW
3347#ifdef HAVE_GAS_FILDS_FISTS
3348 putc ('s', file);
3349#endif
2a2ab3f9
JVA
3350 return;
3351
3352 case 4:
3353 if (GET_MODE (x) == SFmode)
3354 {
e075ae69 3355 putc ('s', file);
2a2ab3f9
JVA
3356 return;
3357 }
3358 else
e075ae69 3359 putc ('l', file);
2a2ab3f9
JVA
3360 return;
3361
5f1ec3e6 3362 case 12:
2b589241 3363 case 16:
e075ae69
RH
3364 putc ('t', file);
3365 return;
5f1ec3e6 3366
2a2ab3f9
JVA
3367 case 8:
3368 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
3369 {
3370#ifdef GAS_MNEMONICS
e075ae69 3371 putc ('q', file);
56c0e8fa 3372#else
e075ae69
RH
3373 putc ('l', file);
3374 putc ('l', file);
56c0e8fa
JVA
3375#endif
3376 }
e075ae69
RH
3377 else
3378 putc ('l', file);
2a2ab3f9 3379 return;
155d8a47
JW
3380
3381 default:
3382 abort ();
2a2ab3f9 3383 }
4af3895e
JVA
3384
3385 case 'b':
3386 case 'w':
3387 case 'k':
3388 case 'h':
3389 case 'y':
a7180f70 3390 case 'm':
5cb6195d 3391 case 'X':
e075ae69 3392 case 'P':
4af3895e
JVA
3393 break;
3394
2d49677f
SC
3395 case 's':
3396 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
3397 {
3398 PRINT_OPERAND (file, x, 0);
e075ae69 3399 putc (',', file);
2d49677f 3400 }
a269a03c
JC
3401 return;
3402
1853aadd 3403 case 'C':
e075ae69 3404 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 3405 return;
fe25fea3 3406 case 'F':
e075ae69 3407 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
3408 return;
3409
e9a25f70 3410 /* Like above, but reverse condition */
e075ae69
RH
3411 case 'c':
3412 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
3413 return;
fe25fea3 3414 case 'f':
e075ae69 3415 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 3416 return;
e5cb57e8 3417
4af3895e 3418 default:
68daafd4
JVA
3419 {
3420 char str[50];
68daafd4
JVA
3421 sprintf (str, "invalid operand code `%c'", code);
3422 output_operand_lossage (str);
3423 }
2a2ab3f9
JVA
3424 }
3425 }
e9a25f70 3426
2a2ab3f9
JVA
3427 if (GET_CODE (x) == REG)
3428 {
3429 PRINT_REG (x, code, file);
3430 }
e9a25f70 3431
2a2ab3f9
JVA
3432 else if (GET_CODE (x) == MEM)
3433 {
e075ae69
RH
3434 /* No `byte ptr' prefix for call instructions. */
3435 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
2a2ab3f9 3436 {
69ddee61 3437 const char * size;
e075ae69
RH
3438 switch (GET_MODE_SIZE (GET_MODE (x)))
3439 {
3440 case 1: size = "BYTE"; break;
3441 case 2: size = "WORD"; break;
3442 case 4: size = "DWORD"; break;
3443 case 8: size = "QWORD"; break;
3444 case 12: size = "XWORD"; break;
a7180f70 3445 case 16: size = "XMMWORD"; break;
e075ae69 3446 default:
564d80f4 3447 abort ();
e075ae69 3448 }
fb204271
DN
3449
3450 /* Check for explicit size override (codes 'b', 'w' and 'k') */
3451 if (code == 'b')
3452 size = "BYTE";
3453 else if (code == 'w')
3454 size = "WORD";
3455 else if (code == 'k')
3456 size = "DWORD";
3457
e075ae69
RH
3458 fputs (size, file);
3459 fputs (" PTR ", file);
2a2ab3f9 3460 }
e075ae69
RH
3461
3462 x = XEXP (x, 0);
3463 if (flag_pic && CONSTANT_ADDRESS_P (x))
3464 output_pic_addr_const (file, x, code);
2a2ab3f9 3465 else
e075ae69 3466 output_address (x);
2a2ab3f9 3467 }
e9a25f70 3468
2a2ab3f9
JVA
3469 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
3470 {
e9a25f70
JL
3471 REAL_VALUE_TYPE r;
3472 long l;
3473
5f1ec3e6
JVA
3474 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3475 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69
RH
3476
3477 if (ASSEMBLER_DIALECT == 0)
3478 putc ('$', file);
52267fcb 3479 fprintf (file, "0x%lx", l);
5f1ec3e6 3480 }
e9a25f70 3481
0f290768 3482 /* These float cases don't actually occur as immediate operands. */
5f1ec3e6
JVA
3483 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
3484 {
e9a25f70
JL
3485 REAL_VALUE_TYPE r;
3486 char dstr[30];
3487
5f1ec3e6
JVA
3488 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3489 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3490 fprintf (file, "%s", dstr);
2a2ab3f9 3491 }
e9a25f70 3492
2b589241
JH
3493 else if (GET_CODE (x) == CONST_DOUBLE
3494 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
2a2ab3f9 3495 {
e9a25f70
JL
3496 REAL_VALUE_TYPE r;
3497 char dstr[30];
3498
5f1ec3e6
JVA
3499 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3500 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3501 fprintf (file, "%s", dstr);
2a2ab3f9 3502 }
79325812 3503 else
2a2ab3f9 3504 {
4af3895e 3505 if (code != 'P')
2a2ab3f9 3506 {
695dac07 3507 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69
RH
3508 {
3509 if (ASSEMBLER_DIALECT == 0)
3510 putc ('$', file);
3511 }
2a2ab3f9
JVA
3512 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
3513 || GET_CODE (x) == LABEL_REF)
e075ae69
RH
3514 {
3515 if (ASSEMBLER_DIALECT == 0)
3516 putc ('$', file);
3517 else
3518 fputs ("OFFSET FLAT:", file);
3519 }
2a2ab3f9 3520 }
e075ae69
RH
3521 if (GET_CODE (x) == CONST_INT)
3522 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3523 else if (flag_pic)
2a2ab3f9
JVA
3524 output_pic_addr_const (file, x, code);
3525 else
3526 output_addr_const (file, x);
3527 }
3528}
3529\f
3530/* Print a memory operand whose address is ADDR. */
3531
3532void
3533print_operand_address (file, addr)
3534 FILE *file;
3535 register rtx addr;
3536{
e075ae69
RH
3537 struct ix86_address parts;
3538 rtx base, index, disp;
3539 int scale;
e9a25f70 3540
e075ae69
RH
3541 if (! ix86_decompose_address (addr, &parts))
3542 abort ();
e9a25f70 3543
e075ae69
RH
3544 base = parts.base;
3545 index = parts.index;
3546 disp = parts.disp;
3547 scale = parts.scale;
e9a25f70 3548
e075ae69
RH
3549 if (!base && !index)
3550 {
3551 /* Displacement only requires special attention. */
e9a25f70 3552
e075ae69 3553 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 3554 {
e075ae69 3555 if (ASSEMBLER_DIALECT != 0)
fb204271
DN
3556 {
3557 if (USER_LABEL_PREFIX[0] == 0)
3558 putc ('%', file);
3559 fputs ("ds:", file);
3560 }
e075ae69 3561 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
2a2ab3f9 3562 }
e075ae69
RH
3563 else if (flag_pic)
3564 output_pic_addr_const (file, addr, 0);
3565 else
3566 output_addr_const (file, addr);
3567 }
3568 else
3569 {
3570 if (ASSEMBLER_DIALECT == 0)
2a2ab3f9 3571 {
e075ae69 3572 if (disp)
2a2ab3f9 3573 {
c399861d 3574 if (flag_pic)
e075ae69
RH
3575 output_pic_addr_const (file, disp, 0);
3576 else if (GET_CODE (disp) == LABEL_REF)
3577 output_asm_label (disp);
2a2ab3f9 3578 else
e075ae69 3579 output_addr_const (file, disp);
2a2ab3f9
JVA
3580 }
3581
e075ae69
RH
3582 putc ('(', file);
3583 if (base)
3584 PRINT_REG (base, 0, file);
3585 if (index)
2a2ab3f9 3586 {
e075ae69
RH
3587 putc (',', file);
3588 PRINT_REG (index, 0, file);
3589 if (scale != 1)
3590 fprintf (file, ",%d", scale);
2a2ab3f9 3591 }
e075ae69 3592 putc (')', file);
2a2ab3f9 3593 }
2a2ab3f9
JVA
3594 else
3595 {
e075ae69 3596 rtx offset = NULL_RTX;
e9a25f70 3597
e075ae69
RH
3598 if (disp)
3599 {
3600 /* Pull out the offset of a symbol; print any symbol itself. */
3601 if (GET_CODE (disp) == CONST
3602 && GET_CODE (XEXP (disp, 0)) == PLUS
3603 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3604 {
3605 offset = XEXP (XEXP (disp, 0), 1);
3606 disp = gen_rtx_CONST (VOIDmode,
3607 XEXP (XEXP (disp, 0), 0));
3608 }
ce193852 3609
e075ae69
RH
3610 if (flag_pic)
3611 output_pic_addr_const (file, disp, 0);
3612 else if (GET_CODE (disp) == LABEL_REF)
3613 output_asm_label (disp);
3614 else if (GET_CODE (disp) == CONST_INT)
3615 offset = disp;
3616 else
3617 output_addr_const (file, disp);
3618 }
e9a25f70 3619
e075ae69
RH
3620 putc ('[', file);
3621 if (base)
a8620236 3622 {
e075ae69
RH
3623 PRINT_REG (base, 0, file);
3624 if (offset)
3625 {
3626 if (INTVAL (offset) >= 0)
3627 putc ('+', file);
3628 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3629 }
a8620236 3630 }
e075ae69
RH
3631 else if (offset)
3632 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 3633 else
e075ae69 3634 putc ('0', file);
e9a25f70 3635
e075ae69
RH
3636 if (index)
3637 {
3638 putc ('+', file);
3639 PRINT_REG (index, 0, file);
3640 if (scale != 1)
3641 fprintf (file, "*%d", scale);
3642 }
3643 putc (']', file);
3644 }
2a2ab3f9
JVA
3645 }
3646}
3647\f
3648/* Split one or more DImode RTL references into pairs of SImode
3649 references. The RTL can be REG, offsettable MEM, integer constant, or
3650 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3651 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 3652 that parallel "operands". */
2a2ab3f9
JVA
3653
3654void
3655split_di (operands, num, lo_half, hi_half)
3656 rtx operands[];
3657 int num;
3658 rtx lo_half[], hi_half[];
3659{
3660 while (num--)
3661 {
57dbca5e 3662 rtx op = operands[num];
e075ae69
RH
3663 if (CONSTANT_P (op))
3664 split_double (op, &lo_half[num], &hi_half[num]);
3665 else if (! reload_completed)
a269a03c
JC
3666 {
3667 lo_half[num] = gen_lowpart (SImode, op);
3668 hi_half[num] = gen_highpart (SImode, op);
3669 }
3670 else if (GET_CODE (op) == REG)
2a2ab3f9 3671 {
57dbca5e
BS
3672 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3673 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
2a2ab3f9 3674 }
57dbca5e 3675 else if (offsettable_memref_p (op))
2a2ab3f9 3676 {
57dbca5e
BS
3677 rtx lo_addr = XEXP (op, 0);
3678 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3679 lo_half[num] = change_address (op, SImode, lo_addr);
3680 hi_half[num] = change_address (op, SImode, hi_addr);
2a2ab3f9
JVA
3681 }
3682 else
564d80f4 3683 abort ();
2a2ab3f9
JVA
3684 }
3685}
3686\f
2a2ab3f9
JVA
3687/* Output code to perform a 387 binary operation in INSN, one of PLUS,
3688 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3689 is the expression of the binary operation. The output may either be
3690 emitted here, or returned to the caller, like all output_* functions.
3691
3692 There is no guarantee that the operands are the same mode, as they
0f290768 3693 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 3694
e3c2afab
AM
3695#ifndef SYSV386_COMPAT
3696/* Set to 1 for compatibility with brain-damaged assemblers. No-one
3697 wants to fix the assemblers because that causes incompatibility
3698 with gcc. No-one wants to fix gcc because that causes
3699 incompatibility with assemblers... You can use the option of
3700 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
3701#define SYSV386_COMPAT 1
3702#endif
3703
69ddee61 3704const char *
2a2ab3f9
JVA
3705output_387_binary_op (insn, operands)
3706 rtx insn;
3707 rtx *operands;
3708{
e3c2afab 3709 static char buf[30];
69ddee61 3710 const char *p;
2a2ab3f9 3711
e3c2afab
AM
3712#ifdef ENABLE_CHECKING
3713 /* Even if we do not want to check the inputs, this documents input
3714 constraints. Which helps in understanding the following code. */
3715 if (STACK_REG_P (operands[0])
3716 && ((REG_P (operands[1])
3717 && REGNO (operands[0]) == REGNO (operands[1])
3718 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
3719 || (REG_P (operands[2])
3720 && REGNO (operands[0]) == REGNO (operands[2])
3721 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
3722 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
3723 ; /* ok */
3724 else
3725 abort ();
3726#endif
3727
2a2ab3f9
JVA
3728 switch (GET_CODE (operands[3]))
3729 {
3730 case PLUS:
e075ae69
RH
3731 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3732 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3733 p = "fiadd";
3734 else
3735 p = "fadd";
2a2ab3f9
JVA
3736 break;
3737
3738 case MINUS:
e075ae69
RH
3739 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3740 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3741 p = "fisub";
3742 else
3743 p = "fsub";
2a2ab3f9
JVA
3744 break;
3745
3746 case MULT:
e075ae69
RH
3747 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3748 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3749 p = "fimul";
3750 else
3751 p = "fmul";
2a2ab3f9
JVA
3752 break;
3753
3754 case DIV:
e075ae69
RH
3755 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3756 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3757 p = "fidiv";
3758 else
3759 p = "fdiv";
2a2ab3f9
JVA
3760 break;
3761
3762 default:
3763 abort ();
3764 }
3765
e075ae69 3766 strcpy (buf, p);
2a2ab3f9
JVA
3767
3768 switch (GET_CODE (operands[3]))
3769 {
3770 case MULT:
3771 case PLUS:
3772 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
3773 {
e3c2afab 3774 rtx temp = operands[2];
2a2ab3f9
JVA
3775 operands[2] = operands[1];
3776 operands[1] = temp;
3777 }
3778
e3c2afab
AM
3779 /* know operands[0] == operands[1]. */
3780
2a2ab3f9 3781 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
3782 {
3783 p = "%z2\t%2";
3784 break;
3785 }
2a2ab3f9
JVA
3786
3787 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
3788 {
3789 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
3790 /* How is it that we are storing to a dead operand[2]?
3791 Well, presumably operands[1] is dead too. We can't
3792 store the result to st(0) as st(0) gets popped on this
3793 instruction. Instead store to operands[2] (which I
3794 think has to be st(1)). st(1) will be popped later.
3795 gcc <= 2.8.1 didn't have this check and generated
3796 assembly code that the Unixware assembler rejected. */
3797 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 3798 else
e3c2afab 3799 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 3800 break;
6b28fd63 3801 }
2a2ab3f9
JVA
3802
3803 if (STACK_TOP_P (operands[0]))
e3c2afab 3804 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 3805 else
e3c2afab 3806 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 3807 break;
2a2ab3f9
JVA
3808
3809 case MINUS:
3810 case DIV:
3811 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
3812 {
3813 p = "r%z1\t%1";
3814 break;
3815 }
2a2ab3f9
JVA
3816
3817 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
3818 {
3819 p = "%z2\t%2";
3820 break;
3821 }
2a2ab3f9 3822
2a2ab3f9 3823 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 3824 {
e3c2afab
AM
3825#if SYSV386_COMPAT
3826 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
3827 derived assemblers, confusingly reverse the direction of
3828 the operation for fsub{r} and fdiv{r} when the
3829 destination register is not st(0). The Intel assembler
3830 doesn't have this brain damage. Read !SYSV386_COMPAT to
3831 figure out what the hardware really does. */
3832 if (STACK_TOP_P (operands[0]))
3833 p = "{p\t%0, %2|rp\t%2, %0}";
3834 else
3835 p = "{rp\t%2, %0|p\t%0, %2}";
3836#else
6b28fd63 3837 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
3838 /* As above for fmul/fadd, we can't store to st(0). */
3839 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 3840 else
e3c2afab
AM
3841 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3842#endif
e075ae69 3843 break;
6b28fd63 3844 }
2a2ab3f9
JVA
3845
3846 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 3847 {
e3c2afab 3848#if SYSV386_COMPAT
6b28fd63 3849 if (STACK_TOP_P (operands[0]))
e3c2afab 3850 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 3851 else
e3c2afab
AM
3852 p = "{p\t%1, %0|rp\t%0, %1}";
3853#else
3854 if (STACK_TOP_P (operands[0]))
3855 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
3856 else
3857 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
3858#endif
e075ae69 3859 break;
6b28fd63 3860 }
2a2ab3f9
JVA
3861
3862 if (STACK_TOP_P (operands[0]))
3863 {
3864 if (STACK_TOP_P (operands[1]))
e3c2afab 3865 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 3866 else
e3c2afab 3867 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 3868 break;
2a2ab3f9
JVA
3869 }
3870 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
3871 {
3872#if SYSV386_COMPAT
3873 p = "{\t%1, %0|r\t%0, %1}";
3874#else
3875 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
3876#endif
3877 }
2a2ab3f9 3878 else
e3c2afab
AM
3879 {
3880#if SYSV386_COMPAT
3881 p = "{r\t%2, %0|\t%0, %2}";
3882#else
3883 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
3884#endif
3885 }
e075ae69 3886 break;
2a2ab3f9
JVA
3887
3888 default:
3889 abort ();
3890 }
e075ae69
RH
3891
3892 strcat (buf, p);
3893 return buf;
2a2ab3f9 3894}
e075ae69 3895
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.

   The x87 rounds according to the control word, so the sequence below
   saves the control word, forces round-toward-zero (truncation), does
   the store, then restores the original control word.  Returns ""
   because all code is emitted via output_asm_insn.  */

const char *
output_fix_trunc (insn, operands)
     rtx insn;
     rtx *operands;
{
  /* Nonzero when st(0) dies in this insn, i.e. a popping fist may be used.  */
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  rtx xops[4];

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (! STACK_TOP_P (operands[1]))
    abort ();

  /* Byte 1 of the saved control word holds the rounding-control bits;
     0x0c00 (the 12 written below) selects round toward zero.
     NOTE(review): operands[2] is assumed to be the control-word slot and
     operands[4] a scratch register -- set up by the insn pattern.  */
  xops[0] = GEN_INT (12);
  xops[1] = adj_offsettable_operand (operands[2], 1);
  xops[1] = change_address (xops[1], QImode, NULL_RTX);

  /* Store directly to a MEM destination; otherwise go through the
     temporary slot operands[3] and copy to the register afterward.  */
  xops[2] = operands[0];
  if (GET_CODE (operands[0]) != MEM)
    xops[2] = operands[3];

  output_asm_insn ("fnstcw\t%2", operands);
  output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
  output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
  output_asm_insn ("fldcw\t%2", operands);
  output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);

  /* DImode always pops (fistp is the only 64-bit form); otherwise pop
     only when st(0) dies.  */
  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z2\t%2", xops);
  else
    output_asm_insn ("fist%z2\t%2", xops);

  /* Restore the caller's rounding mode.  */
  output_asm_insn ("fldcw\t%2", operands);

  if (GET_CODE (operands[0]) != MEM)
    {
      if (dimode_p)
	{
	  /* Move the 64-bit temporary into the register pair.  */
	  split_di (operands+0, 1, xops+0, xops+1);
	  split_di (operands+3, 1, xops+2, xops+3);
	  output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
	  output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
	}
      else if (GET_MODE (operands[0]) == SImode)
	output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
      else
	output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
    }

  return "";
}
cda749b1 3956
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.

   Returns the assembler template for the comparison (possibly multiple
   instructions joined with \n\t), or emits leading instructions via
   output_asm_insn and returns the final one.  */

const char *
output_fp_compare (insn, operands, eflags_p, unordered_p)
     rtx insn;
     rtx *operands;
     int eflags_p, unordered_p;
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];

  /* For fnstsw, operand 0 is the status-word destination; the actual
     comparison operands shift up by one.  */
  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }

  /* The first comparison operand must already be in st(0).  */
  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p == 1)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return "fstp\t%y0";
	}
      else
	{
	  if (eflags_p == 2)
	    {
	      if (unordered_p)
		return "fucompp\n\tfnstsw\t%0";
	      else
		return "fcompp\n\tfnstsw\t%0";
	    }
	  else
	    {
	      if (unordered_p)
		return "fucompp";
	      else
		return "fcompp";
	    }
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      /* Template table indexed by the 5-bit mask built below; NULL slots
	 are encodings that can never be requested.  */
      static const char * const alt[24] =
      {
	"fcom%z1\t%y1",
	"fcomp%z1\t%y1",
	"fucom%z1\t%y1",
	"fucomp%z1\t%y1",

	"ficom%z1\t%y1",
	"ficomp%z1\t%y1",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL,

	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 24)
	abort ();
      ret = alt[mask];
      if (ret == NULL)
	abort ();

      return ret;
    }
}
2a2ab3f9 4073
e075ae69 4074/* Output assembler code to FILE to initialize basic-block profiling.
2a2ab3f9 4075
e075ae69 4076 If profile_block_flag == 2
2a2ab3f9 4077
e075ae69
RH
4078 Output code to call the subroutine `__bb_init_trace_func'
4079 and pass two parameters to it. The first parameter is
4080 the address of a block allocated in the object module.
4081 The second parameter is the number of the first basic block
4082 of the function.
2a2ab3f9 4083
e075ae69 4084 The name of the block is a local symbol made with this statement:
0f290768 4085
e075ae69 4086 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
2a2ab3f9 4087
e075ae69
RH
4088 Of course, since you are writing the definition of
4089 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4090 can take a short cut in the definition of this macro and use the
4091 name that you know will result.
2a2ab3f9 4092
e075ae69
RH
4093 The number of the first basic block of the function is
4094 passed to the macro in BLOCK_OR_LABEL.
2a2ab3f9 4095
e075ae69
RH
4096 If described in a virtual assembler language the code to be
4097 output looks like:
2a2ab3f9 4098
e075ae69
RH
4099 parameter1 <- LPBX0
4100 parameter2 <- BLOCK_OR_LABEL
4101 call __bb_init_trace_func
2a2ab3f9 4102
e075ae69 4103 else if profile_block_flag != 0
e74389ff 4104
e075ae69
RH
4105 Output code to call the subroutine `__bb_init_func'
4106 and pass one single parameter to it, which is the same
4107 as the first parameter to `__bb_init_trace_func'.
e74389ff 4108
e075ae69
RH
4109 The first word of this parameter is a flag which will be nonzero if
4110 the object module has already been initialized. So test this word
4111 first, and do not call `__bb_init_func' if the flag is nonzero.
4112 Note: When profile_block_flag == 2 the test need not be done
4113 but `__bb_init_trace_func' *must* be called.
e74389ff 4114
e075ae69
RH
4115 BLOCK_OR_LABEL may be used to generate a label number as a
4116 branch destination in case `__bb_init_func' will not be called.
e74389ff 4117
e075ae69
RH
4118 If described in a virtual assembler language the code to be
4119 output looks like:
2a2ab3f9 4120
e075ae69
RH
4121 cmp (LPBX0),0
4122 jne local_label
4123 parameter1 <- LPBX0
4124 call __bb_init_func
4125 local_label:
4126*/
c572e5ba 4127
e075ae69
RH
4128void
4129ix86_output_function_block_profiler (file, block_or_label)
4130 FILE *file;
4131 int block_or_label;
c572e5ba 4132{
e075ae69
RH
4133 static int num_func = 0;
4134 rtx xops[8];
4135 char block_table[80], false_label[80];
c572e5ba 4136
e075ae69 4137 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
e9a25f70 4138
e075ae69
RH
4139 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4140 xops[5] = stack_pointer_rtx;
4141 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
2a2ab3f9 4142
e075ae69 4143 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
c572e5ba 4144
e075ae69 4145 switch (profile_block_flag)
c572e5ba 4146 {
e075ae69
RH
4147 case 2:
4148 xops[2] = GEN_INT (block_or_label);
4149 xops[3] = gen_rtx_MEM (Pmode,
4150 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4151 xops[6] = GEN_INT (8);
e9a25f70 4152
e075ae69
RH
4153 output_asm_insn ("push{l}\t%2", xops);
4154 if (!flag_pic)
4155 output_asm_insn ("push{l}\t%1", xops);
e9a25f70 4156 else
870a0c2c 4157 {
e075ae69
RH
4158 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4159 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 4160 }
e075ae69
RH
4161 output_asm_insn ("call\t%P3", xops);
4162 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4163 break;
c572e5ba 4164
e075ae69
RH
4165 default:
4166 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
c572e5ba 4167
e075ae69
RH
4168 xops[0] = const0_rtx;
4169 xops[2] = gen_rtx_MEM (Pmode,
4170 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4171 xops[3] = gen_rtx_MEM (Pmode,
4172 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4173 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4174 xops[6] = GEN_INT (4);
a14003ee 4175
e075ae69 4176 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
446ba526 4177
e075ae69
RH
4178 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4179 output_asm_insn ("jne\t%2", xops);
870a0c2c 4180
e075ae69
RH
4181 if (!flag_pic)
4182 output_asm_insn ("push{l}\t%1", xops);
4183 else
4184 {
4185 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
4186 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 4187 }
e075ae69
RH
4188 output_asm_insn ("call\t%P3", xops);
4189 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4190 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4191 num_func++;
4192 break;
c572e5ba 4193 }
2a2ab3f9 4194}
305f097e 4195
/* Output assembler code to FILE to increment a counter associated
   with basic block number BLOCKNO.

   If profile_block_flag == 2

	Output code to initialize the global structure `__bb' and
	call the function `__bb_trace_func' which will increment the
	counter.

	`__bb' consists of two words.  In the first word the number
	of the basic block has to be stored.  In the second word
	the address of a block allocated in the object module
	has to be stored.

	The basic block number is given by BLOCKNO.

	The address of the block is given by the label created with

	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);

	by FUNCTION_BLOCK_PROFILER.

	Of course, since you are writing the definition of
	`ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
	can take a short cut in the definition of this macro and use the
	name that you know will result.

	If described in a virtual assembler language the code to be
	output looks like:

		move BLOCKNO -> (__bb)
		move LPBX0 -> (__bb+4)
		call __bb_trace_func

	Note that function `__bb_trace_func' must not change the
	machine state, especially the flag register.  To grant
	this, you must output code to save and restore registers
	either in this macro or in the macros MACHINE_STATE_SAVE
	and MACHINE_STATE_RESTORE.  The last two macros will be
	used in the function `__bb_trace_func', so you must make
	sure that the function prologue does not change any
	register prior to saving it with MACHINE_STATE_SAVE.

   else if profile_block_flag != 0

	Output code to increment the counter directly.
	Basic blocks are numbered separately from zero within each
	compiled object module.  The count associated with block number
	BLOCKNO is at index BLOCKNO in an array of words; the name of
	this array is a local symbol made with this statement:

	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);

	Of course, since you are writing the definition of
	`ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
	can take a short cut in the definition of this macro and use the
	name that you know will result.

	If described in a virtual assembler language the code to be
	output looks like:

		inc (LPBX2+4*BLOCKNO)
*/

void
ix86_output_block_profiler (file, blockno)
     FILE *file ATTRIBUTE_UNUSED;
     int blockno;
{
  rtx xops[8], cnt_rtx;
  char counts[80];
  char *block_table = counts;

  switch (profile_block_flag)
    {
    case 2:
      ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);

      xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
      xops[2] = GEN_INT (blockno);
      xops[3] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
      xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
      /* __bb+4 is the second word: the block-table address slot.  */
      xops[5] = plus_constant (xops[4], 4);
      xops[0] = gen_rtx_MEM (SImode, xops[4]);
      xops[6] = gen_rtx_MEM (SImode, xops[5]);

      CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;

      /* Preserve the flags across the store/call sequence; the called
	 function must not observe or change machine state.  */
      output_asm_insn ("pushf", xops);
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      if (flag_pic)
	{
	  /* Under PIC, compute the label's address via eax, preserving
	     eax around the computation.  */
	  xops[7] = gen_rtx_REG (Pmode, 0);	/* eax */
	  output_asm_insn ("push{l}\t%7", xops);
	  output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
	  output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
	  output_asm_insn ("pop{l}\t%7", xops);
	}
      else
	output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
      output_asm_insn ("call\t%P3", xops);
      output_asm_insn ("popf", xops);

      break;

    default:
      /* Direct increment of LPBX2[BLOCKNO]; no function call needed.  */
      ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
      cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
      SYMBOL_REF_FLAG (cnt_rtx) = TRUE;

      if (blockno)
	cnt_rtx = plus_constant (cnt_rtx, blockno*4);

      if (flag_pic)
	cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);

      xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
      output_asm_insn ("inc{l}\t%0", xops);

      break;
    }
}
32b5b1aa 4319\f
/* Expand a move of MODE from operands[1] to operands[0], massaging the
   operands into forms the movMM patterns accept and emitting the SET.
   Handles PIC symbol loads, memory-to-memory moves, pushes of values
   that would need eliminable registers, and FP constants.  */

void
ix86_expand_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  /* During/after reload no new pseudos or constant-pool entries may be
     created; `strict' suppresses the FP-constant spill below.  */
  int strict = (reload_in_progress || reload_completed);
  rtx insn;

  if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
    {
      /* Emit insns to move operands[1] into operands[0].  */

      if (GET_CODE (operands[0]) == MEM)
	operands[1] = force_reg (Pmode, operands[1]);
      else
	{
	  /* Legitimize the symbolic address, reusing the destination
	     register as scratch when possible; if legitimization already
	     produced the final move, we are done.  */
	  rtx temp = operands[0];
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (operands[1], temp);
	  if (temp == operands[0])
	    return;
	  operands[1] = temp;
	}
    }
  else
    {
      /* mem-to-mem moves are not supported (except push of mem, which
	 the hardware allows for non-QImode).  */
      if (GET_CODE (operands[0]) == MEM
	  && (GET_MODE (operands[0]) == QImode
	      || !push_operand (operands[0], mode))
	  && GET_CODE (operands[1]) == MEM)
	operands[1] = force_reg (mode, operands[1]);

      if (push_operand (operands[0], mode)
	  && ! general_no_elim_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (operands[1]) == CONST_DOUBLE
		   && register_operand (operands[0], mode))
	    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
	}
    }

  insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);

  emit_insn (insn);
}
e9a25f70 4375
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.

   CODE is the rtx code of the operation, MODE its machine mode, and
   OPERANDS is dst/src1/src2.  Emits the insn (with a flags clobber
   except during reload) and a fixup move if the destination had to be
   replaced by a register.  */

void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  /* 1 when dst matches src1 in memory, 2 when it matches src2.  */
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
4465
4466/* Return TRUE or FALSE depending on whether the binary operator meets the
4467 appropriate constraints. */
4468
4469int
4470ix86_binary_operator_ok (code, mode, operands)
4471 enum rtx_code code;
4472 enum machine_mode mode ATTRIBUTE_UNUSED;
4473 rtx operands[3];
4474{
4475 /* Both source operands cannot be in memory. */
4476 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4477 return 0;
4478 /* If the operation is not commutable, source 1 cannot be a constant. */
4479 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4480 return 0;
4481 /* If the destination is memory, we must have a matching source operand. */
4482 if (GET_CODE (operands[0]) == MEM
4483 && ! (rtx_equal_p (operands[0], operands[1])
4484 || (GET_RTX_CLASS (code) == 'c'
4485 && rtx_equal_p (operands[0], operands[2]))))
4486 return 0;
06a964de
JH
4487 /* If the operation is not commutable and the source 1 is memory, we must
4488 have a matching destionation. */
4489 if (GET_CODE (operands[1]) == MEM
4490 && GET_RTX_CLASS (code) != 'c'
4491 && ! rtx_equal_p (operands[0], operands[1]))
4492 return 0;
e075ae69
RH
4493 return 1;
4494}
4495
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.

   CODE is the rtx code of the operation, MODE its machine mode, and
   OPERANDS is dst/src.  Emits the insn (NOT gets no flags clobber;
   everything else does) and a fixup move if needed.  */

void
ix86_expand_unary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  /* Nonzero when dst and src are the same memory location.  */
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
4557
4558/* Return TRUE or FALSE depending on whether the unary operator meets the
4559 appropriate constraints. */
4560
4561int
4562ix86_unary_operator_ok (code, mode, operands)
4563 enum rtx_code code ATTRIBUTE_UNUSED;
4564 enum machine_mode mode ATTRIBUTE_UNUSED;
4565 rtx operands[2] ATTRIBUTE_UNUSED;
4566{
06a964de
JH
4567 /* If one of operands is memory, source and destination must match. */
4568 if ((GET_CODE (operands[0]) == MEM
4569 || GET_CODE (operands[1]) == MEM)
4570 && ! rtx_equal_p (operands[0], operands[1]))
4571 return FALSE;
e075ae69
RH
4572 return TRUE;
4573}
4574
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.

   The switch below walks the CC-mode lattice via deliberate
   fallthroughs: each case rejects the requested modes that are
   strictly stronger than the insn's actual mode, then falls into the
   weaker cases.  */

int
ix86_match_ccmode (insn, req_mode)
     rtx insn;
     enum machine_mode req_mode;
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      /* CCNOmode only satisfies a CCmode request when comparing
	 against zero.  */
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  /* Finally, source and destination CC modes must agree.  */
  return (GET_MODE (SET_SRC (set)) == set_mode);
}
4625
/* Generate insn patterns to do an integer compare of OPERANDS.
   Emits the flags-setting COMPARE and returns the CODE test rtx
   (flags against zero) for use by the bcc/scc/cmov consumer.  */

static rtx
ix86_expand_int_compare (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
4648
3a3677ff
RH
4649/* Figure out whether to use ordered or unordered fp comparisons.
4650 Return the appropriate mode to use. */
e075ae69 4651
b1cdafbb 4652enum machine_mode
3a3677ff 4653ix86_fp_compare_mode (code)
8752c357 4654 enum rtx_code code ATTRIBUTE_UNUSED;
e075ae69 4655{
9e7adcb3
JH
4656 /* ??? In order to make all comparisons reversible, we do all comparisons
4657 non-trapping when compiling for IEEE. Once gcc is able to distinguish
4658 all forms trapping and nontrapping comparisons, we can make inequality
4659 comparisons trapping again, since it results in better code when using
4660 FCOM based compares. */
4661 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
4662}
4663
/* Select the CC mode needed for comparison CODE of OP0 and OP1.
   FP comparisons defer to ix86_fp_compare_mode; integer comparisons
   pick the weakest mode (fewest required flags) that still decides
   CODE, which maximizes sharing of flag-setting instructions.  */

enum machine_mode
ix86_cc_mode (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests agains overflow
         that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
    default:
      abort ();
    }
}
4706
3a3677ff
RH
4707/* Return true if we should use an FCOMI instruction for this fp comparison. */
4708
a940d8bd 4709int
3a3677ff 4710ix86_use_fcomi_compare (code)
9e7adcb3 4711 enum rtx_code code ATTRIBUTE_UNUSED;
3a3677ff 4712{
9e7adcb3
JH
4713 enum rtx_code swapped_code = swap_condition (code);
4714 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
4715 || (ix86_fp_comparison_cost (swapped_code)
4716 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
4717}
4718
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparsion code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (code, pop0, pop1)
     enum rtx_code code;
     rtx *pop0, *pop1;
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (fpcmp_mode == CCFPUmode
      || op_mode == XFmode
      || op_mode == TFmode
      || ix86_use_fcomi_compare (code))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  /* Loadable 387 constants (0.0, 1.0, ...) stay as registers;
	     everything else goes through the constant pool.  */
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op0) == REG || !reload_completed))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
4788
c0c102a9
JH
4789/* Convert comparison codes we use to represent FP comparison to integer
4790 code that will result in proper branch. Return UNKNOWN if no such code
4791 is available. */
4792static enum rtx_code
4793ix86_fp_compare_code_to_integer (code)
4794 enum rtx_code code;
4795{
4796 switch (code)
4797 {
4798 case GT:
4799 return GTU;
4800 case GE:
4801 return GEU;
4802 case ORDERED:
4803 case UNORDERED:
4804 return code;
4805 break;
4806 case UNEQ:
4807 return EQ;
4808 break;
4809 case UNLT:
4810 return LTU;
4811 break;
4812 case UNLE:
4813 return LEU;
4814 break;
4815 case LTGT:
4816 return NE;
4817 break;
4818 default:
4819 return UNKNOWN;
4820 }
4821}
4822
/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is comparison code for branch that will
   branch around FIRST_CODE and SECOND_CODE.  If some of branches
   is not required, set value to NIL.
   We never require more than two branches.  */

static void
ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
     enum rtx_code code, *bypass_code, *first_code, *second_code;
{
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

       cmp    ZF PF CF
       >      0  0  0
       <      0  0  1
       =      1  0  0
       un     1  1  1 */

  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      /* Directly expressible with one flag test; no extra branch.  */
      break;
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  /* Without IEEE conformance the unordered case never needs the
     extra branches.  */
  if (!TARGET_IEEE_FP)
    {
      *second_code = NIL;
      *bypass_code = NIL;
    }
}
4888
9e7adcb3
JH
4889/* Return cost of comparison done fcom + arithmetics operations on AX.
4890 All following functions do use number of instructions as an cost metrics.
4891 In future this should be tweaked to compute bytes for optimize_size and
4892 take into account performance of various instructions on various CPUs. */
4893static int
4894ix86_fp_comparison_arithmetics_cost (code)
4895 enum rtx_code code;
4896{
4897 if (!TARGET_IEEE_FP)
4898 return 4;
4899 /* The cost of code output by ix86_expand_fp_compare. */
4900 switch (code)
4901 {
4902 case UNLE:
4903 case UNLT:
4904 case LTGT:
4905 case GT:
4906 case GE:
4907 case UNORDERED:
4908 case ORDERED:
4909 case UNEQ:
4910 return 4;
4911 break;
4912 case LT:
4913 case NE:
4914 case EQ:
4915 case UNGE:
4916 return 5;
4917 break;
4918 case LE:
4919 case UNGT:
4920 return 6;
4921 break;
4922 default:
4923 abort ();
4924 }
4925}
4926
4927/* Return cost of comparison done using fcomi operation.
4928 See ix86_fp_comparison_arithmetics_cost for the metrics. */
4929static int
4930ix86_fp_comparison_fcomi_cost (code)
4931 enum rtx_code code;
4932{
4933 enum rtx_code bypass_code, first_code, second_code;
4934 /* Return arbitarily high cost when instruction is not supported - this
4935 prevents gcc from using it. */
4936 if (!TARGET_CMOVE)
4937 return 1024;
4938 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
4939 return (bypass_code != NIL || second_code != NIL) + 2;
4940}
4941
4942/* Return cost of comparison done using sahf operation.
4943 See ix86_fp_comparison_arithmetics_cost for the metrics. */
4944static int
4945ix86_fp_comparison_sahf_cost (code)
4946 enum rtx_code code;
4947{
4948 enum rtx_code bypass_code, first_code, second_code;
4949 /* Return arbitarily high cost when instruction is not preferred - this
4950 avoids gcc from using it. */
4951 if (!TARGET_USE_SAHF && !optimize_size)
4952 return 1024;
4953 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
4954 return (bypass_code != NIL || second_code != NIL) + 3;
4955}
4956
4957/* Compute cost of the comparison done using any method.
4958 See ix86_fp_comparison_arithmetics_cost for the metrics. */
4959static int
4960ix86_fp_comparison_cost (code)
4961 enum rtx_code code;
4962{
4963 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
4964 int min;
4965
4966 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
4967 sahf_cost = ix86_fp_comparison_sahf_cost (code);
4968
4969 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
4970 if (min > sahf_cost)
4971 min = sahf_cost;
4972 if (min > fcomi_cost)
4973 min = fcomi_cost;
4974 return min;
4975}
c0c102a9 4976
3a3677ff
RH
/* Generate insn patterns to do a floating point compare of OPERANDS.

   CODE is the comparison to perform on OP0/OP1.  SCRATCH is a QImode-usable
   HImode register used to receive the FPU status word via fnstsw when the
   sahf/arithmetics paths are taken.  When the chosen sequence needs more
   than one flags test, *SECOND_TEST and/or *BYPASS_TEST receive the extra
   comparison rtxen (see ix86_fp_comparison_codes); they are set to NULL_RTX
   otherwise.  Returns the comparison rtx to put into the flags user (bcc,
   scc, or cmov).  */

static rtx
ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
     enum rtx_code code;
     rtx op0, op1, scratch;
     rtx *second_test;
     rtx *bypass_test;
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  Only usable when the
     caller supplied slots for any extra tests the split requires.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
	{
	  /* fcomi: compare straight into the flags register.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			     tmp);
	  emit_insn (tmp);
	}
      else
	{
	  /* fnstsw + sahf: fetch the FPU status word into SCRATCH, then
	     copy AH into the flags.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
	  emit_insn (gen_x86_sahf_1 (scratch));
	}

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
      if (second_code != NIL)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.

	 NOTE(review): the masks below test FPU condition bits in AH;
	 0x45 presumably selects C0|C2|C3, 0x01 C0, 0x04 C2, 0x40 C3 --
	 confirm against the fcomi flag table earlier in this file.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	      break;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  abort ();
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
5162
9e3e266c 5163rtx
a1b8572c 5164ix86_expand_compare (code, second_test, bypass_test)
e075ae69 5165 enum rtx_code code;
a1b8572c 5166 rtx *second_test, *bypass_test;
e075ae69
RH
5167{
5168 rtx op0, op1, ret;
5169 op0 = ix86_compare_op0;
5170 op1 = ix86_compare_op1;
5171
a1b8572c
JH
5172 if (second_test)
5173 *second_test = NULL_RTX;
5174 if (bypass_test)
5175 *bypass_test = NULL_RTX;
5176
e075ae69 5177 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9e7adcb3 5178 ret = ix86_expand_fp_compare (code, op0, op1, gen_reg_rtx (HImode),
77ebd435 5179 second_test, bypass_test);
32b5b1aa 5180 else
e075ae69
RH
5181 ret = ix86_expand_int_compare (code, op0, op1);
5182
5183 return ret;
5184}
5185
/* Emit a conditional jump to LABEL taken when CODE holds between
   ix86_compare_op0 and ix86_compare_op1.  Integer modes expand the
   compare immediately; FP modes defer it inside a PARALLEL so later
   passes can still reverse the condition; DImode is decomposed into
   SImode compare+branch sequences (recursing through this function).  */
void
ix86_expand_branch (code, label)
     enum rtx_code code;
     rtx label;
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      /* Simple integer case: compare, then jump on the flags.  */
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
    case TFmode:
      /* Don't expand the comparison early, so that we get better code
	 when jump or whoever decides to reverse the comparison.  */
      {
	rtvec vec;
	int use_fcomi;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	tmp = gen_rtx_fmt_ee (code, VOIDmode,
			      ix86_compare_op0, ix86_compare_op1);
	tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				    gen_rtx_LABEL_REF (VOIDmode, label),
				    pc_rtx);
	tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	/* Clobber flags registers (18, 17); without fcomi also a
	   scratch HImode register for the fnstsw result.  */
	use_fcomi = ix86_use_fcomi_compare (code);
	vec = rtvec_alloc (3 + !use_fcomi);
	RTVEC_ELT (vec, 0) = tmp;
	RTVEC_ELT (vec, 1)
	  = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	RTVEC_ELT (vec, 2)
	  = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	if (! use_fcomi)
	  RTVEC_ELT (vec, 3)
	    = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	return;
      }

    case DImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	/* Canonicalize: constant operand second.  */
	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    /* Recurse as an SImode compare of the OR against zero.  */
	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = NIL; code2 = NE;  break;
	  case NE:   code2 = NIL; break;

	  default:
	    abort ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != NIL)
	  ix86_expand_branch (code1, label);
	if (code2 != NIL)
	  ix86_expand_branch (code2, label2);

	/* Low words are compared unsigned regardless of CODE.  */
	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != NIL)
	  emit_label (label2);
	return;
      }

    default:
      abort ();
    }
}
e075ae69 5355
9e7adcb3
JH
/* Split branch based on floating point condition.

   CONDITION compares OP1 with OP2; TARGET1 is taken when it holds,
   TARGET2 otherwise (one of them is pc_rtx for fallthrough).  TMP is a
   scratch register passed through to ix86_expand_fp_compare.  When the
   comparison splits into several flag tests, extra jump insns (and a
   local bypass label) are emitted here.  */
void
ix86_split_fp_branch (condition, op1, op2, target1, target2, tmp)
     rtx condition, op1, op2, target1, target2, tmp;
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  enum rtx_code code = GET_CODE (condition);

  /* Normalize so that TARGET2 is the fallthrough (pc_rtx), reversing
     the condition if needed.  */
  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);
  /* The bypass test jumps over both real branches.  */
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     bypass,
					     gen_rtx_LABEL_REF (VOIDmode,
								label),
					     pc_rtx)));
    }
  /* AMD Athlon and probably other CPUs too have fast bypass path between the
     comparison and first branch.  The second branch takes longer to execute
     so place first branch the worse predicable one if possible.  */
  if (second != NULL_RTX
      && (GET_CODE (second) == UNORDERED || GET_CODE (second) == ORDERED))
    {
      rtx tmp = condition;
      condition = second;
      second = tmp;
    }
  emit_jump_insn (gen_rtx_SET
		  (VOIDmode, pc_rtx,
		   gen_rtx_IF_THEN_ELSE (VOIDmode,
					 condition, target1, target2)));
  if (second != NULL_RTX)
    emit_jump_insn (gen_rtx_SET
		    (VOIDmode, pc_rtx,
		     gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, target2)));
  if (label != NULL_RTX)
    emit_label (label);
}
5407
/* Store into DEST the result of comparing ix86_compare_op0 with
   ix86_compare_op1 under CODE, using setcc (plus and/or insns when the
   FP comparison splits into multiple tests).  Returns 1 on success
   (DONE), 0 when DImode comparisons make this unsupported (FAIL).  */
int
ix86_expand_setcc (code, dest)
     enum rtx_code code;
     rtx dest;
{
  rtx ret, tmp, tmpreg;
  rtx second_test, bypass_test;
  int type;

  if (GET_MODE (ix86_compare_op0) == DImode)
    return 0; /* FAIL */

  /* Three modes of generation:
     0 -- destination does not overlap compare sources:
	  clear dest first, emit strict_low_part setcc.
     1 -- destination does overlap compare sources:
	  emit subreg setcc, zero extend.
     2 -- destination is in QImode:
	  emit setcc only.
  */

  type = 0;

  if (GET_MODE (dest) == QImode)
    type = 2;
  else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
	   || reg_overlap_mentioned_p (dest, ix86_compare_op1))
    type = 1;

  /* Clearing first must happen before the compare below -- the compare
     sources are still live here.  */
  if (type == 0)
    emit_move_insn (dest, const0_rtx);

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  /* TMP is the setcc destination; TMPREG the plain QImode register view
     of it used by the and/or combining below.  */
  tmp = dest;
  tmpreg = dest;
  if (type == 0)
    {
      tmp = gen_lowpart (QImode, dest);
      tmpreg = tmp;
      tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
    }
  else if (type == 1)
    {
      if (!cse_not_expected)
	tmp = gen_reg_rtx (QImode);
      else
	tmp = gen_lowpart (QImode, dest);
      tmpreg = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  /* Combine the extra test from a split FP comparison: AND for a bypass
     (reversed) test, OR for a second test.  At most one is set.  */
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  if (second_test)
	    abort();
	  test = bypass_test;
	  bypass = 1;
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  /* Mode 1: widen the QImode result into DEST, clobbering flags.  */
  if (type == 1)
    {
      rtx clob;

      tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
      tmp = gen_rtx_SET (VOIDmode, dest, tmp);
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
      emit_insn (tmp);
    }

  return 1; /* DONE */
}
e075ae69 5496
/* Expand an integer conditional move: OPERANDS[0] = OPERANDS[1] (a
   comparison of ix86_compare_op0/op1) ? OPERANDS[2] : OPERANDS[3].
   Tries branchless sbb/setcc/lea sequences for constant arms before
   falling back to jumps or cmov.  Returns 1 on success (DONE), 0 on
   failure (FAIL, caller falls back).  */
int
ix86_expand_int_movcc (operands)
     rtx operands[];
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;

  /* When the compare code is not LTU or GEU, we can not use sbbl case.
     In case comparsion is done with immediate, we can convert it to LTU or
     GEU by altering the integer.  */

  if ((code == LEU || code == GTU)
      && GET_CODE (ix86_compare_op1) == CONST_INT
      && GET_MODE (operands[0]) != HImode
      && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      /* x <= C  ==  x < C+1;  x > C  ==  x >= C+1.  The 0xffffffff
	 check above rules out overflow of the increment.  */
      if (code == LEU)
	code = LTU;
      else
	code = GEU;
      ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
    }

  /* Record the compare sequence; it is emitted later only on paths
     that need the flags.  */
  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = gen_sequence ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if (GET_MODE (operands[0]) != HImode
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      /* sbb-based sequences: only valid for carry-flag codes and when
	 the FP compare produced no extra tests.  */
      if ((compare_code == LTU || compare_code == GEU)
	  && !second_test && !bypass_test)
	{

	  /* Detect overlap between destination and compare sources.  */
	  rtx tmp = out;

	  /* To simplify rest of code, restrict to the GEU case.  */
	  if (compare_code == LTU)
	    {
	      /* NOTE(review): this inner `int tmp' shadows the rtx TMP
		 above -- intentional here (swaps CT/CF only).  */
	      int tmp = ct;
	      ct = cf;
	      cf = tmp;
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	  diff = ct - cf;

	  if (reg_overlap_mentioned_p (out, ix86_compare_op0)
	      || reg_overlap_mentioned_p (out, ix86_compare_op1))
	    tmp = gen_reg_rtx (SImode);

	  emit_insn (compare_seq);
	  /* TMP = (carry set) ? -1 : 0 via sbb.  */
	  emit_insn (gen_x86_movsicc_0_m1 (tmp));

	  /* NOTE(review): some adjustments below operate on OUT while
	     the sbb result lives in TMP; when TMP != OUT (overlap case)
	     these look inconsistent -- confirm against upstream history
	     before relying on the overlap path.  */
	  if (diff == 1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [addl dest, ct]
	       *
	       * Size 5 - 8.
	       */
	      if (ct)
		emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
	    }
	  else if (cf == -1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * orl $ct, dest
	       *
	       * Size 8.
	       */
	      emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
	    }
	  else if (diff == -1 && ct)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * xorl $-1, dest
	       * [addl dest, cf]
	       *
	       * Size 8 - 11.
	       */
	      emit_insn (gen_one_cmplsi2 (tmp, tmp));
	      if (cf)
		emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
	    }
	  else
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * andl cf - ct, dest
	       * [addl dest, ct]
	       *
	       * Size 8 - 11.
	       */
	      emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
	      if (ct)
		emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
	    }

	  if (tmp != out)
	    emit_move_insn (out, tmp);

	  return 1; /* DONE */
	}

      /* setcc + lea sequences.  Normalize to CT > CF, reversing the
	 condition when swapping.  */
      diff = ct - cf;
      if (diff < 0)
	{
	  HOST_WIDE_INT tmp;
	  tmp = ct, ct = cf, cf = tmp;
	  diff = -diff;
	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
	    {
	      /* We may be reversing unordered compare to normal compare, that
		 is not valid in general (we may convert non-trapping condition
		 to trapping one), however on i386 we currently emit all
		 comparisons unordered.  */
	      compare_code = reverse_condition_maybe_unordered (compare_code);
	      code = reverse_condition_maybe_unordered (code);
	    }
	  else
	    {
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	}
      /* DIFF values expressible as an lea scale (1,2,4,8) optionally
	 plus the index itself (3,5,9).  */
      if (diff == 1 || diff == 2 || diff == 4 || diff == 8
	  || diff == 3 || diff == 5 || diff == 9)
	{
	  /*
	   * xorl dest,dest
	   * cmpl op1,op2
	   * setcc dest
	   * lea cf(dest*(ct-cf)),dest
	   *
	   * Size 14.
	   *
	   * This also catches the degenerate setcc-only case.
	   */

	  rtx tmp;
	  int nops;

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  /* Build the lea address expression, counting operations so the
	     single-op case can take the flag-clobbering arithmetic form.  */
	  nops = 0;
	  if (diff == 1)
	    tmp = out;
	  else
	    {
	      tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
	      nops++;
	      if (diff & 1)
		{
		  tmp = gen_rtx_PLUS (SImode, tmp, out);
		  nops++;
		}
	    }
	  if (cf != 0)
	    {
	      tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
	      nops++;
	    }
	  if (tmp != out)
	    {
	      if (nops == 0)
		emit_move_insn (out, tmp);
	      else if (nops == 1)
		{
		  rtx clob;

		  clob = gen_rtx_REG (CCmode, FLAGS_REG);
		  clob = gen_rtx_CLOBBER (VOIDmode, clob);

		  tmp = gen_rtx_SET (VOIDmode, out, tmp);
		  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
		  emit_insn (tmp);
		}
	      else
		emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
	    }
	  if (out != operands[0])
	    emit_move_insn (operands[0], out);

	  return 1; /* DONE */
	}

      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * Size 20.			Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       *
       * %%% Parameterize branch_cost on the tuning architecture, then
       * use that.  The 80386 couldn't care less about mispredicts.
       */

      if (!optimize_size && !TARGET_CMOVE)
	{
	  if (ct == 0)
	    {
	      /* Arrange for CF == 0 so the decl/andl/addl tail works.  */
	      ct = cf;
	      cf = 0;
	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
		{
		  /* We may be reversing unordered compare to normal compare,
		     that is not valid in general (we may convert non-trapping
		     condition to trapping one), however on i386 we currently
		     emit all comparisons unordered.  */
		  compare_code = reverse_condition_maybe_unordered (compare_code);
		  code = reverse_condition_maybe_unordered (code);
		}
	      else
		{
		  compare_code = reverse_condition (compare_code);
		  code = reverse_condition (code);
		}
	    }

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  emit_insn (gen_addsi3 (out, out, constm1_rtx));
	  emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
	  if (ct != 0)
	    emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
	  if (out != operands[0])
	    emit_move_insn (operands[0], out);

	  return 1; /* DONE */
	}
    }

  if (!TARGET_CMOVE)
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (optimize_size)
	return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load a
	 constant with the above and mask it in with a logical operation.  */

      if (GET_CODE (operands[2]) == CONST_INT)
	{
	  var = operands[3];
	  if (INTVAL (operands[2]) == 0)
	    operands[3] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[2]) == -1)
	    operands[3] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else if (GET_CODE (operands[3]) == CONST_INT)
	{
	  var = operands[2];
	  if (INTVAL (operands[3]) == 0)
	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1)
	    operands[2] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else
	return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (GET_MODE (orig_out));
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
	return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
			  OPTAB_WIDEN);
      if (out != orig_out)
	emit_move_insn (orig_out, out);

      return 1; /* DONE */
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
    operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
  if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
    operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);

  /* Extra cmovs below read operands[0]; copy an arm that overlaps it
     into a fresh register first.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						compare_op, operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  second_test,
						  operands[2],
						  operands[0])));

  return 1; /* DONE */
}
e075ae69 5865
/* Expand a floating point conditional move: OPERANDS[0] = OPERANDS[1]
   (a comparison of ix86_compare_op0/op1) ? OPERANDS[2] : OPERANDS[3],
   using fcmov.  Comparisons fcmov cannot consume directly are reduced
   to a setcc followed by a compare against zero.  Always returns 1.  */
int
ix86_expand_fp_movcc (operands)
     rtx operands[];
{
  enum rtx_code code;
  rtx tmp;
  rtx compare_op, second_test, bypass_test;

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* The floating point conditional move instructions don't directly
     support signed integer comparisons.  */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      /* A split comparison is not expected here -- only the integer
	 path can reach this branch.  */
      if (second_test != NULL || bypass_test != NULL)
	abort();
      /* Materialize the condition as 0/1 and redo as a != 0 test.  */
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  /* Extra cmovs below read operands[0]; copy an arm that overlaps it
     into a fresh register first.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						compare_op,
						operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  second_test,
						  operands[2],
						  operands[0])));

  return 1;
}
5927
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating pointer parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.

   OPERAND is the value to split, PARTS receives the SImode words, and
   MODE is OPERAND's mode.  Returns the number of parts written (2 or
   3).  */

static int
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  /* TFmode is split into three 32-bit words (only 12 bytes of the
     16-byte mode carry the value).  */
  int size = mode == TFmode ? 3 : GET_MODE_SIZE (mode) / 4;

  /* MMX registers cannot be split into SImode halves.  */
  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp moves,
     that force all constants to memory to allow combining.  */

  if (GET_CODE (operand) == MEM
      && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
    operand = get_pool_constant (XEXP (operand, 0));

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      /* All parts alias the same push destination; the caller emits the
	 pushes in the right order.  */
      PUT_MODE (operand, SImode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Consecutive hard registers are only meaningful after
		 reload has assigned them.  */
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      /* Address successive 4-byte pieces of the memory word.  */
	      PUT_MODE (operand, SImode);
	      parts[0] = operand;
	      parts[1] = adj_offsettable_operand (operand, 4);
	      if (size == 3)
		parts[2] = adj_offsettable_operand (operand, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      /* Decompose a floating point constant into its target
		 representation words.  */
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		case TFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = GEN_INT (l[2]);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = GEN_INT (l[1]);
	      parts[0] = GEN_INT (l[0]);
	    }
	  else
	    abort ();
	}
    }

  return size;
}
6015
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

int
ix86_split_long_move (operands1)
     rtx operands1[];
{
  rtx part[2][3];
  rtx operands[2];
  int size;
  int push = 0;
  int collisions = 0;

  /* Make our own copy to avoid clobbering the operands.  */
  operands[0] = copy_rtx (operands1[0]);
  operands[1] = copy_rtx (operands1[1]);

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  size = ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
  ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      /* Each push moves %esp, so shift the source parts up by one slot
	 to compensate for the pushes already emitted when each part is
	 read.  */
      if (size == 3)
	part[1][1] = part[1][2];
      part[1][0] = part[1][1];
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      /* Count destination registers that appear in the source
	 address.  */
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (size == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && size == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  collisions = 1;
	  emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
				  XEXP (part[1][0], 0)));
	  part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
	  part[1][1] = adj_offsettable_operand (part[1][0], 4);
	  if (size == 3)
	    part[1][2] = adj_offsettable_operand (part[1][0], 8);
	}
    }

  if (push)
    {
      /* Pushes go most-significant part first.  */
      if (size == 3)
	{
	  /* We use only first 12 bytes of TFmode value, but for pushing we
	     are required to adjust stack as if we were pushing real 16byte
	     value.  */
	  if (GET_MODE (operands1[0]) == TFmode)
	    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (-4)));
	  emit_insn (gen_push (part[1][2]));
	}
      emit_insn (gen_push (part[1][1]));
      emit_insn (gen_push (part[1][0]));
      return 1;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (size == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* Copy parts high-to-low (operands 2-4 / 5-7 hold the order).  */
      if (size == 3)
	{
	  operands1[2] = part[0][2];
	  operands1[3] = part[0][1];
	  operands1[4] = part[0][0];
	  operands1[5] = part[1][2];
	  operands1[6] = part[1][1];
	  operands1[7] = part[1][0];
	}
      else
	{
	  operands1[2] = part[0][1];
	  operands1[3] = part[0][0];
	  operands1[5] = part[1][1];
	  operands1[6] = part[1][0];
	}
    }
  else
    {
      /* Copy parts low-to-high.  */
      if (size == 3)
	{
	  operands1[2] = part[0][0];
	  operands1[3] = part[0][1];
	  operands1[4] = part[0][2];
	  operands1[5] = part[1][0];
	  operands1[6] = part[1][1];
	  operands1[7] = part[1][2];
	}
      else
	{
	  operands1[2] = part[0][0];
	  operands1[3] = part[0][1];
	  operands1[5] = part[1][0];
	  operands1[6] = part[1][1];
	}
    }

  return 0;
}
32b5b1aa 6154
/* Split a DImode left shift into SImode operations.  OPERANDS are the
   DImode destination, source, and shift count; SCRATCH is an optional
   SImode scratch register used for the variable-count fixup when new
   pseudos may not be created.  */

void
ix86_split_ashldi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      /* Constant count: emit straight-line code.  */
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* The low word shifts entirely into the high word.  */
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  /* shld feeds the top bits of the low word into the high word.  */
	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable count: shift both words, then correct for counts of
	 32..63 (hardware shifts only look at the low 5 count bits).  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  /* Conditionally move the words when bit 5 of the count is set,
	     using a zeroed scratch register.  */
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
32b5b1aa 6207
/* Split a DImode arithmetic right shift into SImode operations.
   OPERANDS are the DImode destination, source, and shift count;
   SCRATCH is an optional SImode scratch register used for the
   variable-count fixup when new pseudos may not be created.  */

void
ix86_split_ashrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      /* Constant count: emit straight-line code.  */
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* High word shifts into the low word; the new high word is
	     the sign extension of the old high word.  */
	  emit_move_insn (low[0], high[1]);

	  if (! reload_completed)
	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
	  else
	    {
	      /* After reload, compute the sign word in place.  */
	      emit_move_insn (high[0], low[0]);
	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
	    }

	  if (count > 32)
	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  /* shrd feeds the low bits of the high word into the low word.  */
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable count: shift both words, then correct for counts of
	 32..63 using the sign word in SCRATCH when cmov is available.  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = gen_reg_rtx (SImode);
	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}
32b5b1aa 6266
/* Split a DImode logical right shift into SImode operations.  OPERANDS
   are the DImode destination, source, and shift count; SCRATCH is an
   optional SImode scratch register used for the variable-count fixup
   when new pseudos may not be created.  */

void
ix86_split_lshrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      /* Constant count: emit straight-line code.  */
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* The high word shifts entirely into the low word.  */
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  /* shrd feeds the low bits of the high word into the low word.  */
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable count: shift both words, then correct for counts of
	 32..63 (hardware shifts only look at the low 5 count bits).  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}
3f803cd9 6320
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body. It needs the initialisations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

void
ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
     rtx out, align_rtx, scratch;
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  /* Dispatch on the address modulo 4: 0 is already aligned,
	     2 needs two byte checks, 3 needs one.  */
	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   SImode, 1, 0, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
				   SImode, 1, 0, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
				   SImode, 1, 0, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   SImode, 1, 0, align_4_label);
	}

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, 0, end_0_label);

      /* Increment the address.  */
      emit_insn (gen_addsi3 (out, out, const1_rtx))
;
      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
				   QImode, 1, 0, end_0_label);

	  emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, 0, end_0_label);

      emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)))
;
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
			   SImode, 1, 0, align_4_label);

  if (TARGET_CMOVE)
    {
       /* Branch-free selection of which half-word holds the zero byte.  */
       rtx reg = gen_reg_rtx (SImode);
       emit_move_insn (reg, tmpreg);
       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

       /* If zero is not in the first two bytes, move two bytes forward.  */
       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
						     reg,
						     tmpreg)));
       /* Emit lea manually to avoid clobbering of flags.  */
       emit_insn (gen_rtx_SET (SImode, reg,
			       gen_rtx_PLUS (SImode, out, GEN_INT (2))));

       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
       emit_insn (gen_rtx_SET (VOIDmode, out,
			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
						     reg,
						     out)));

    }
  else
    {
       rtx end_2_label = gen_label_rtx ();
       /* Is zero in the first two bytes? */

       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				   gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				   pc_rtx);
       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
       JUMP_LABEL (tmp) = end_2_label;

       /* Not in the first two.  Move two bytes forward.  */
       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
       emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

       emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  The add-with-itself sets the
     carry from the zero-byte flag, and the subtract-with-borrow then
     backs OUT up by 3 or 4 to point at the terminating byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));

  emit_label (end_0_label);
}
6487\f
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static void
ix86_init_machine_status (p)
     struct function *p;
{
  /* xcalloc zeroes the structure, so all cached stack slots start out
     NULL.  */
  p->machine = (struct machine_function *)
    xcalloc (1, sizeof (struct machine_function));
}
6499
/* Mark machine specific bits of P for GC.  */
static void
ix86_mark_machine_status (p)
     struct function *p;
{
  struct machine_function *machine = p->machine;
  enum machine_mode mode;
  int n;

  /* Nothing to mark if no machine data has been allocated yet.  */
  if (! machine)
    return;

  /* Keep every cached stack-slot MEM alive across collections.  */
  for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
       mode = (enum machine_mode) ((int) mode + 1))
    for (n = 0; n < MAX_386_STACK_LOCALS; n++)
      ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
}
6517
6518static void
6519ix86_free_machine_status (p)
6520 struct function *p;
6521{
6522 free (p->machine);
6523 p->machine = NULL;
1526a060
BS
6524}
6525
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (mode, n)
     enum machine_mode mode;
     int n;
{
  if (n < 0 || n >= MAX_386_STACK_LOCALS)
    abort ();

  /* Slots are created lazily and cached per (mode, n) pair.  */
  if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
    ix86_stack_locals[(int) mode][n]
      = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  return ix86_stack_locals[(int) mode][n];
}
6546\f
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.
   Returns the number of extra bytes (SIB and/or displacement) needed
   to encode ADDR.  */

static int
memory_address_length (addr)
     rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  /* Autoincrement forms (push/pop) need no address bytes at all.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* Special cases: ebp and esp need the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* 'K' accepts signed 8-bit immediates: one-byte disp8.  */
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
	    len = 1;
	  else
	    len = 4;
	}

      /* An index requires the two-byte modrm form.  */
      if (index)
	len += 1;
    }

  return len;
}
79325812 6604
/* Compute default value for "length_immediate" attribute.  When SHORTFORM is set
   expect that insn have 8bit immediate alternative.

   Returns the number of bytes the insn's (single) constant operand
   occupies in the encoding, or 0 when there is none.  */
int
ix86_attr_length_immediate_default (insn, shortform)
     rtx insn;
     int shortform;
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  /* Find the constant operand; its encoded size follows the insn's
     mode unless the short (8-bit) form applies.  */
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	/* At most one immediate operand is supported per insn.  */
	if (len)
	  abort ();
	/* 'K' accepts signed 8-bit immediates.  */
	if (shortform
	    && GET_CODE (recog_data.operand[i]) == CONST_INT
	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
	  len = 1;
	else
	  {
	    switch (get_attr_mode (insn))
	      {
	      case MODE_QI:
		len+=1;
		break;
	      case MODE_HI:
		len+=2;
		break;
	      case MODE_SI:
		len+=4;
		break;
	      default:
		fatal_insn ("Unknown insn mode", insn);
	      }
	  }
      }
  return len;
}
6644/* Compute default value for "length_address" attribute. */
6645int
6646ix86_attr_length_address_default (insn)
6647 rtx insn;
6648{
6649 int i;
6c698a6d 6650 extract_insn_cached (insn);
1ccbefce
RH
6651 for (i = recog_data.n_operands - 1; i >= 0; --i)
6652 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 6653 {
6ef67412 6654 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
6655 break;
6656 }
6ef67412 6657 return 0;
3f803cd9 6658}
e075ae69
RH
6659\f
6660/* Return the maximum number of instructions a cpu can issue. */
b657fc39 6661
e075ae69
RH
6662int
6663ix86_issue_rate ()
b657fc39 6664{
e075ae69 6665 switch (ix86_cpu)
b657fc39 6666 {
e075ae69
RH
6667 case PROCESSOR_PENTIUM:
6668 case PROCESSOR_K6:
6669 return 2;
79325812 6670
e075ae69
RH
6671 case PROCESSOR_PENTIUMPRO:
6672 return 3;
b657fc39 6673
b657fc39 6674 default:
e075ae69 6675 return 1;
b657fc39 6676 }
b657fc39
L
6677}
6678
e075ae69
RH
6679/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
6680 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 6681
e075ae69
RH
6682static int
6683ix86_flags_dependant (insn, dep_insn, insn_type)
6684 rtx insn, dep_insn;
6685 enum attr_type insn_type;
6686{
6687 rtx set, set2;
b657fc39 6688
e075ae69
RH
6689 /* Simplify the test for uninteresting insns. */
6690 if (insn_type != TYPE_SETCC
6691 && insn_type != TYPE_ICMOV
6692 && insn_type != TYPE_FCMOV
6693 && insn_type != TYPE_IBR)
6694 return 0;
b657fc39 6695
e075ae69
RH
6696 if ((set = single_set (dep_insn)) != 0)
6697 {
6698 set = SET_DEST (set);
6699 set2 = NULL_RTX;
6700 }
6701 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
6702 && XVECLEN (PATTERN (dep_insn), 0) == 2
6703 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
6704 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
6705 {
6706 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6707 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6708 }
78a0d70c
ZW
6709 else
6710 return 0;
b657fc39 6711
78a0d70c
ZW
6712 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
6713 return 0;
b657fc39 6714
78a0d70c
ZW
6715 /* This test is true if the dependant insn reads the flags but
6716 not any other potentially set register. */
6717 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
6718 return 0;
6719
6720 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
6721 return 0;
6722
6723 return 1;
e075ae69 6724}
b657fc39 6725
/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx addr;

  if (insn_type == TYPE_LEA)
    {
      /* An lea computes an address in its SET_SRC even though it is not
	 a memory reference; the pattern is either a plain SET or a
	 PARALLEL whose first element is the SET.  */
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
	;
      else if (GET_CODE (addr) == PARALLEL
	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      else
	abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      /* Otherwise use the address of the first MEM operand.  */
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    goto found;
	  }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}
a269a03c
JC
6764
6765int
e075ae69 6766ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
6767 rtx insn, link, dep_insn;
6768 int cost;
6769{
e075ae69 6770 enum attr_type insn_type, dep_insn_type;
0b5107cf 6771 enum attr_memory memory;
e075ae69 6772 rtx set, set2;
9b00189f 6773 int dep_insn_code_number;
a269a03c 6774
309ada50 6775 /* Anti and output depenancies have zero cost on all CPUs. */
e075ae69 6776 if (REG_NOTE_KIND (link) != 0)
309ada50 6777 return 0;
a269a03c 6778
9b00189f
JH
6779 dep_insn_code_number = recog_memoized (dep_insn);
6780
e075ae69 6781 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 6782 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 6783 return cost;
a269a03c 6784
1c71e60e
JH
6785 insn_type = get_attr_type (insn);
6786 dep_insn_type = get_attr_type (dep_insn);
9b00189f 6787
1c71e60e
JH
6788 /* Prologue and epilogue allocators can have a false dependency on ebp.
6789 This results in one cycle extra stall on Pentium prologue scheduling,
6790 so handle this important case manually. */
6791 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
6792 && dep_insn_type == TYPE_ALU
9b00189f
JH
6793 && !reg_mentioned_p (stack_pointer_rtx, insn))
6794 return 0;
6795
a269a03c
JC
6796 switch (ix86_cpu)
6797 {
6798 case PROCESSOR_PENTIUM:
e075ae69
RH
6799 /* Address Generation Interlock adds a cycle of latency. */
6800 if (ix86_agi_dependant (insn, dep_insn, insn_type))
6801 cost += 1;
6802
6803 /* ??? Compares pair with jump/setcc. */
6804 if (ix86_flags_dependant (insn, dep_insn, insn_type))
6805 cost = 0;
6806
6807 /* Floating point stores require value to be ready one cycle ealier. */
0f290768 6808 if (insn_type == TYPE_FMOV
e075ae69
RH
6809 && get_attr_memory (insn) == MEMORY_STORE
6810 && !ix86_agi_dependant (insn, dep_insn, insn_type))
6811 cost += 1;
6812 break;
a269a03c 6813
e075ae69 6814 case PROCESSOR_PENTIUMPRO:
0f290768 6815 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
6816 increase the cost here for non-imov insns. */
6817 if (dep_insn_type != TYPE_IMOV
6818 && dep_insn_type != TYPE_FMOV
0b5107cf
JH
6819 && ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
6820 || memory == MEMORY_BOTH))
e075ae69
RH
6821 cost += 1;
6822
6823 /* INT->FP conversion is expensive. */
6824 if (get_attr_fp_int_src (dep_insn))
6825 cost += 5;
6826
6827 /* There is one cycle extra latency between an FP op and a store. */
6828 if (insn_type == TYPE_FMOV
6829 && (set = single_set (dep_insn)) != NULL_RTX
6830 && (set2 = single_set (insn)) != NULL_RTX
6831 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
6832 && GET_CODE (SET_DEST (set2)) == MEM)
6833 cost += 1;
6834 break;
a269a03c 6835
e075ae69
RH
6836 case PROCESSOR_K6:
6837 /* The esp dependency is resolved before the instruction is really
6838 finished. */
6839 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
6840 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
6841 return 1;
a269a03c 6842
0f290768 6843 /* Since we can't represent delayed latencies of load+operation,
e075ae69 6844 increase the cost here for non-imov insns. */
0b5107cf
JH
6845 if ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
6846 || memory == MEMORY_BOTH)
e075ae69
RH
6847 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
6848
6849 /* INT->FP conversion is expensive. */
6850 if (get_attr_fp_int_src (dep_insn))
6851 cost += 5;
a14003ee 6852 break;
e075ae69 6853
309ada50 6854 case PROCESSOR_ATHLON:
0b5107cf
JH
6855 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6856 || memory == MEMORY_BOTH)
6857 {
6858 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
6859 cost += 2;
6860 else
6861 cost += 3;
6862 }
309ada50 6863
a269a03c 6864 default:
a269a03c
JC
6865 break;
6866 }
6867
6868 return cost;
6869}
0a726ef1 6870
/* Scheduler state kept across insns within a block.  For the Pentium
   Pro model we remember the insns assigned to the three decoders in
   the current cycle and how many have been issued so far.  */
static union
{
  struct ppro_sched_data
  {
    rtx decode[3];
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;
0a726ef1 6879
e075ae69
RH
6880static int
6881ix86_safe_length (insn)
6882 rtx insn;
6883{
6884 if (recog_memoized (insn) >= 0)
6885 return get_attr_length(insn);
6886 else
6887 return 128;
6888}
0a726ef1 6889
e075ae69
RH
6890static int
6891ix86_safe_length_prefix (insn)
6892 rtx insn;
6893{
6894 if (recog_memoized (insn) >= 0)
6895 return get_attr_length(insn);
6896 else
6897 return 0;
6898}
6899
6900static enum attr_memory
6901ix86_safe_memory (insn)
6902 rtx insn;
6903{
6904 if (recog_memoized (insn) >= 0)
6905 return get_attr_memory(insn);
6906 else
6907 return MEMORY_UNKNOWN;
6908}
0a726ef1 6909
e075ae69
RH
6910static enum attr_pent_pair
6911ix86_safe_pent_pair (insn)
6912 rtx insn;
6913{
6914 if (recog_memoized (insn) >= 0)
6915 return get_attr_pent_pair(insn);
6916 else
6917 return PENT_PAIR_NP;
6918}
0a726ef1 6919
e075ae69
RH
6920static enum attr_ppro_uops
6921ix86_safe_ppro_uops (insn)
6922 rtx insn;
6923{
6924 if (recog_memoized (insn) >= 0)
6925 return get_attr_ppro_uops (insn);
6926 else
6927 return PPRO_UOPS_MANY;
6928}
0a726ef1 6929
e075ae69
RH
6930static void
6931ix86_dump_ppro_packet (dump)
6932 FILE *dump;
0a726ef1 6933{
e075ae69 6934 if (ix86_sched_data.ppro.decode[0])
0a726ef1 6935 {
e075ae69
RH
6936 fprintf (dump, "PPRO packet: %d",
6937 INSN_UID (ix86_sched_data.ppro.decode[0]));
6938 if (ix86_sched_data.ppro.decode[1])
6939 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
6940 if (ix86_sched_data.ppro.decode[2])
6941 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
6942 fputc ('\n', dump);
6943 }
6944}
0a726ef1 6945
/* We're beginning a new block.  Initialize data structures as necessary.  */

void
ix86_sched_init (dump, sched_verbose)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
{
  /* Reset all per-block scheduler state (decode slots and issue count).  */
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
}
6955
6956/* Shift INSN to SLOT, and shift everything else down. */
6957
6958static void
6959ix86_reorder_insn (insnp, slot)
6960 rtx *insnp, *slot;
6961{
6962 if (insnp != slot)
6963 {
6964 rtx insn = *insnp;
0f290768 6965 do
e075ae69
RH
6966 insnp[0] = insnp[1];
6967 while (++insnp != slot);
6968 *insnp = insn;
0a726ef1 6969 }
e075ae69
RH
6970}
6971
6972/* Find an instruction with given pairability and minimal amount of cycles
6973 lost by the fact that the CPU waits for both pipelines to finish before
6974 reading next instructions. Also take care that both instructions together
6975 can not exceed 7 bytes. */
6976
/* Find an instruction with given pairability and minimal amount of cycles
   lost by the fact that the CPU waits for both pipelines to finish before
   reading next instructions.  Also take care that both instructions together
   can not exceed 7 bytes.

   Scans the ready queue [READY, E_READY] (highest priority first) for
   an insn whose pairing class is TYPE and which can issue beside
   FIRST; returns a pointer into the queue, or NULL when FIRST is too
   long to pair or no candidate is found.  */

static rtx *
ix86_pent_find_pair (e_ready, ready, type, first)
     rtx *e_ready;
     rtx *ready;
     enum attr_pent_pair type;
     rtx first;
{
  int mincycles, cycles;
  enum attr_pent_pair tmp;
  enum attr_memory memory;
  rtx *insnp, *bestinsnp = NULL;

  /* FIRST itself must fit the 7-byte pairing limit (prefix bytes
     excluded) or nothing can pair with it.  */
  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
    return NULL;

  memory = ix86_safe_memory (first);
  cycles = result_ready_cost (first);
  mincycles = INT_MAX;

  /* Stop scanning early once a zero-penalty candidate has been found
     (mincycles == 0 terminates the loop).  */
  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
        && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
      {
        enum attr_memory second_memory;
        int secondcycles, currentcycles;

        second_memory = ix86_safe_memory (*insnp);
        secondcycles = result_ready_cost (*insnp);
        /* Base penalty: the idle time of the pipe that finishes
           first while waiting for the other.  */
        currentcycles = abs (cycles - secondcycles);

        if (secondcycles >= 1 && cycles >= 1)
          {
            /* Two read/modify/write instructions together takes two
               cycles longer.  */
            if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
              currentcycles += 2;

            /* Read modify/write instruction followed by read/modify
               takes one cycle longer.  */
            if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
                && tmp != PENT_PAIR_UV
                && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
              currentcycles += 1;
          }
        if (currentcycles < mincycles)
          bestinsnp = insnp, mincycles = currentcycles;
      }

  return bestinsnp;
}
7027
78a0d70c 7028/* Subroutines of ix86_sched_reorder. */
e075ae69 7029
/* Subroutine of ix86_sched_reorder for PROCESSOR_PENTIUM: rearrange
   the ready queue [READY, E_READY] so that the head insn (*E_READY,
   issued first) is followed by a compatible partner for the second
   (U/V) pipe, chosen by ix86_pent_find_pair.  */
static void
ix86_sched_reorder_pentium (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  enum attr_pent_pair pair1, pair2;
  rtx *insnp;

  /* This wouldn't be necessary if Haifa knew that static insn ordering
     is important to which pipe an insn is issued to.  So we have to make
     some minor rearrangements.  */

  pair1 = ix86_safe_pent_pair (*e_ready);

  /* If the first insn is non-pairable, let it be.  */
  if (pair1 == PENT_PAIR_NP)
    return;

  pair2 = PENT_PAIR_NP;
  insnp = 0;

  /* If the first insn is UV or PV pairable, search for a PU
     insn to go with.  */
  if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PU, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PU;
    }

  /* If the first insn is PU or UV pairable, search for a PV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP
      && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PV;
    }

  /* If the first insn is pairable, search for a UV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP)
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_UV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_UV;
    }

  if (pair2 == PENT_PAIR_NP)
    return;

  /* Found something!  Decide if we need to swap the order.  Move the
     partner to the head slot (issue first) when pipe constraints or
     the RMW-then-load penalty favor it; otherwise make it second.  */
  if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
      || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
	  && ix86_safe_memory (*e_ready) == MEMORY_BOTH
	  && ix86_safe_memory (*insnp) == MEMORY_LOAD))
    ix86_reorder_insn (insnp, e_ready);
  else
    ix86_reorder_insn (insnp, e_ready - 1);
}
e075ae69 7094
/* Subroutine of ix86_sched_reorder for PROCESSOR_PENTIUMPRO: pull
   insns to the head of the ready queue [READY, E_READY] so that they
   fill the three PPro decoders (one complex decoder, two simple
   ones), and record how many were slotted this cycle.  */
static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready >= e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  /* Report at least one issue so ix86_variable_issue's decrement
     never starts from zero.  */
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}
fb693d44 7179
0f290768 7180/* We are about to being issuing insns for this clock cycle.
78a0d70c
ZW
7181 Override the default sort algorithm to better slot instructions. */
7182int
7183ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
7184 FILE *dump ATTRIBUTE_UNUSED;
7185 int sched_verbose ATTRIBUTE_UNUSED;
7186 rtx *ready;
7187 int n_ready;
7188 int clock_var ATTRIBUTE_UNUSED;
7189{
7190 rtx *e_ready = ready + n_ready - 1;
fb693d44 7191
78a0d70c
ZW
7192 if (n_ready < 2)
7193 goto out;
e075ae69 7194
78a0d70c
ZW
7195 switch (ix86_cpu)
7196 {
7197 default:
7198 break;
e075ae69 7199
78a0d70c
ZW
7200 case PROCESSOR_PENTIUM:
7201 ix86_sched_reorder_pentium (ready, e_ready);
7202 break;
e075ae69 7203
78a0d70c
ZW
7204 case PROCESSOR_PENTIUMPRO:
7205 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 7206 break;
fb693d44
RH
7207 }
7208
e075ae69
RH
7209out:
7210 return ix86_issue_rate ();
7211}
fb693d44 7212
e075ae69
RH
7213/* We are about to issue INSN. Return the number of insns left on the
7214 ready queue that can be issued this cycle. */
b222082e 7215
e075ae69
RH
/* Scheduler hook: we are about to issue INSN.  Return the number of
   insns left on the ready queue that can be issued this cycle.  For
   non-PPro CPUs this is simply CAN_ISSUE_MORE - 1; for the PPro we
   additionally track which decoder slot INSN occupies and dump packet
   boundaries when SCHED_VERBOSE.  */
int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    /* A many-uop insn monopolizes the decoders: flush (dump)
	       whatever packet was pending, then log INSN as a packet
	       of its own and leave the decoders empty.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    /* A 2-4 uop insn needs the complex decoder: flush the
	       pending packet and start a new one headed by INSN.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    /* Single-uop insn: place it in the first free decoder
	       slot; a full packet (slot 2 filled) is dumped and the
	       decoders cleared for the next cycle.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}
a7180f70
BS
7275\f
7276/* Compute the alignment given to a constant that is being placed in memory.
7277 EXP is the constant and ALIGN is the alignment that the object would
7278 ordinarily have.
7279 The value of this function is used instead of that alignment to align
7280 the object. */
7281
7282int
7283ix86_constant_alignment (exp, align)
7284 tree exp;
7285 int align;
7286{
7287 if (TREE_CODE (exp) == REAL_CST)
7288 {
7289 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
7290 return 64;
7291 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
7292 return 128;
7293 }
7294 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
7295 && align < 256)
7296 return 256;
7297
7298 return align;
7299}
7300
7301/* Compute the alignment for a static variable.
7302 TYPE is the data type, and ALIGN is the alignment that
7303 the object would ordinarily have. The value of this function is used
7304 instead of that alignment to align the object. */
7305
7306int
7307ix86_data_alignment (type, align)
7308 tree type;
7309 int align;
7310{
7311 if (AGGREGATE_TYPE_P (type)
7312 && TYPE_SIZE (type)
7313 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
7314 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
7315 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
7316 return 256;
7317
7318 if (TREE_CODE (type) == ARRAY_TYPE)
7319 {
7320 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7321 return 64;
7322 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7323 return 128;
7324 }
7325 else if (TREE_CODE (type) == COMPLEX_TYPE)
7326 {
0f290768 7327
a7180f70
BS
7328 if (TYPE_MODE (type) == DCmode && align < 64)
7329 return 64;
7330 if (TYPE_MODE (type) == XCmode && align < 128)
7331 return 128;
7332 }
7333 else if ((TREE_CODE (type) == RECORD_TYPE
7334 || TREE_CODE (type) == UNION_TYPE
7335 || TREE_CODE (type) == QUAL_UNION_TYPE)
7336 && TYPE_FIELDS (type))
7337 {
7338 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7339 return 64;
7340 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7341 return 128;
7342 }
7343 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7344 || TREE_CODE (type) == INTEGER_TYPE)
7345 {
7346 if (TYPE_MODE (type) == DFmode && align < 64)
7347 return 64;
7348 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7349 return 128;
7350 }
7351
7352 return align;
7353}
7354
7355/* Compute the alignment for a local variable.
7356 TYPE is the data type, and ALIGN is the alignment that
7357 the object would ordinarily have. The value of this macro is used
7358 instead of that alignment to align the object. */
7359
7360int
7361ix86_local_alignment (type, align)
7362 tree type;
7363 int align;
7364{
7365 if (TREE_CODE (type) == ARRAY_TYPE)
7366 {
7367 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7368 return 64;
7369 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7370 return 128;
7371 }
7372 else if (TREE_CODE (type) == COMPLEX_TYPE)
7373 {
7374 if (TYPE_MODE (type) == DCmode && align < 64)
7375 return 64;
7376 if (TYPE_MODE (type) == XCmode && align < 128)
7377 return 128;
7378 }
7379 else if ((TREE_CODE (type) == RECORD_TYPE
7380 || TREE_CODE (type) == UNION_TYPE
7381 || TREE_CODE (type) == QUAL_UNION_TYPE)
7382 && TYPE_FIELDS (type))
7383 {
7384 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7385 return 64;
7386 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7387 return 128;
7388 }
7389 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7390 || TREE_CODE (type) == INTEGER_TYPE)
7391 {
0f290768 7392
a7180f70
BS
7393 if (TYPE_MODE (type) == DFmode && align < 64)
7394 return 64;
7395 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7396 return 128;
7397 }
7398 return align;
7399}
bd793c65
BS
7400
/* Register one md builtin: NAME is the user-visible __builtin_* name,
   TYPE its function type tree, CODE its IX86_BUILTIN_* identifier.  */
#define def_builtin(NAME, TYPE, CODE) \
  builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL_PTR)

/* Table entry describing one MMX/SSE builtin: the insn pattern used
   to expand it, its name (0 for entries registered by hand elsewhere),
   its builtin code, and -- for comparison builtins -- the rtx
   comparison code plus a flag interpreted by the expanders (which are
   outside this chunk).  */
struct builtin_description
{
  enum insn_code icode;
  const char * name;
  enum ix86_builtins code;
  enum rtx_code comparison;
  unsigned int flag;
};
7411
/* SSE scalar ordered/unordered compare builtins (comiss/ucomiss),
   which set EFLAGS and yield an int.  NOTE(review): the "gt"/"ge"
   entries use LT/LE with flag == 1 -- presumably the flag requests
   swapped operands in the expander; confirm in the comi expansion
   code.  */
static struct builtin_description bdesc_comi[] =
{
  { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
};
7427
/* Two-operand MMX/SSE builtins.  Entries with name == 0 are expanded
   through this table but registered by hand (see ix86_init_builtins);
   the comparison/flag fields follow the same convention as bdesc_comi
   (NOTE(review): flag == 1 on "gt"/"ge" compares presumably means
   swapped operands -- confirm in the expander).  */
static struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },

  { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },

  { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  */
  { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },

  { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }

};
7560
/* One-operand MMX/SSE builtins (move-mask, reciprocal/square-root
   approximations, conversions).  All entries have name == 0: they are
   registered by hand with the appropriate function types.  */
static struct builtin_description bdesc_1arg[] =
{
  { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }

};
7576
7577/* Expand all the target specific builtins. This is not called if TARGET_MMX
7578 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
7579 builtins. */
7580void
7581ix86_init_builtins ()
7582{
7583 struct builtin_description * d;
77ebd435 7584 size_t i;
cbd5937a 7585 tree endlink = void_list_node;
bd793c65
BS
7586
7587 tree pchar_type_node = build_pointer_type (char_type_node);
7588 tree pfloat_type_node = build_pointer_type (float_type_node);
7589 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
7590 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
7591
7592 /* Comparisons. */
7593 tree int_ftype_v4sf_v4sf
7594 = build_function_type (integer_type_node,
7595 tree_cons (NULL_TREE, V4SF_type_node,
7596 tree_cons (NULL_TREE,
7597 V4SF_type_node,
7598 endlink)));
7599 tree v4si_ftype_v4sf_v4sf
7600 = build_function_type (V4SI_type_node,
7601 tree_cons (NULL_TREE, V4SF_type_node,
7602 tree_cons (NULL_TREE,
7603 V4SF_type_node,
7604 endlink)));
7605 /* MMX/SSE/integer conversions. */
7606 tree int_ftype_v4sf_int
7607 = build_function_type (integer_type_node,
7608 tree_cons (NULL_TREE, V4SF_type_node,
7609 tree_cons (NULL_TREE,
7610 integer_type_node,
7611 endlink)));
7612 tree int_ftype_v4sf
7613 = build_function_type (integer_type_node,
7614 tree_cons (NULL_TREE, V4SF_type_node,
7615 endlink));
7616 tree int_ftype_v8qi
7617 = build_function_type (integer_type_node,
7618 tree_cons (NULL_TREE, V8QI_type_node,
7619 endlink));
7620 tree int_ftype_v2si
7621 = build_function_type (integer_type_node,
7622 tree_cons (NULL_TREE, V2SI_type_node,
7623 endlink));
7624 tree v2si_ftype_int
7625 = build_function_type (V2SI_type_node,
7626 tree_cons (NULL_TREE, integer_type_node,
7627 endlink));
7628 tree v4sf_ftype_v4sf_int
7629 = build_function_type (integer_type_node,
7630 tree_cons (NULL_TREE, V4SF_type_node,
7631 tree_cons (NULL_TREE, integer_type_node,
7632 endlink)));
7633 tree v4sf_ftype_v4sf_v2si
7634 = build_function_type (V4SF_type_node,
7635 tree_cons (NULL_TREE, V4SF_type_node,
7636 tree_cons (NULL_TREE, V2SI_type_node,
7637 endlink)));
7638 tree int_ftype_v4hi_int
7639 = build_function_type (integer_type_node,
7640 tree_cons (NULL_TREE, V4HI_type_node,
7641 tree_cons (NULL_TREE, integer_type_node,
7642 endlink)));
7643 tree v4hi_ftype_v4hi_int_int
332316cd 7644 = build_function_type (V4HI_type_node,
bd793c65
BS
7645 tree_cons (NULL_TREE, V4HI_type_node,
7646 tree_cons (NULL_TREE, integer_type_node,
7647 tree_cons (NULL_TREE,
7648 integer_type_node,
7649 endlink))));
7650 /* Miscellaneous. */
7651 tree v8qi_ftype_v4hi_v4hi
7652 = build_function_type (V8QI_type_node,
7653 tree_cons (NULL_TREE, V4HI_type_node,
7654 tree_cons (NULL_TREE, V4HI_type_node,
7655 endlink)));
7656 tree v4hi_ftype_v2si_v2si
7657 = build_function_type (V4HI_type_node,
7658 tree_cons (NULL_TREE, V2SI_type_node,
7659 tree_cons (NULL_TREE, V2SI_type_node,
7660 endlink)));
7661 tree v4sf_ftype_v4sf_v4sf_int
7662 = build_function_type (V4SF_type_node,
7663 tree_cons (NULL_TREE, V4SF_type_node,
7664 tree_cons (NULL_TREE, V4SF_type_node,
7665 tree_cons (NULL_TREE,
7666 integer_type_node,
7667 endlink))));
7668 tree v4hi_ftype_v8qi_v8qi
7669 = build_function_type (V4HI_type_node,
7670 tree_cons (NULL_TREE, V8QI_type_node,
7671 tree_cons (NULL_TREE, V8QI_type_node,
7672 endlink)));
7673 tree v2si_ftype_v4hi_v4hi
7674 = build_function_type (V2SI_type_node,
7675 tree_cons (NULL_TREE, V4HI_type_node,
7676 tree_cons (NULL_TREE, V4HI_type_node,
7677 endlink)));
7678 tree v4hi_ftype_v4hi_int
7679 = build_function_type (V4HI_type_node,
7680 tree_cons (NULL_TREE, V4HI_type_node,
7681 tree_cons (NULL_TREE, integer_type_node,
7682 endlink)));
7683 tree di_ftype_di_int
7684 = build_function_type (long_long_unsigned_type_node,
7685 tree_cons (NULL_TREE, long_long_unsigned_type_node,
7686 tree_cons (NULL_TREE, integer_type_node,
7687 endlink)));
7688 tree v8qi_ftype_v8qi_di
7689 = build_function_type (V8QI_type_node,
7690 tree_cons (NULL_TREE, V8QI_type_node,
7691 tree_cons (NULL_TREE,
7692 long_long_integer_type_node,
7693 endlink)));
7694 tree v4hi_ftype_v4hi_di
7695 = build_function_type (V4HI_type_node,
7696 tree_cons (NULL_TREE, V4HI_type_node,
7697 tree_cons (NULL_TREE,
7698 long_long_integer_type_node,
7699 endlink)));
7700 tree v2si_ftype_v2si_di
7701 = build_function_type (V2SI_type_node,
7702 tree_cons (NULL_TREE, V2SI_type_node,
7703 tree_cons (NULL_TREE,
7704 long_long_integer_type_node,
7705 endlink)));
7706 tree void_ftype_void
7707 = build_function_type (void_type_node, endlink);
7708 tree void_ftype_pchar_int
7709 = build_function_type (void_type_node,
7710 tree_cons (NULL_TREE, pchar_type_node,
7711 tree_cons (NULL_TREE, integer_type_node,
7712 endlink)));
7713 tree void_ftype_unsigned
7714 = build_function_type (void_type_node,
7715 tree_cons (NULL_TREE, unsigned_type_node,
7716 endlink));
7717 tree unsigned_ftype_void
7718 = build_function_type (unsigned_type_node, endlink);
7719 tree di_ftype_void
7720 = build_function_type (long_long_unsigned_type_node, endlink);
7721 tree ti_ftype_void
7722 = build_function_type (intTI_type_node, endlink);
7723 tree v2si_ftype_v4sf
7724 = build_function_type (V2SI_type_node,
7725 tree_cons (NULL_TREE, V4SF_type_node,
7726 endlink));
7727 /* Loads/stores. */
7728 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
7729 tree_cons (NULL_TREE, V8QI_type_node,
7730 tree_cons (NULL_TREE,
7731 pchar_type_node,
7732 endlink)));
7733 tree void_ftype_v8qi_v8qi_pchar
7734 = build_function_type (void_type_node, maskmovq_args);
7735 tree v4sf_ftype_pfloat
7736 = build_function_type (V4SF_type_node,
7737 tree_cons (NULL_TREE, pfloat_type_node,
7738 endlink));
7739 tree v4sf_ftype_float
7740 = build_function_type (V4SF_type_node,
7741 tree_cons (NULL_TREE, float_type_node,
7742 endlink));
7743 tree v4sf_ftype_float_float_float_float
7744 = build_function_type (V4SF_type_node,
7745 tree_cons (NULL_TREE, float_type_node,
7746 tree_cons (NULL_TREE, float_type_node,
7747 tree_cons (NULL_TREE,
7748 float_type_node,
7749 tree_cons (NULL_TREE,
7750 float_type_node,
7751 endlink)))));
7752 /* @@@ the type is bogus */
7753 tree v4sf_ftype_v4sf_pv2si
7754 = build_function_type (V4SF_type_node,
7755 tree_cons (NULL_TREE, V4SF_type_node,
7756 tree_cons (NULL_TREE, pv2si_type_node,
7757 endlink)));
7758 tree v4sf_ftype_pv2si_v4sf
7759 = build_function_type (V4SF_type_node,
7760 tree_cons (NULL_TREE, V4SF_type_node,
7761 tree_cons (NULL_TREE, pv2si_type_node,
7762 endlink)));
7763 tree void_ftype_pfloat_v4sf
7764 = build_function_type (void_type_node,
7765 tree_cons (NULL_TREE, pfloat_type_node,
7766 tree_cons (NULL_TREE, V4SF_type_node,
7767 endlink)));
7768 tree void_ftype_pdi_di
7769 = build_function_type (void_type_node,
7770 tree_cons (NULL_TREE, pdi_type_node,
7771 tree_cons (NULL_TREE,
7772 long_long_unsigned_type_node,
7773 endlink)));
7774 /* Normal vector unops. */
7775 tree v4sf_ftype_v4sf
7776 = build_function_type (V4SF_type_node,
7777 tree_cons (NULL_TREE, V4SF_type_node,
7778 endlink));
0f290768 7779
bd793c65
BS
7780 /* Normal vector binops. */
7781 tree v4sf_ftype_v4sf_v4sf
7782 = build_function_type (V4SF_type_node,
7783 tree_cons (NULL_TREE, V4SF_type_node,
7784 tree_cons (NULL_TREE, V4SF_type_node,
7785 endlink)));
7786 tree v8qi_ftype_v8qi_v8qi
7787 = build_function_type (V8QI_type_node,
7788 tree_cons (NULL_TREE, V8QI_type_node,
7789 tree_cons (NULL_TREE, V8QI_type_node,
7790 endlink)));
7791 tree v4hi_ftype_v4hi_v4hi
7792 = build_function_type (V4HI_type_node,
7793 tree_cons (NULL_TREE, V4HI_type_node,
7794 tree_cons (NULL_TREE, V4HI_type_node,
7795 endlink)));
7796 tree v2si_ftype_v2si_v2si
7797 = build_function_type (V2SI_type_node,
7798 tree_cons (NULL_TREE, V2SI_type_node,
7799 tree_cons (NULL_TREE, V2SI_type_node,
7800 endlink)));
7801 tree ti_ftype_ti_ti
7802 = build_function_type (intTI_type_node,
7803 tree_cons (NULL_TREE, intTI_type_node,
7804 tree_cons (NULL_TREE, intTI_type_node,
7805 endlink)));
7806 tree di_ftype_di_di
7807 = build_function_type (long_long_unsigned_type_node,
7808 tree_cons (NULL_TREE, long_long_unsigned_type_node,
7809 tree_cons (NULL_TREE,
7810 long_long_unsigned_type_node,
7811 endlink)));
7812
7813 /* Add all builtins that are more or less simple operations on two
7814 operands. */
7815 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
7816 {
7817 /* Use one of the operands; the target can have a different mode for
7818 mask-generating compares. */
7819 enum machine_mode mode;
7820 tree type;
7821
7822 if (d->name == 0)
7823 continue;
7824 mode = insn_data[d->icode].operand[1].mode;
7825
7826 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
7827 continue;
7828
7829 switch (mode)
7830 {
7831 case V4SFmode:
7832 type = v4sf_ftype_v4sf_v4sf;
7833 break;
7834 case V8QImode:
7835 type = v8qi_ftype_v8qi_v8qi;
7836 break;
7837 case V4HImode:
7838 type = v4hi_ftype_v4hi_v4hi;
7839 break;
7840 case V2SImode:
7841 type = v2si_ftype_v2si_v2si;
7842 break;
7843 case TImode:
7844 type = ti_ftype_ti_ti;
7845 break;
7846 case DImode:
7847 type = di_ftype_di_di;
7848 break;
7849
7850 default:
7851 abort ();
7852 }
0f290768 7853
bd793c65
BS
7854 /* Override for comparisons. */
7855 if (d->icode == CODE_FOR_maskcmpv4sf3
7856 || d->icode == CODE_FOR_maskncmpv4sf3
7857 || d->icode == CODE_FOR_vmmaskcmpv4sf3
7858 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
7859 type = v4si_ftype_v4sf_v4sf;
7860
7861 def_builtin (d->name, type, d->code);
7862 }
7863
7864 /* Add the remaining MMX insns with somewhat more complicated types. */
7865 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
7866 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
7867 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
7868 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
7869 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
7870 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
7871 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
7872 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
7873 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
7874
7875 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
7876 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
7877 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
7878
7879 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
7880 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
7881
7882 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
7883 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
7884
7885 /* Everything beyond this point is SSE only. */
7886 if (! TARGET_SSE)
7887 return;
0f290768 7888
bd793c65
BS
7889 /* comi/ucomi insns. */
7890 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
7891 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
7892
7893 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
7894 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
7895 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
7896
7897 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
7898 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
7899 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
7900 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
7901 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
7902 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
7903
7904 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
7905 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
7906
7907 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
7908
7909 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
7910 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
7911 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
7912 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
7913 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
7914 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
7915
7916 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
7917 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
7918 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
7919 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
7920
7921 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
7922 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
7923 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
7924 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
7925
7926 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
7927 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
7928
7929 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
7930
7931 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
7932 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
7933 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
7934 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
7935 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
7936 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
7937
7938 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
7939
7940 /* Composite intrinsics. */
7941 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
7942 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
7943 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
7944 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
7945 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
7946 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
7947 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
7948}
7949
7950/* Errors in the source file can cause expand_expr to return const0_rtx
7951 where we expect a vector. To avoid crashing, use one of the vector
7952 clear instructions. */
7953static rtx
7954safe_vector_operand (x, mode)
7955 rtx x;
7956 enum machine_mode mode;
7957{
7958 if (x != const0_rtx)
7959 return x;
7960 x = gen_reg_rtx (mode);
7961
7962 if (VALID_MMX_REG_MODE (mode))
7963 emit_insn (gen_mmx_clrdi (mode == DImode ? x
7964 : gen_rtx_SUBREG (DImode, x, 0)));
7965 else
7966 emit_insn (gen_sse_clrti (mode == TImode ? x
7967 : gen_rtx_SUBREG (TImode, x, 0)));
7968 return x;
7969}
7970
7971/* Subroutine of ix86_expand_builtin to take care of binop insns. */
7972
7973static rtx
7974ix86_expand_binop_builtin (icode, arglist, target)
7975 enum insn_code icode;
7976 tree arglist;
7977 rtx target;
7978{
7979 rtx pat;
7980 tree arg0 = TREE_VALUE (arglist);
7981 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7982 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7983 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7984 enum machine_mode tmode = insn_data[icode].operand[0].mode;
7985 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
7986 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
7987
7988 if (VECTOR_MODE_P (mode0))
7989 op0 = safe_vector_operand (op0, mode0);
7990 if (VECTOR_MODE_P (mode1))
7991 op1 = safe_vector_operand (op1, mode1);
7992
7993 if (! target
7994 || GET_MODE (target) != tmode
7995 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7996 target = gen_reg_rtx (tmode);
7997
7998 /* In case the insn wants input operands in modes different from
7999 the result, abort. */
8000 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
8001 abort ();
8002
8003 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8004 op0 = copy_to_mode_reg (mode0, op0);
8005 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8006 op1 = copy_to_mode_reg (mode1, op1);
8007
8008 pat = GEN_FCN (icode) (target, op0, op1);
8009 if (! pat)
8010 return 0;
8011 emit_insn (pat);
8012 return target;
8013}
8014
8015/* Subroutine of ix86_expand_builtin to take care of stores. */
8016
8017static rtx
8018ix86_expand_store_builtin (icode, arglist, shuffle)
8019 enum insn_code icode;
8020 tree arglist;
8021 int shuffle;
8022{
8023 rtx pat;
8024 tree arg0 = TREE_VALUE (arglist);
8025 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8026 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8027 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8028 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
8029 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
8030
8031 if (VECTOR_MODE_P (mode1))
8032 op1 = safe_vector_operand (op1, mode1);
8033
8034 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8035 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8036 op1 = copy_to_mode_reg (mode1, op1);
8037 if (shuffle >= 0)
8038 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
8039 pat = GEN_FCN (icode) (op0, op1);
8040 if (pat)
8041 emit_insn (pat);
8042 return 0;
8043}
8044
8045/* Subroutine of ix86_expand_builtin to take care of unop insns. */
8046
8047static rtx
8048ix86_expand_unop_builtin (icode, arglist, target, do_load)
8049 enum insn_code icode;
8050 tree arglist;
8051 rtx target;
8052 int do_load;
8053{
8054 rtx pat;
8055 tree arg0 = TREE_VALUE (arglist);
8056 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8057 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8058 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8059
8060 if (! target
8061 || GET_MODE (target) != tmode
8062 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8063 target = gen_reg_rtx (tmode);
8064 if (do_load)
8065 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8066 else
8067 {
8068 if (VECTOR_MODE_P (mode0))
8069 op0 = safe_vector_operand (op0, mode0);
8070
8071 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8072 op0 = copy_to_mode_reg (mode0, op0);
8073 }
8074
8075 pat = GEN_FCN (icode) (target, op0);
8076 if (! pat)
8077 return 0;
8078 emit_insn (pat);
8079 return target;
8080}
8081
8082/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
8083 sqrtss, rsqrtss, rcpss. */
8084
8085static rtx
8086ix86_expand_unop1_builtin (icode, arglist, target)
8087 enum insn_code icode;
8088 tree arglist;
8089 rtx target;
8090{
8091 rtx pat;
8092 tree arg0 = TREE_VALUE (arglist);
8093 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8094 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8095 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8096
8097 if (! target
8098 || GET_MODE (target) != tmode
8099 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8100 target = gen_reg_rtx (tmode);
8101
8102 if (VECTOR_MODE_P (mode0))
8103 op0 = safe_vector_operand (op0, mode0);
8104
8105 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8106 op0 = copy_to_mode_reg (mode0, op0);
8107
8108 pat = GEN_FCN (icode) (target, op0, op0);
8109 if (! pat)
8110 return 0;
8111 emit_insn (pat);
8112 return target;
8113}
8114
8115/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
8116
8117static rtx
8118ix86_expand_sse_compare (d, arglist, target)
8119 struct builtin_description *d;
8120 tree arglist;
8121 rtx target;
8122{
8123 rtx pat;
8124 tree arg0 = TREE_VALUE (arglist);
8125 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8126 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8127 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8128 rtx op2;
8129 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
8130 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
8131 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
8132 enum rtx_code comparison = d->comparison;
8133
8134 if (VECTOR_MODE_P (mode0))
8135 op0 = safe_vector_operand (op0, mode0);
8136 if (VECTOR_MODE_P (mode1))
8137 op1 = safe_vector_operand (op1, mode1);
8138
8139 /* Swap operands if we have a comparison that isn't available in
8140 hardware. */
8141 if (d->flag)
8142 {
8143 target = gen_reg_rtx (tmode);
8144 emit_move_insn (target, op1);
8145 op1 = op0;
8146 op0 = target;
8147 comparison = swap_condition (comparison);
8148 }
8149 else if (! target
8150 || GET_MODE (target) != tmode
8151 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
8152 target = gen_reg_rtx (tmode);
8153
8154 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
8155 op0 = copy_to_mode_reg (mode0, op0);
8156 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
8157 op1 = copy_to_mode_reg (mode1, op1);
8158
8159 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
8160 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
8161 if (! pat)
8162 return 0;
8163 emit_insn (pat);
8164 return target;
8165}
8166
8167/* Subroutine of ix86_expand_builtin to take care of comi insns. */
8168
8169static rtx
8170ix86_expand_sse_comi (d, arglist, target)
8171 struct builtin_description *d;
8172 tree arglist;
8173 rtx target;
8174{
8175 rtx pat;
8176 tree arg0 = TREE_VALUE (arglist);
8177 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8178 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8179 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8180 rtx op2;
8181 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
8182 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
8183 enum rtx_code comparison = d->comparison;
8184
8185 if (VECTOR_MODE_P (mode0))
8186 op0 = safe_vector_operand (op0, mode0);
8187 if (VECTOR_MODE_P (mode1))
8188 op1 = safe_vector_operand (op1, mode1);
8189
8190 /* Swap operands if we have a comparison that isn't available in
8191 hardware. */
8192 if (d->flag)
8193 {
8194 rtx tmp = op1;
8195 op1 = op0;
8196 op0 = tmp;
8197 comparison = swap_condition (comparison);
8198 }
8199
8200 target = gen_reg_rtx (SImode);
8201 emit_move_insn (target, const0_rtx);
8202 target = gen_rtx_SUBREG (QImode, target, 0);
8203
8204 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
8205 op0 = copy_to_mode_reg (mode0, op0);
8206 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
8207 op1 = copy_to_mode_reg (mode1, op1);
8208
8209 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
8210 pat = GEN_FCN (d->icode) (op0, op1, op2);
8211 if (! pat)
8212 return 0;
8213 emit_insn (pat);
8214 emit_insn (gen_setcc_2 (target, op2));
8215
8216 return target;
8217}
8218
8219/* Expand an expression EXP that calls a built-in function,
8220 with result going to TARGET if that's convenient
8221 (and in mode MODE if that's convenient).
8222 SUBTARGET may be used as the target for computing one of EXP's operands.
8223 IGNORE is nonzero if the value is to be ignored. */
8224
8225rtx
8226ix86_expand_builtin (exp, target, subtarget, mode, ignore)
8227 tree exp;
8228 rtx target;
8229 rtx subtarget ATTRIBUTE_UNUSED;
8230 enum machine_mode mode ATTRIBUTE_UNUSED;
8231 int ignore ATTRIBUTE_UNUSED;
8232{
8233 struct builtin_description *d;
77ebd435 8234 size_t i;
bd793c65
BS
8235 enum insn_code icode;
8236 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8237 tree arglist = TREE_OPERAND (exp, 1);
8238 tree arg0, arg1, arg2, arg3;
8239 rtx op0, op1, op2, pat;
8240 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 8241 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
bd793c65
BS
8242
8243 switch (fcode)
8244 {
8245 case IX86_BUILTIN_EMMS:
8246 emit_insn (gen_emms ());
8247 return 0;
8248
8249 case IX86_BUILTIN_SFENCE:
8250 emit_insn (gen_sfence ());
8251 return 0;
8252
8253 case IX86_BUILTIN_M_FROM_INT:
8254 target = gen_reg_rtx (DImode);
8255 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8256 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
8257 return target;
8258
8259 case IX86_BUILTIN_M_TO_INT:
8260 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8261 op0 = copy_to_mode_reg (DImode, op0);
8262 target = gen_reg_rtx (SImode);
8263 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
8264 return target;
8265
8266 case IX86_BUILTIN_PEXTRW:
8267 icode = CODE_FOR_mmx_pextrw;
8268 arg0 = TREE_VALUE (arglist);
8269 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8270 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8271 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8272 tmode = insn_data[icode].operand[0].mode;
8273 mode0 = insn_data[icode].operand[1].mode;
8274 mode1 = insn_data[icode].operand[2].mode;
8275
8276 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8277 op0 = copy_to_mode_reg (mode0, op0);
8278 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8279 {
8280 /* @@@ better error message */
8281 error ("selector must be an immediate");
8282 return const0_rtx;
8283 }
8284 if (target == 0
8285 || GET_MODE (target) != tmode
8286 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8287 target = gen_reg_rtx (tmode);
8288 pat = GEN_FCN (icode) (target, op0, op1);
8289 if (! pat)
8290 return 0;
8291 emit_insn (pat);
8292 return target;
8293
8294 case IX86_BUILTIN_PINSRW:
8295 icode = CODE_FOR_mmx_pinsrw;
8296 arg0 = TREE_VALUE (arglist);
8297 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8298 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8299 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8300 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8301 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8302 tmode = insn_data[icode].operand[0].mode;
8303 mode0 = insn_data[icode].operand[1].mode;
8304 mode1 = insn_data[icode].operand[2].mode;
8305 mode2 = insn_data[icode].operand[3].mode;
8306
8307 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8308 op0 = copy_to_mode_reg (mode0, op0);
8309 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8310 op1 = copy_to_mode_reg (mode1, op1);
8311 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8312 {
8313 /* @@@ better error message */
8314 error ("selector must be an immediate");
8315 return const0_rtx;
8316 }
8317 if (target == 0
8318 || GET_MODE (target) != tmode
8319 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8320 target = gen_reg_rtx (tmode);
8321 pat = GEN_FCN (icode) (target, op0, op1, op2);
8322 if (! pat)
8323 return 0;
8324 emit_insn (pat);
8325 return target;
8326
8327 case IX86_BUILTIN_MASKMOVQ:
8328 icode = CODE_FOR_mmx_maskmovq;
8329 /* Note the arg order is different from the operand order. */
8330 arg1 = TREE_VALUE (arglist);
8331 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
8332 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8333 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8334 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8335 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8336 mode0 = insn_data[icode].operand[0].mode;
8337 mode1 = insn_data[icode].operand[1].mode;
8338 mode2 = insn_data[icode].operand[2].mode;
8339
8340 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8341 op0 = copy_to_mode_reg (mode0, op0);
8342 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8343 op1 = copy_to_mode_reg (mode1, op1);
8344 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
8345 op2 = copy_to_mode_reg (mode2, op2);
8346 pat = GEN_FCN (icode) (op0, op1, op2);
8347 if (! pat)
8348 return 0;
8349 emit_insn (pat);
8350 return 0;
8351
8352 case IX86_BUILTIN_SQRTSS:
8353 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
8354 case IX86_BUILTIN_RSQRTSS:
8355 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
8356 case IX86_BUILTIN_RCPSS:
8357 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
8358
8359 case IX86_BUILTIN_LOADAPS:
8360 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
8361
8362 case IX86_BUILTIN_LOADUPS:
8363 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
8364
8365 case IX86_BUILTIN_STOREAPS:
8366 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
8367 case IX86_BUILTIN_STOREUPS:
8368 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
8369
8370 case IX86_BUILTIN_LOADSS:
8371 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
8372
8373 case IX86_BUILTIN_STORESS:
8374 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
8375
0f290768 8376 case IX86_BUILTIN_LOADHPS:
bd793c65
BS
8377 case IX86_BUILTIN_LOADLPS:
8378 icode = (fcode == IX86_BUILTIN_LOADHPS
8379 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8380 arg0 = TREE_VALUE (arglist);
8381 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8382 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8383 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8384 tmode = insn_data[icode].operand[0].mode;
8385 mode0 = insn_data[icode].operand[1].mode;
8386 mode1 = insn_data[icode].operand[2].mode;
8387
8388 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8389 op0 = copy_to_mode_reg (mode0, op0);
8390 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
8391 if (target == 0
8392 || GET_MODE (target) != tmode
8393 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8394 target = gen_reg_rtx (tmode);
8395 pat = GEN_FCN (icode) (target, op0, op1);
8396 if (! pat)
8397 return 0;
8398 emit_insn (pat);
8399 return target;
0f290768 8400
bd793c65
BS
8401 case IX86_BUILTIN_STOREHPS:
8402 case IX86_BUILTIN_STORELPS:
8403 icode = (fcode == IX86_BUILTIN_STOREHPS
8404 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8405 arg0 = TREE_VALUE (arglist);
8406 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8407 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8408 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8409 mode0 = insn_data[icode].operand[1].mode;
8410 mode1 = insn_data[icode].operand[2].mode;
8411
8412 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8413 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8414 op1 = copy_to_mode_reg (mode1, op1);
8415
8416 pat = GEN_FCN (icode) (op0, op0, op1);
8417 if (! pat)
8418 return 0;
8419 emit_insn (pat);
8420 return 0;
8421
8422 case IX86_BUILTIN_MOVNTPS:
8423 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
8424 case IX86_BUILTIN_MOVNTQ:
8425 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
8426
8427 case IX86_BUILTIN_LDMXCSR:
8428 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8429 target = assign_386_stack_local (SImode, 0);
8430 emit_move_insn (target, op0);
8431 emit_insn (gen_ldmxcsr (target));
8432 return 0;
8433
8434 case IX86_BUILTIN_STMXCSR:
8435 target = assign_386_stack_local (SImode, 0);
8436 emit_insn (gen_stmxcsr (target));
8437 return copy_to_mode_reg (SImode, target);
8438
8439 case IX86_BUILTIN_PREFETCH:
8440 icode = CODE_FOR_prefetch;
8441 arg0 = TREE_VALUE (arglist);
8442 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8443 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8444 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
332316cd
BS
8445 mode0 = insn_data[icode].operand[0].mode;
8446 mode1 = insn_data[icode].operand[1].mode;
bd793c65 8447
332316cd 8448 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
bd793c65
BS
8449 {
8450 /* @@@ better error message */
8451 error ("selector must be an immediate");
8452 return const0_rtx;
8453 }
8454
332316cd 8455 op0 = copy_to_mode_reg (Pmode, op0);
bd793c65
BS
8456 pat = GEN_FCN (icode) (op0, op1);
8457 if (! pat)
8458 return 0;
8459 emit_insn (pat);
8460 return target;
0f290768 8461
bd793c65
BS
8462 case IX86_BUILTIN_SHUFPS:
8463 icode = CODE_FOR_sse_shufps;
8464 arg0 = TREE_VALUE (arglist);
8465 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8466 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8467 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8468 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8469 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8470 tmode = insn_data[icode].operand[0].mode;
8471 mode0 = insn_data[icode].operand[1].mode;
8472 mode1 = insn_data[icode].operand[2].mode;
8473 mode2 = insn_data[icode].operand[3].mode;
8474
8475 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8476 op0 = copy_to_mode_reg (mode0, op0);
8477 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8478 op1 = copy_to_mode_reg (mode1, op1);
8479 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8480 {
8481 /* @@@ better error message */
8482 error ("mask must be an immediate");
8483 return const0_rtx;
8484 }
8485 if (target == 0
8486 || GET_MODE (target) != tmode
8487 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8488 target = gen_reg_rtx (tmode);
8489 pat = GEN_FCN (icode) (target, op0, op1, op2);
8490 if (! pat)
8491 return 0;
8492 emit_insn (pat);
8493 return target;
8494
8495 case IX86_BUILTIN_PSHUFW:
8496 icode = CODE_FOR_mmx_pshufw;
8497 arg0 = TREE_VALUE (arglist);
8498 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8499 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8500 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8501 tmode = insn_data[icode].operand[0].mode;
8502 mode0 = insn_data[icode].operand[2].mode;
8503 mode1 = insn_data[icode].operand[3].mode;
8504
8505 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8506 op0 = copy_to_mode_reg (mode0, op0);
8507 if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
8508 {
8509 /* @@@ better error message */
8510 error ("mask must be an immediate");
8511 return const0_rtx;
8512 }
8513 if (target == 0
8514 || GET_MODE (target) != tmode
8515 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8516 target = gen_reg_rtx (tmode);
8517 pat = GEN_FCN (icode) (target, target, op0, op1);
8518 if (! pat)
8519 return 0;
8520 emit_insn (pat);
8521 return target;
8522
8523 /* Composite intrinsics. */
8524 case IX86_BUILTIN_SETPS1:
8525 target = assign_386_stack_local (SFmode, 0);
8526 arg0 = TREE_VALUE (arglist);
8527 emit_move_insn (change_address (target, SFmode, XEXP (target, 0)),
8528 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8529 op0 = gen_reg_rtx (V4SFmode);
8530 emit_insn (gen_sse_loadss (op0, change_address (target, V4SFmode,
8531 XEXP (target, 0))));
8532 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
8533 return op0;
0f290768 8534
bd793c65
BS
8535 case IX86_BUILTIN_SETPS:
8536 target = assign_386_stack_local (V4SFmode, 0);
8537 op0 = change_address (target, SFmode, XEXP (target, 0));
8538 arg0 = TREE_VALUE (arglist);
8539 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8540 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8541 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
8542 emit_move_insn (op0,
8543 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8544 emit_move_insn (adj_offsettable_operand (op0, 4),
8545 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
8546 emit_move_insn (adj_offsettable_operand (op0, 8),
8547 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
8548 emit_move_insn (adj_offsettable_operand (op0, 12),
8549 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
8550 op0 = gen_reg_rtx (V4SFmode);
8551 emit_insn (gen_sse_movaps (op0, target));
8552 return op0;
8553
8554 case IX86_BUILTIN_CLRPS:
8555 target = gen_reg_rtx (TImode);
8556 emit_insn (gen_sse_clrti (target));
8557 return target;
8558
8559 case IX86_BUILTIN_LOADRPS:
8560 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
8561 gen_reg_rtx (V4SFmode), 1);
8562 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
8563 return target;
8564
8565 case IX86_BUILTIN_LOADPS1:
8566 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
8567 gen_reg_rtx (V4SFmode), 1);
8568 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
8569 return target;
8570
8571 case IX86_BUILTIN_STOREPS1:
8572 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
8573 case IX86_BUILTIN_STORERPS:
8574 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
8575
8576 case IX86_BUILTIN_MMX_ZERO:
8577 target = gen_reg_rtx (DImode);
8578 emit_insn (gen_mmx_clrdi (target));
8579 return target;
8580
8581 default:
8582 break;
8583 }
8584
8585 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
8586 if (d->code == fcode)
8587 {
8588 /* Compares are treated specially. */
8589 if (d->icode == CODE_FOR_maskcmpv4sf3
8590 || d->icode == CODE_FOR_vmmaskcmpv4sf3
8591 || d->icode == CODE_FOR_maskncmpv4sf3
8592 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
8593 return ix86_expand_sse_compare (d, arglist, target);
8594
8595 return ix86_expand_binop_builtin (d->icode, arglist, target);
8596 }
8597
8598 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
8599 if (d->code == fcode)
8600 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
0f290768 8601
bd793c65
BS
8602 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
8603 if (d->code == fcode)
8604 return ix86_expand_sse_comi (d, arglist, target);
0f290768 8605
bd793c65
BS
8606 /* @@@ Should really do something sensible here. */
8607 return 0;
bd793c65 8608}
4211a8fb
JH
8609
8610/* Store OPERAND to the memory after reload is completed. This means
8611 that we can't easilly use assign_stack_local. */
8612rtx
8613ix86_force_to_memory (mode, operand)
8614 enum machine_mode mode;
8615 rtx operand;
8616{
8617 if (!reload_completed)
8618 abort ();
8619 switch (mode)
8620 {
8621 case DImode:
8622 {
8623 rtx operands[2];
8624 split_di (&operand, 1, operands, operands+1);
8625 emit_insn (
8626 gen_rtx_SET (VOIDmode,
8627 gen_rtx_MEM (SImode,
8628 gen_rtx_PRE_DEC (Pmode,
8629 stack_pointer_rtx)),
8630 operands[1]));
8631 emit_insn (
8632 gen_rtx_SET (VOIDmode,
8633 gen_rtx_MEM (SImode,
8634 gen_rtx_PRE_DEC (Pmode,
8635 stack_pointer_rtx)),
8636 operands[0]));
8637 }
8638 break;
8639 case HImode:
8640 /* It is better to store HImodes as SImodes. */
8641 if (!TARGET_PARTIAL_REG_STALL)
8642 operand = gen_lowpart (SImode, operand);
8643 /* FALLTHRU */
8644 case SImode:
8645 emit_insn (
8646 gen_rtx_SET (VOIDmode,
8647 gen_rtx_MEM (GET_MODE (operand),
8648 gen_rtx_PRE_DEC (SImode,
8649 stack_pointer_rtx)),
8650 operand));
8651 break;
8652 default:
8653 abort();
8654 }
8655 return gen_rtx_MEM (mode, stack_pointer_rtx);
8656}
8657
8658/* Free operand from the memory. */
8659void
8660ix86_free_from_memory (mode)
8661 enum machine_mode mode;
8662{
8663 /* Use LEA to deallocate stack space. In peephole2 it will be converted
8664 to pop or add instruction if registers are available. */
8665 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8666 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8667 GEN_INT (mode == DImode
8668 ? 8
8669 : mode == HImode && TARGET_PARTIAL_REG_STALL
8670 ? 2
8671 : 4))));
8672}
This page took 2.174567 seconds and 5 git commands to generate.