]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
i386.md (sse_mov?fcc*): New patterns and splitters.
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
e075ae69 1/* Subroutines used for code generation on IA-32.
8752c357 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
4592bdcb 3 Free Software Foundation, Inc.
2a2ab3f9
JVA
4
5This file is part of GNU CC.
6
7GNU CC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU CC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU CC; see the file COPYING. If not, write to
97aadbb9 19the Free Software Foundation, 59 Temple Place - Suite 330,
0f290768 20Boston, MA 02111-1307, USA. */
2a2ab3f9 21
2a2ab3f9 22#include "config.h"
1fba7553 23#include <setjmp.h>
bb5177ac 24#include "system.h"
2a2ab3f9 25#include "rtl.h"
6baf1cc8
BS
26#include "tree.h"
27#include "tm_p.h"
2a2ab3f9
JVA
28#include "regs.h"
29#include "hard-reg-set.h"
30#include "real.h"
31#include "insn-config.h"
32#include "conditions.h"
33#include "insn-flags.h"
34#include "output.h"
35#include "insn-attr.h"
2a2ab3f9 36#include "flags.h"
a8ffcc81 37#include "except.h"
ecbc4695 38#include "function.h"
00c79232 39#include "recog.h"
ced8dd8c 40#include "expr.h"
f103890b 41#include "toplev.h"
e075ae69 42#include "basic-block.h"
1526a060 43#include "ggc.h"
2a2ab3f9 44
8dfe5673
RK
45#ifndef CHECK_STACK_LIMIT
46#define CHECK_STACK_LIMIT -1
47#endif
48
32b5b1aa
SC
49/* Processor costs (relative to an add) */
50struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 51 1, /* cost of an add instruction */
32b5b1aa
SC
52 1, /* cost of a lea instruction */
53 3, /* variable shift costs */
54 2, /* constant shift costs */
55 6, /* cost of starting a multiply */
56 1, /* cost of multiply per each bit set */
e075ae69 57 23, /* cost of a divide/mod */
96e7ae40 58 15, /* "large" insn */
e2e52e1b 59 3, /* MOVE_RATIO */
7c6b971d 60 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
61 {2, 4, 2}, /* cost of loading integer registers
62 in QImode, HImode and SImode.
0f290768 63 Relative to reg-reg move (2). */
96e7ae40
JH
64 {2, 4, 2}, /* cost of storing integer registers */
65 2, /* cost of reg,reg fld/fst */
66 {8, 8, 8}, /* cost of loading fp registers
67 in SFmode, DFmode and XFmode */
fa79946e
JH
68 {8, 8, 8}, /* cost of loading integer registers */
69 2, /* cost of moving MMX register */
70 {4, 8}, /* cost of loading MMX registers
71 in SImode and DImode */
72 {4, 8}, /* cost of storing MMX registers
73 in SImode and DImode */
74 2, /* cost of moving SSE register */
75 {4, 8, 16}, /* cost of loading SSE registers
76 in SImode, DImode and TImode */
77 {4, 8, 16}, /* cost of storing SSE registers
78 in SImode, DImode and TImode */
79 3, /* MMX or SSE register to integer */
32b5b1aa
SC
80};
81
82struct processor_costs i486_cost = { /* 486 specific costs */
83 1, /* cost of an add instruction */
84 1, /* cost of a lea instruction */
85 3, /* variable shift costs */
86 2, /* constant shift costs */
87 12, /* cost of starting a multiply */
88 1, /* cost of multiply per each bit set */
e075ae69 89 40, /* cost of a divide/mod */
96e7ae40 90 15, /* "large" insn */
e2e52e1b 91 3, /* MOVE_RATIO */
7c6b971d 92 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
93 {2, 4, 2}, /* cost of loading integer registers
94 in QImode, HImode and SImode.
0f290768 95 Relative to reg-reg move (2). */
96e7ae40
JH
96 {2, 4, 2}, /* cost of storing integer registers */
97 2, /* cost of reg,reg fld/fst */
98 {8, 8, 8}, /* cost of loading fp registers
99 in SFmode, DFmode and XFmode */
fa79946e
JH
100 {8, 8, 8}, /* cost of loading integer registers */
101 2, /* cost of moving MMX register */
102 {4, 8}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {4, 8}, /* cost of storing MMX registers
105 in SImode and DImode */
106 2, /* cost of moving SSE register */
107 {4, 8, 16}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {4, 8, 16}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3 /* MMX or SSE register to integer */
32b5b1aa
SC
112};
113
e5cb57e8 114struct processor_costs pentium_cost = {
32b5b1aa
SC
115 1, /* cost of an add instruction */
116 1, /* cost of a lea instruction */
856b07a1 117 4, /* variable shift costs */
e5cb57e8 118 1, /* constant shift costs */
856b07a1
SC
119 11, /* cost of starting a multiply */
120 0, /* cost of multiply per each bit set */
e075ae69 121 25, /* cost of a divide/mod */
96e7ae40 122 8, /* "large" insn */
e2e52e1b 123 6, /* MOVE_RATIO */
7c6b971d 124 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
125 {2, 4, 2}, /* cost of loading integer registers
126 in QImode, HImode and SImode.
0f290768 127 Relative to reg-reg move (2). */
96e7ae40
JH
128 {2, 4, 2}, /* cost of storing integer registers */
129 2, /* cost of reg,reg fld/fst */
130 {2, 2, 6}, /* cost of loading fp registers
131 in SFmode, DFmode and XFmode */
fa79946e
JH
132 {4, 4, 6}, /* cost of loading integer registers */
133 8, /* cost of moving MMX register */
134 {8, 8}, /* cost of loading MMX registers
135 in SImode and DImode */
136 {8, 8}, /* cost of storing MMX registers
137 in SImode and DImode */
138 2, /* cost of moving SSE register */
139 {4, 8, 16}, /* cost of loading SSE registers
140 in SImode, DImode and TImode */
141 {4, 8, 16}, /* cost of storing SSE registers
142 in SImode, DImode and TImode */
143 3 /* MMX or SSE register to integer */
32b5b1aa
SC
144};
145
856b07a1
SC
146struct processor_costs pentiumpro_cost = {
147 1, /* cost of an add instruction */
148 1, /* cost of a lea instruction */
e075ae69 149 1, /* variable shift costs */
856b07a1 150 1, /* constant shift costs */
369e59b1 151 4, /* cost of starting a multiply */
856b07a1 152 0, /* cost of multiply per each bit set */
e075ae69 153 17, /* cost of a divide/mod */
96e7ae40 154 8, /* "large" insn */
e2e52e1b 155 6, /* MOVE_RATIO */
7c6b971d 156 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
157 {4, 4, 4}, /* cost of loading integer registers
158 in QImode, HImode and SImode.
0f290768 159 Relative to reg-reg move (2). */
96e7ae40
JH
160 {2, 2, 2}, /* cost of storing integer registers */
161 2, /* cost of reg,reg fld/fst */
162 {2, 2, 6}, /* cost of loading fp registers
163 in SFmode, DFmode and XFmode */
fa79946e
JH
164 {4, 4, 6}, /* cost of loading integer registers */
165 2, /* cost of moving MMX register */
166 {2, 2}, /* cost of loading MMX registers
167 in SImode and DImode */
168 {2, 2}, /* cost of storing MMX registers
169 in SImode and DImode */
170 2, /* cost of moving SSE register */
171 {2, 2, 8}, /* cost of loading SSE registers
172 in SImode, DImode and TImode */
173 {2, 2, 8}, /* cost of storing SSE registers
174 in SImode, DImode and TImode */
175 3 /* MMX or SSE register to integer */
856b07a1
SC
176};
177
a269a03c
JC
178struct processor_costs k6_cost = {
179 1, /* cost of an add instruction */
e075ae69 180 2, /* cost of a lea instruction */
a269a03c
JC
181 1, /* variable shift costs */
182 1, /* constant shift costs */
73fe76e4 183 3, /* cost of starting a multiply */
a269a03c 184 0, /* cost of multiply per each bit set */
e075ae69 185 18, /* cost of a divide/mod */
96e7ae40 186 8, /* "large" insn */
e2e52e1b 187 4, /* MOVE_RATIO */
7c6b971d 188 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
189 {4, 5, 4}, /* cost of loading integer registers
190 in QImode, HImode and SImode.
0f290768 191 Relative to reg-reg move (2). */
96e7ae40
JH
192 {2, 3, 2}, /* cost of storing integer registers */
193 4, /* cost of reg,reg fld/fst */
194 {6, 6, 6}, /* cost of loading fp registers
195 in SFmode, DFmode and XFmode */
fa79946e
JH
196 {4, 4, 4}, /* cost of loading integer registers */
197 2, /* cost of moving MMX register */
198 {2, 2}, /* cost of loading MMX registers
199 in SImode and DImode */
200 {2, 2}, /* cost of storing MMX registers
201 in SImode and DImode */
202 2, /* cost of moving SSE register */
203 {2, 2, 8}, /* cost of loading SSE registers
204 in SImode, DImode and TImode */
205 {2, 2, 8}, /* cost of storing SSE registers
206 in SImode, DImode and TImode */
207 6 /* MMX or SSE register to integer */
a269a03c
JC
208};
209
309ada50
JH
210struct processor_costs athlon_cost = {
211 1, /* cost of an add instruction */
0b5107cf 212 2, /* cost of a lea instruction */
309ada50
JH
213 1, /* variable shift costs */
214 1, /* constant shift costs */
215 5, /* cost of starting a multiply */
216 0, /* cost of multiply per each bit set */
0b5107cf 217 42, /* cost of a divide/mod */
309ada50 218 8, /* "large" insn */
e2e52e1b 219 9, /* MOVE_RATIO */
309ada50
JH
220 4, /* cost for loading QImode using movzbl */
221 {4, 5, 4}, /* cost of loading integer registers
222 in QImode, HImode and SImode.
0f290768 223 Relative to reg-reg move (2). */
309ada50
JH
224 {2, 3, 2}, /* cost of storing integer registers */
225 4, /* cost of reg,reg fld/fst */
0b5107cf 226 {6, 6, 20}, /* cost of loading fp registers
309ada50 227 in SFmode, DFmode and XFmode */
fa79946e
JH
228 {4, 4, 16}, /* cost of loading integer registers */
229 2, /* cost of moving MMX register */
230 {2, 2}, /* cost of loading MMX registers
231 in SImode and DImode */
232 {2, 2}, /* cost of storing MMX registers
233 in SImode and DImode */
234 2, /* cost of moving SSE register */
235 {2, 2, 8}, /* cost of loading SSE registers
236 in SImode, DImode and TImode */
237 {2, 2, 8}, /* cost of storing SSE registers
238 in SImode, DImode and TImode */
239 6 /* MMX or SSE register to integer */
309ada50
JH
240};
241
32b5b1aa
SC
242struct processor_costs *ix86_cost = &pentium_cost;
243
a269a03c
JC
244/* Processor feature/optimization bitmasks. */
245#define m_386 (1<<PROCESSOR_I386)
246#define m_486 (1<<PROCESSOR_I486)
247#define m_PENT (1<<PROCESSOR_PENTIUM)
248#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
249#define m_K6 (1<<PROCESSOR_K6)
309ada50 250#define m_ATHLON (1<<PROCESSOR_ATHLON)
a269a03c 251
309ada50
JH
252const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
253const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
a269a03c 254const int x86_zero_extend_with_and = m_486 | m_PENT;
369e59b1 255const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */;
e075ae69 256const int x86_double_with_add = ~m_386;
a269a03c 257const int x86_use_bit_test = m_386;
e2e52e1b 258const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
a269a03c
JC
259const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
260const int x86_use_any_reg = m_486;
309ada50
JH
261const int x86_cmove = m_PPRO | m_ATHLON;
262const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
c0c102a9 263const int x86_use_sahf = m_PPRO | m_K6;
e075ae69
RH
264const int x86_partial_reg_stall = m_PPRO;
265const int x86_use_loop = m_K6;
309ada50 266const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
e075ae69
RH
267const int x86_use_mov0 = m_K6;
268const int x86_use_cltd = ~(m_PENT | m_K6);
269const int x86_read_modify_write = ~m_PENT;
270const int x86_read_modify = ~(m_PENT | m_PPRO);
271const int x86_split_long_moves = m_PPRO;
e9e80858 272const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
f90800f8 273const int x86_single_stringop = m_386;
d9f32422
JH
274const int x86_qimode_math = ~(0);
275const int x86_promote_qi_regs = 0;
276const int x86_himode_math = ~(m_PPRO);
277const int x86_promote_hi_regs = m_PPRO;
bdeb029c
JH
278const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
279const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
280const int x86_add_esp_4 = m_ATHLON | m_K6;
281const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
0b5107cf
JH
282const int x86_integer_DFmode_moves = ~m_ATHLON;
283const int x86_partial_reg_dependency = m_ATHLON;
284const int x86_memory_mismatch_stall = m_ATHLON;
a269a03c 285
564d80f4 286#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
2a2ab3f9 287
e075ae69
RH
288const char * const hi_reg_name[] = HI_REGISTER_NAMES;
289const char * const qi_reg_name[] = QI_REGISTER_NAMES;
290const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
4c0d89b5
RS
291
292/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 293 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 294
e075ae69 295enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
296{
297 /* ax, dx, cx, bx */
ab408a86 298 AREG, DREG, CREG, BREG,
4c0d89b5 299 /* si, di, bp, sp */
e075ae69 300 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
301 /* FP registers */
302 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 303 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 304 /* arg pointer */
83774849 305 NON_Q_REGS,
564d80f4 306 /* flags, fpsr, dirflag, frame */
a7180f70
BS
307 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
308 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
309 SSE_REGS, SSE_REGS,
310 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
311 MMX_REGS, MMX_REGS
4c0d89b5 312};
c572e5ba 313
83774849
RH
314/* The "default" register map. */
315
0f290768 316int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
317{
318 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
319 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
320 -1, -1, -1, -1, /* arg, flags, fpsr, dir */
a7180f70
BS
321 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
322 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
83774849
RH
323};
324
325/* Define the register numbers to be used in Dwarf debugging information.
326 The SVR4 reference port C compiler uses the following register numbers
327 in its Dwarf output code:
328 0 for %eax (gcc regno = 0)
329 1 for %ecx (gcc regno = 2)
330 2 for %edx (gcc regno = 1)
331 3 for %ebx (gcc regno = 3)
332 4 for %esp (gcc regno = 7)
333 5 for %ebp (gcc regno = 6)
334 6 for %esi (gcc regno = 4)
335 7 for %edi (gcc regno = 5)
336 The following three DWARF register numbers are never generated by
337 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
338 believes these numbers have these meanings.
339 8 for %eip (no gcc equivalent)
340 9 for %eflags (gcc regno = 17)
341 10 for %trapno (no gcc equivalent)
342 It is not at all clear how we should number the FP stack registers
343 for the x86 architecture. If the version of SDB on x86/svr4 were
344 a bit less brain dead with respect to floating-point then we would
345 have a precedent to follow with respect to DWARF register numbers
346 for x86 FP registers, but the SDB on x86/svr4 is so completely
347 broken with respect to FP registers that it is hardly worth thinking
348 of it as something to strive for compatibility with.
349 The version of x86/svr4 SDB I have at the moment does (partially)
350 seem to believe that DWARF register number 11 is associated with
351 the x86 register %st(0), but that's about all. Higher DWARF
352 register numbers don't seem to be associated with anything in
353 particular, and even for DWARF regno 11, SDB only seems to under-
354 stand that it should say that a variable lives in %st(0) (when
355 asked via an `=' command) if we said it was in DWARF regno 11,
356 but SDB still prints garbage when asked for the value of the
357 variable in question (via a `/' command).
358 (Also note that the labels SDB prints for various FP stack regs
359 when doing an `x' command are all wrong.)
360 Note that these problems generally don't affect the native SVR4
361 C compiler because it doesn't allow the use of -O with -g and
362 because when it is *not* optimizing, it allocates a memory
363 location for each floating-point variable, and the memory
364 location is what gets described in the DWARF AT_location
365 attribute for the variable in question.
366 Regardless of the severe mental illness of the x86/svr4 SDB, we
367 do something sensible here and we use the following DWARF
368 register numbers. Note that these are all stack-top-relative
369 numbers.
370 11 for %st(0) (gcc regno = 8)
371 12 for %st(1) (gcc regno = 9)
372 13 for %st(2) (gcc regno = 10)
373 14 for %st(3) (gcc regno = 11)
374 15 for %st(4) (gcc regno = 12)
375 16 for %st(5) (gcc regno = 13)
376 17 for %st(6) (gcc regno = 14)
377 18 for %st(7) (gcc regno = 15)
378*/
0f290768 379int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
380{
381 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
382 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
383 -1, 9, -1, -1, /* arg, flags, fpsr, dir */
a7180f70
BS
384 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
385 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
83774849
RH
386};
387
c572e5ba
JVA
388/* Test and compare insns in i386.md store the information needed to
389 generate branch and scc insns here. */
390
e075ae69
RH
391struct rtx_def *ix86_compare_op0 = NULL_RTX;
392struct rtx_def *ix86_compare_op1 = NULL_RTX;
f5316dfe 393
36edd3cc
BS
394#define MAX_386_STACK_LOCALS 2
395
396/* Define the structure for the machine field in struct function. */
397struct machine_function
398{
399 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
6fca22eb 400 int accesses_prev_frame;
36edd3cc
BS
401};
402
01d939e8 403#define ix86_stack_locals (cfun->machine->stack_locals)
36edd3cc 404
4dd2ac2c
JH
405/* Structure describing stack frame layout.
406 Stack grows downward:
407
408 [arguments]
409 <- ARG_POINTER
410 saved pc
411
412 saved frame pointer if frame_pointer_needed
413 <- HARD_FRAME_POINTER
414 [saved regs]
415
416 [padding1] \
417 )
418 [va_arg registers] (
419 > to_allocate <- FRAME_POINTER
420 [frame] (
421 )
422 [padding2] /
423 */
424struct ix86_frame
425{
426 int nregs;
427 int padding1;
428 HOST_WIDE_INT frame;
429 int padding2;
430 int outgoing_arguments_size;
431
432 HOST_WIDE_INT to_allocate;
433 /* The offsets relative to ARG_POINTER. */
434 HOST_WIDE_INT frame_pointer_offset;
435 HOST_WIDE_INT hard_frame_pointer_offset;
436 HOST_WIDE_INT stack_pointer_offset;
437};
438
c8c5cb99 439/* which cpu are we scheduling for */
e42ea7f9 440enum processor_type ix86_cpu;
c8c5cb99
SC
441
442/* which instruction set architecture to use. */
c942177e 443int ix86_arch;
c8c5cb99
SC
444
445/* Strings to hold which cpu and instruction set architecture to use. */
9c23aa47
ZW
446const char *ix86_cpu_string; /* for -mcpu=<xxx> */
447const char *ix86_arch_string; /* for -march=<xxx> */
c8c5cb99 448
0f290768 449/* # of registers to use to pass arguments. */
e075ae69 450const char *ix86_regparm_string;
e9a25f70 451
e075ae69
RH
452/* ix86_regparm_string as a number */
453int ix86_regparm;
e9a25f70
JL
454
455/* Alignment to use for loops and jumps: */
456
0f290768 457/* Power of two alignment for loops. */
e075ae69 458const char *ix86_align_loops_string;
e9a25f70 459
0f290768 460/* Power of two alignment for non-loop jumps. */
e075ae69 461const char *ix86_align_jumps_string;
e9a25f70 462
3af4bd89 463/* Power of two alignment for stack boundary in bytes. */
e075ae69 464const char *ix86_preferred_stack_boundary_string;
3af4bd89
JH
465
466/* Preferred alignment for stack boundary in bits. */
e075ae69 467int ix86_preferred_stack_boundary;
3af4bd89 468
e9a25f70 469/* Values 1-5: see jump.c */
e075ae69
RH
470int ix86_branch_cost;
471const char *ix86_branch_cost_string;
e9a25f70 472
0f290768 473/* Power of two alignment for functions. */
e075ae69
RH
474int ix86_align_funcs;
475const char *ix86_align_funcs_string;
b08de47e 476
0f290768 477/* Power of two alignment for loops. */
e075ae69 478int ix86_align_loops;
b08de47e 479
0f290768 480/* Power of two alignment for non-loop jumps. */
e075ae69
RH
481int ix86_align_jumps;
482\f
f6da8bc3
KG
483static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
484static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
e075ae69 485 int, int, FILE *));
f6da8bc3 486static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
3a3677ff
RH
487static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
488 rtx *, rtx *));
f6da8bc3
KG
489static rtx gen_push PARAMS ((rtx));
490static int memory_address_length PARAMS ((rtx addr));
491static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
492static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
493static int ix86_safe_length PARAMS ((rtx));
494static enum attr_memory ix86_safe_memory PARAMS ((rtx));
495static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
496static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
497static void ix86_dump_ppro_packet PARAMS ((FILE *));
498static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
499static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
e075ae69 500 rtx));
f6da8bc3
KG
501static void ix86_init_machine_status PARAMS ((struct function *));
502static void ix86_mark_machine_status PARAMS ((struct function *));
37b15744 503static void ix86_free_machine_status PARAMS ((struct function *));
2b589241 504static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
f6da8bc3 505static int ix86_safe_length_prefix PARAMS ((rtx));
0903fcab
JH
506static int ix86_nsaved_regs PARAMS((void));
507static void ix86_emit_save_regs PARAMS((void));
da2d1d3a 508static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
0903fcab 509static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
0e4970d7 510static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
c6991660
KG
511static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
512static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
55efb413 513static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
e075ae69
RH
514
515struct ix86_address
516{
517 rtx base, index, disp;
518 HOST_WIDE_INT scale;
519};
b08de47e 520
e075ae69 521static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
bd793c65
BS
522
523struct builtin_description;
524static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
525 rtx));
526static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
527 rtx));
528static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
529static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
530static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
531static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
532static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
c0c102a9
JH
533static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
534static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
535 enum rtx_code *,
536 enum rtx_code *,
537 enum rtx_code *));
9e7adcb3
JH
538static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
539 rtx *, rtx *));
540static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
541static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
542static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
543static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
4dd2ac2c
JH
544static int ix86_save_reg PARAMS ((int));
545static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
e075ae69 546\f
f5316dfe
MM
547/* Sometimes certain combinations of command options do not make
548 sense on a particular target machine. You can define a macro
549 `OVERRIDE_OPTIONS' to take account of this. This macro, if
550 defined, is executed once just after all the command options have
551 been parsed.
552
553 Don't use this macro to turn on various extra optimizations for
554 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
555
556void
557override_options ()
558{
400500c4 559 int i;
e075ae69
RH
560 /* Comes from final.c -- no real reason to change it. */
561#define MAX_CODE_ALIGN 16
f5316dfe 562
c8c5cb99
SC
563 static struct ptt
564 {
e075ae69
RH
565 struct processor_costs *cost; /* Processor costs */
566 int target_enable; /* Target flags to enable. */
567 int target_disable; /* Target flags to disable. */
568 int align_loop; /* Default alignments. */
569 int align_jump;
570 int align_func;
571 int branch_cost;
572 }
0f290768 573 const processor_target_table[PROCESSOR_max] =
e075ae69
RH
574 {
575 {&i386_cost, 0, 0, 2, 2, 2, 1},
576 {&i486_cost, 0, 0, 4, 4, 4, 1},
577 {&pentium_cost, 0, 0, -4, -4, -4, 1},
578 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
309ada50
JH
579 {&k6_cost, 0, 0, -5, -5, 4, 1},
580 {&athlon_cost, 0, 0, 4, -4, 4, 1}
e075ae69
RH
581 };
582
583 static struct pta
584 {
0f290768 585 const char *name; /* processor name or nickname. */
e075ae69
RH
586 enum processor_type processor;
587 }
0f290768 588 const processor_alias_table[] =
e075ae69
RH
589 {
590 {"i386", PROCESSOR_I386},
591 {"i486", PROCESSOR_I486},
592 {"i586", PROCESSOR_PENTIUM},
593 {"pentium", PROCESSOR_PENTIUM},
594 {"i686", PROCESSOR_PENTIUMPRO},
595 {"pentiumpro", PROCESSOR_PENTIUMPRO},
e075ae69 596 {"k6", PROCESSOR_K6},
309ada50 597 {"athlon", PROCESSOR_ATHLON},
3af4bd89 598 };
c8c5cb99 599
0f290768 600 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
c8c5cb99 601
f5316dfe
MM
602#ifdef SUBTARGET_OVERRIDE_OPTIONS
603 SUBTARGET_OVERRIDE_OPTIONS;
604#endif
605
5a6ee819 606 ix86_arch = PROCESSOR_I386;
e075ae69
RH
607 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
608
609 if (ix86_arch_string != 0)
610 {
e075ae69
RH
611 for (i = 0; i < pta_size; i++)
612 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
613 {
614 ix86_arch = processor_alias_table[i].processor;
615 /* Default cpu tuning to the architecture. */
616 ix86_cpu = ix86_arch;
617 break;
618 }
400500c4 619
e075ae69
RH
620 if (i == pta_size)
621 error ("bad value (%s) for -march= switch", ix86_arch_string);
622 }
623
624 if (ix86_cpu_string != 0)
625 {
e075ae69
RH
626 for (i = 0; i < pta_size; i++)
627 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
628 {
629 ix86_cpu = processor_alias_table[i].processor;
630 break;
631 }
632 if (i == pta_size)
633 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
634 }
635
636 ix86_cost = processor_target_table[ix86_cpu].cost;
637 target_flags |= processor_target_table[ix86_cpu].target_enable;
638 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
639
36edd3cc
BS
640 /* Arrange to set up i386_stack_locals for all functions. */
641 init_machine_status = ix86_init_machine_status;
1526a060 642 mark_machine_status = ix86_mark_machine_status;
37b15744 643 free_machine_status = ix86_free_machine_status;
36edd3cc 644
0f290768 645 /* Validate -mregparm= value. */
e075ae69 646 if (ix86_regparm_string)
b08de47e 647 {
400500c4
RK
648 i = atoi (ix86_regparm_string);
649 if (i < 0 || i > REGPARM_MAX)
650 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
651 else
652 ix86_regparm = i;
b08de47e
MM
653 }
654
e9a25f70 655 /* Validate -malign-loops= value, or provide default. */
e075ae69
RH
656 ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
657 if (ix86_align_loops_string)
b08de47e 658 {
400500c4
RK
659 i = atoi (ix86_align_loops_string);
660 if (i < 0 || i > MAX_CODE_ALIGN)
661 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
662 else
663 ix86_align_loops = i;
b08de47e 664 }
3af4bd89
JH
665
666 /* Validate -malign-jumps= value, or provide default. */
e075ae69
RH
667 ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
668 if (ix86_align_jumps_string)
b08de47e 669 {
400500c4
RK
670 i = atoi (ix86_align_jumps_string);
671 if (i < 0 || i > MAX_CODE_ALIGN)
672 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
673 else
674 ix86_align_jumps = i;
b08de47e 675 }
b08de47e 676
0f290768 677 /* Validate -malign-functions= value, or provide default. */
e075ae69
RH
678 ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
679 if (ix86_align_funcs_string)
b08de47e 680 {
400500c4
RK
681 i = atoi (ix86_align_funcs_string);
682 if (i < 0 || i > MAX_CODE_ALIGN)
683 error ("-malign-functions=%d is not between 0 and %d",
684 i, MAX_CODE_ALIGN);
685 else
686 ix86_align_funcs = i;
b08de47e 687 }
3af4bd89 688
e4c0478d 689 /* Validate -mpreferred-stack-boundary= value, or provide default.
3af4bd89 690 The default of 128 bits is for Pentium III's SSE __m128. */
e075ae69
RH
691 ix86_preferred_stack_boundary = 128;
692 if (ix86_preferred_stack_boundary_string)
3af4bd89 693 {
400500c4 694 i = atoi (ix86_preferred_stack_boundary_string);
3af4bd89 695 if (i < 2 || i > 31)
400500c4
RK
696 error ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
697 else
698 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 699 }
77a989d1 700
0f290768 701 /* Validate -mbranch-cost= value, or provide default. */
e075ae69
RH
702 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
703 if (ix86_branch_cost_string)
804a8ee0 704 {
400500c4
RK
705 i = atoi (ix86_branch_cost_string);
706 if (i < 0 || i > 5)
707 error ("-mbranch-cost=%d is not between 0 and 5", i);
708 else
709 ix86_branch_cost = i;
804a8ee0 710 }
804a8ee0 711
e9a25f70
JL
712 /* Keep nonleaf frame pointers. */
713 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 714 flag_omit_frame_pointer = 1;
e075ae69
RH
715
716 /* If we're doing fast math, we don't care about comparison order
717 wrt NaNs. This lets us use a shorter comparison sequence. */
718 if (flag_fast_math)
719 target_flags &= ~MASK_IEEE_FP;
720
a7180f70
BS
721 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
722 on by -msse. */
723 if (TARGET_SSE)
724 target_flags |= MASK_MMX;
f5316dfe
MM
725}
726\f
32b5b1aa 727void
c6aded7c 728optimization_options (level, size)
32b5b1aa 729 int level;
bb5177ac 730 int size ATTRIBUTE_UNUSED;
32b5b1aa 731{
e9a25f70
JL
732 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
733 make the problem with not enough registers even worse. */
32b5b1aa
SC
734#ifdef INSN_SCHEDULING
735 if (level > 1)
736 flag_schedule_insns = 0;
737#endif
738}
b08de47e
MM
739\f
740/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
741 attribute for DECL. The attributes in ATTRIBUTES have previously been
742 assigned to DECL. */
743
744int
e075ae69 745ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
bb5177ac
RL
746 tree decl ATTRIBUTE_UNUSED;
747 tree attributes ATTRIBUTE_UNUSED;
748 tree identifier ATTRIBUTE_UNUSED;
749 tree args ATTRIBUTE_UNUSED;
b08de47e
MM
750{
751 return 0;
752}
753
754/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
755 attribute for TYPE. The attributes in ATTRIBUTES have previously been
756 assigned to TYPE. */
757
758int
e075ae69 759ix86_valid_type_attribute_p (type, attributes, identifier, args)
b08de47e 760 tree type;
bb5177ac 761 tree attributes ATTRIBUTE_UNUSED;
b08de47e
MM
762 tree identifier;
763 tree args;
764{
765 if (TREE_CODE (type) != FUNCTION_TYPE
ac478ac0 766 && TREE_CODE (type) != METHOD_TYPE
b08de47e
MM
767 && TREE_CODE (type) != FIELD_DECL
768 && TREE_CODE (type) != TYPE_DECL)
769 return 0;
770
771 /* Stdcall attribute says callee is responsible for popping arguments
772 if they are not variable. */
773 if (is_attribute_p ("stdcall", identifier))
774 return (args == NULL_TREE);
775
0f290768 776 /* Cdecl attribute says the callee is a normal C declaration. */
b08de47e
MM
777 if (is_attribute_p ("cdecl", identifier))
778 return (args == NULL_TREE);
779
780 /* Regparm attribute specifies how many integer arguments are to be
0f290768 781 passed in registers. */
b08de47e
MM
782 if (is_attribute_p ("regparm", identifier))
783 {
784 tree cst;
785
e9a25f70 786 if (! args || TREE_CODE (args) != TREE_LIST
b08de47e
MM
787 || TREE_CHAIN (args) != NULL_TREE
788 || TREE_VALUE (args) == NULL_TREE)
789 return 0;
790
791 cst = TREE_VALUE (args);
792 if (TREE_CODE (cst) != INTEGER_CST)
793 return 0;
794
cce097f1 795 if (compare_tree_int (cst, REGPARM_MAX) > 0)
b08de47e
MM
796 return 0;
797
798 return 1;
799 }
800
801 return 0;
802}
803
804/* Return 0 if the attributes for two types are incompatible, 1 if they
805 are compatible, and 2 if they are nearly compatible (which causes a
806 warning to be generated). */
807
808int
e075ae69 809ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
810 tree type1;
811 tree type2;
b08de47e 812{
0f290768 813 /* Check for mismatch of non-default calling convention. */
69ddee61 814 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
815
816 if (TREE_CODE (type1) != FUNCTION_TYPE)
817 return 1;
818
819 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
820 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
821 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 822 return 0;
b08de47e
MM
823 return 1;
824}
b08de47e
MM
825\f
826/* Value is the number of bytes of arguments automatically
827 popped when returning from a subroutine call.
828 FUNDECL is the declaration node of the function (as a tree),
829 FUNTYPE is the data type of the function (as a tree),
830 or for a library call it is an identifier node for the subroutine name.
831 SIZE is the number of bytes of arguments passed on the stack.
832
833 On the 80386, the RTD insn may be used to pop them if the number
834 of args is fixed, but if the number is variable then the caller
835 must pop them all. RTD can't be used for library calls now
836 because the library is compiled with the Unix compiler.
837 Use of RTD is a selectable option, since it is incompatible with
838 standard Unix calling sequences. If the option is not selected,
839 the caller must always pop the args.
840
841 The attribute stdcall is equivalent to RTD on a per module basis. */
842
843int
e075ae69 844ix86_return_pops_args (fundecl, funtype, size)
b08de47e
MM
845 tree fundecl;
846 tree funtype;
847 int size;
79325812 848{
3345ee7d 849 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 850
0f290768 851 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 852 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 853
0f290768 854 /* Stdcall functions will pop the stack if not variable args. */
698cdd84
SC
855 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
856 rtd = 1;
79325812 857
698cdd84
SC
858 if (rtd
859 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
860 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
861 == void_type_node)))
698cdd84
SC
862 return size;
863 }
79325812 864
e9a25f70 865 /* Lose any fake structure return argument. */
698cdd84
SC
866 if (aggregate_value_p (TREE_TYPE (funtype)))
867 return GET_MODE_SIZE (Pmode);
79325812 868
2614aac6 869 return 0;
b08de47e 870}
b08de47e
MM
871\f
872/* Argument support functions. */
873
874/* Initialize a variable CUM of type CUMULATIVE_ARGS
875 for a call to a function whose data type is FNTYPE.
876 For a library call, FNTYPE is 0. */
877
878void
879init_cumulative_args (cum, fntype, libname)
e9a25f70 880 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
b08de47e
MM
881 tree fntype; /* tree ptr for function decl */
882 rtx libname; /* SYMBOL_REF of library name or 0 */
883{
884 static CUMULATIVE_ARGS zero_cum;
885 tree param, next_param;
886
887 if (TARGET_DEBUG_ARG)
888 {
889 fprintf (stderr, "\ninit_cumulative_args (");
890 if (fntype)
e9a25f70
JL
891 fprintf (stderr, "fntype code = %s, ret code = %s",
892 tree_code_name[(int) TREE_CODE (fntype)],
893 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
894 else
895 fprintf (stderr, "no fntype");
896
897 if (libname)
898 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
899 }
900
901 *cum = zero_cum;
902
903 /* Set up the number of registers to use for passing arguments. */
e075ae69 904 cum->nregs = ix86_regparm;
b08de47e
MM
905 if (fntype)
906 {
907 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 908
b08de47e
MM
909 if (attr)
910 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
911 }
912
913 /* Determine if this function has variable arguments. This is
914 indicated by the last argument being 'void_type_mode' if there
915 are no variable arguments. If there are variable arguments, then
916 we won't pass anything in registers */
917
918 if (cum->nregs)
919 {
920 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 921 param != 0; param = next_param)
b08de47e
MM
922 {
923 next_param = TREE_CHAIN (param);
e9a25f70 924 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
b08de47e
MM
925 cum->nregs = 0;
926 }
927 }
928
929 if (TARGET_DEBUG_ARG)
930 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
931
932 return;
933}
934
935/* Update the data in CUM to advance over an argument
936 of mode MODE and data type TYPE.
937 (TYPE is null for libcalls where that information may not be available.) */
938
939void
940function_arg_advance (cum, mode, type, named)
941 CUMULATIVE_ARGS *cum; /* current arg information */
942 enum machine_mode mode; /* current arg mode */
943 tree type; /* type of the argument or 0 if lib support */
944 int named; /* whether or not the argument was named */
945{
5ac9118e
KG
946 int bytes =
947 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
948 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
949
950 if (TARGET_DEBUG_ARG)
951 fprintf (stderr,
e9a25f70 952 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 953 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
82a127a9 954 if (TARGET_SSE && mode == TImode)
b08de47e 955 {
82a127a9
CM
956 cum->sse_words += words;
957 cum->sse_nregs -= 1;
958 cum->sse_regno += 1;
959 if (cum->sse_nregs <= 0)
960 {
961 cum->sse_nregs = 0;
962 cum->sse_regno = 0;
963 }
b08de47e 964 }
82a127a9
CM
965 else
966 {
967 cum->words += words;
968 cum->nregs -= words;
969 cum->regno += words;
b08de47e 970
82a127a9
CM
971 if (cum->nregs <= 0)
972 {
973 cum->nregs = 0;
974 cum->regno = 0;
975 }
976 }
b08de47e
MM
977 return;
978}
979
980/* Define where to put the arguments to a function.
981 Value is zero to push the argument on the stack,
982 or a hard register in which to store the argument.
983
984 MODE is the argument's machine mode.
985 TYPE is the data type of the argument (as a tree).
986 This is null for libcalls where that information may
987 not be available.
988 CUM is a variable of type CUMULATIVE_ARGS which gives info about
989 the preceding args and about the function being called.
990 NAMED is nonzero if this argument is a named parameter
991 (otherwise it is an extra parameter matching an ellipsis). */
992
993struct rtx_def *
994function_arg (cum, mode, type, named)
995 CUMULATIVE_ARGS *cum; /* current arg information */
996 enum machine_mode mode; /* current arg mode */
997 tree type; /* type of the argument or 0 if lib support */
998 int named; /* != 0 for normal args, == 0 for ... args */
999{
1000 rtx ret = NULL_RTX;
5ac9118e
KG
1001 int bytes =
1002 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
1003 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1004
1005 switch (mode)
1006 {
0f290768 1007 /* For now, pass fp/complex values on the stack. */
e9a25f70 1008 default:
b08de47e
MM
1009 break;
1010
1011 case BLKmode:
1012 case DImode:
1013 case SImode:
1014 case HImode:
1015 case QImode:
1016 if (words <= cum->nregs)
f64cecad 1017 ret = gen_rtx_REG (mode, cum->regno);
b08de47e 1018 break;
82a127a9
CM
1019 case TImode:
1020 if (cum->sse_nregs)
1021 ret = gen_rtx_REG (mode, cum->sse_regno);
1022 break;
b08de47e
MM
1023 }
1024
1025 if (TARGET_DEBUG_ARG)
1026 {
1027 fprintf (stderr,
e9a25f70 1028 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
b08de47e
MM
1029 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1030
1031 if (ret)
1032 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
1033 else
1034 fprintf (stderr, ", stack");
1035
1036 fprintf (stderr, " )\n");
1037 }
1038
1039 return ret;
1040}
e075ae69 1041\f
8bad7136
JL
1042
1043/* Return nonzero if OP is (const_int 1), else return zero. */
1044
1045int
1046const_int_1_operand (op, mode)
1047 rtx op;
1048 enum machine_mode mode ATTRIBUTE_UNUSED;
1049{
1050 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
1051}
1052
e075ae69
RH
1053/* Returns 1 if OP is either a symbol reference or a sum of a symbol
1054 reference and a constant. */
b08de47e
MM
1055
1056int
e075ae69
RH
1057symbolic_operand (op, mode)
1058 register rtx op;
1059 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1060{
e075ae69 1061 switch (GET_CODE (op))
2a2ab3f9 1062 {
e075ae69
RH
1063 case SYMBOL_REF:
1064 case LABEL_REF:
1065 return 1;
1066
1067 case CONST:
1068 op = XEXP (op, 0);
1069 if (GET_CODE (op) == SYMBOL_REF
1070 || GET_CODE (op) == LABEL_REF
1071 || (GET_CODE (op) == UNSPEC
1072 && XINT (op, 1) >= 6
1073 && XINT (op, 1) <= 7))
1074 return 1;
1075 if (GET_CODE (op) != PLUS
1076 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1077 return 0;
1078
1079 op = XEXP (op, 0);
1080 if (GET_CODE (op) == SYMBOL_REF
1081 || GET_CODE (op) == LABEL_REF)
1082 return 1;
1083 /* Only @GOTOFF gets offsets. */
1084 if (GET_CODE (op) != UNSPEC
1085 || XINT (op, 1) != 7)
1086 return 0;
1087
1088 op = XVECEXP (op, 0, 0);
1089 if (GET_CODE (op) == SYMBOL_REF
1090 || GET_CODE (op) == LABEL_REF)
1091 return 1;
1092 return 0;
1093
1094 default:
1095 return 0;
2a2ab3f9
JVA
1096 }
1097}
2a2ab3f9 1098
e075ae69 1099/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 1100
e075ae69
RH
1101int
1102pic_symbolic_operand (op, mode)
1103 register rtx op;
1104 enum machine_mode mode ATTRIBUTE_UNUSED;
1105{
1106 if (GET_CODE (op) == CONST)
2a2ab3f9 1107 {
e075ae69
RH
1108 op = XEXP (op, 0);
1109 if (GET_CODE (op) == UNSPEC)
1110 return 1;
1111 if (GET_CODE (op) != PLUS
1112 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1113 return 0;
1114 op = XEXP (op, 0);
1115 if (GET_CODE (op) == UNSPEC)
1116 return 1;
2a2ab3f9 1117 }
e075ae69 1118 return 0;
2a2ab3f9 1119}
2a2ab3f9 1120
28d52ffb
RH
1121/* Test for a valid operand for a call instruction. Don't allow the
1122 arg pointer register or virtual regs since they may decay into
1123 reg + const, which the patterns can't handle. */
2a2ab3f9 1124
e075ae69
RH
1125int
1126call_insn_operand (op, mode)
1127 rtx op;
1128 enum machine_mode mode ATTRIBUTE_UNUSED;
1129{
e075ae69
RH
1130 /* Disallow indirect through a virtual register. This leads to
1131 compiler aborts when trying to eliminate them. */
1132 if (GET_CODE (op) == REG
1133 && (op == arg_pointer_rtx
564d80f4 1134 || op == frame_pointer_rtx
e075ae69
RH
1135 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
1136 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
1137 return 0;
2a2ab3f9 1138
28d52ffb
RH
1139 /* Disallow `call 1234'. Due to varying assembler lameness this
1140 gets either rejected or translated to `call .+1234'. */
1141 if (GET_CODE (op) == CONST_INT)
1142 return 0;
1143
cbbf65e0
RH
1144 /* Explicitly allow SYMBOL_REF even if pic. */
1145 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 1146 return 1;
2a2ab3f9 1147
cbbf65e0
RH
1148 /* Half-pic doesn't allow anything but registers and constants.
1149 We've just taken care of the later. */
1150 if (HALF_PIC_P ())
1151 return register_operand (op, Pmode);
1152
1153 /* Otherwise we can allow any general_operand in the address. */
1154 return general_operand (op, Pmode);
e075ae69 1155}
79325812 1156
e075ae69
RH
1157int
1158constant_call_address_operand (op, mode)
1159 rtx op;
1160 enum machine_mode mode ATTRIBUTE_UNUSED;
1161{
eaf19aba
JJ
1162 if (GET_CODE (op) == CONST
1163 && GET_CODE (XEXP (op, 0)) == PLUS
1164 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1165 op = XEXP (XEXP (op, 0), 0);
e1ff012c 1166 return GET_CODE (op) == SYMBOL_REF;
e075ae69 1167}
2a2ab3f9 1168
e075ae69 1169/* Match exactly zero and one. */
e9a25f70 1170
0f290768 1171int
e075ae69
RH
1172const0_operand (op, mode)
1173 register rtx op;
1174 enum machine_mode mode;
1175{
1176 return op == CONST0_RTX (mode);
1177}
e9a25f70 1178
0f290768 1179int
e075ae69
RH
1180const1_operand (op, mode)
1181 register rtx op;
1182 enum machine_mode mode ATTRIBUTE_UNUSED;
1183{
1184 return op == const1_rtx;
1185}
2a2ab3f9 1186
e075ae69 1187/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 1188
e075ae69
RH
1189int
1190const248_operand (op, mode)
1191 register rtx op;
1192 enum machine_mode mode ATTRIBUTE_UNUSED;
1193{
1194 return (GET_CODE (op) == CONST_INT
1195 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1196}
e9a25f70 1197
e075ae69 1198/* True if this is a constant appropriate for an increment or decremenmt. */
81fd0956 1199
e075ae69
RH
1200int
1201incdec_operand (op, mode)
1202 register rtx op;
1203 enum machine_mode mode;
1204{
1205 if (op == const1_rtx || op == constm1_rtx)
1206 return 1;
1207 if (GET_CODE (op) != CONST_INT)
1208 return 0;
1209 if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
1210 return 1;
1211 if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
1212 return 1;
1213 if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
1214 return 1;
1215 return 0;
1216}
2a2ab3f9 1217
0f290768 1218/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
1219 register eliminable to the stack pointer. Otherwise, this is
1220 a register operand.
2a2ab3f9 1221
e075ae69
RH
1222 This is used to prevent esp from being used as an index reg.
1223 Which would only happen in pathological cases. */
5f1ec3e6 1224
e075ae69
RH
1225int
1226reg_no_sp_operand (op, mode)
1227 register rtx op;
1228 enum machine_mode mode;
1229{
1230 rtx t = op;
1231 if (GET_CODE (t) == SUBREG)
1232 t = SUBREG_REG (t);
564d80f4 1233 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 1234 return 0;
2a2ab3f9 1235
e075ae69 1236 return register_operand (op, mode);
2a2ab3f9 1237}
b840bfb0 1238
915119a5
BS
1239int
1240mmx_reg_operand (op, mode)
1241 register rtx op;
bd793c65 1242 enum machine_mode mode ATTRIBUTE_UNUSED;
915119a5
BS
1243{
1244 return MMX_REG_P (op);
1245}
1246
2c5a510c
RH
1247/* Return false if this is any eliminable register. Otherwise
1248 general_operand. */
1249
1250int
1251general_no_elim_operand (op, mode)
1252 register rtx op;
1253 enum machine_mode mode;
1254{
1255 rtx t = op;
1256 if (GET_CODE (t) == SUBREG)
1257 t = SUBREG_REG (t);
1258 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1259 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1260 || t == virtual_stack_dynamic_rtx)
1261 return 0;
1262
1263 return general_operand (op, mode);
1264}
1265
1266/* Return false if this is any eliminable register. Otherwise
1267 register_operand or const_int. */
1268
1269int
1270nonmemory_no_elim_operand (op, mode)
1271 register rtx op;
1272 enum machine_mode mode;
1273{
1274 rtx t = op;
1275 if (GET_CODE (t) == SUBREG)
1276 t = SUBREG_REG (t);
1277 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1278 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1279 || t == virtual_stack_dynamic_rtx)
1280 return 0;
1281
1282 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1283}
1284
e075ae69 1285/* Return true if op is a Q_REGS class register. */
b840bfb0 1286
e075ae69
RH
1287int
1288q_regs_operand (op, mode)
1289 register rtx op;
1290 enum machine_mode mode;
b840bfb0 1291{
e075ae69
RH
1292 if (mode != VOIDmode && GET_MODE (op) != mode)
1293 return 0;
1294 if (GET_CODE (op) == SUBREG)
1295 op = SUBREG_REG (op);
1296 return QI_REG_P (op);
0f290768 1297}
b840bfb0 1298
e075ae69 1299/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 1300
e075ae69
RH
1301int
1302non_q_regs_operand (op, mode)
1303 register rtx op;
1304 enum machine_mode mode;
1305{
1306 if (mode != VOIDmode && GET_MODE (op) != mode)
1307 return 0;
1308 if (GET_CODE (op) == SUBREG)
1309 op = SUBREG_REG (op);
1310 return NON_QI_REG_P (op);
0f290768 1311}
b840bfb0 1312
915119a5
BS
1313/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
1314 insns. */
1315int
1316sse_comparison_operator (op, mode)
1317 rtx op;
1318 enum machine_mode mode ATTRIBUTE_UNUSED;
1319{
1320 enum rtx_code code = GET_CODE (op);
a46d1d38
JH
1321 switch (code)
1322 {
1323 /* Operations supported directly. */
1324 case EQ:
1325 case LT:
1326 case LE:
1327 case UNORDERED:
1328 case NE:
1329 case UNGE:
1330 case UNGT:
1331 case ORDERED:
1332 return 1;
1333 /* These are equivalent to ones above in non-IEEE comparisons. */
1334 case UNEQ:
1335 case UNLT:
1336 case UNLE:
1337 case LTGT:
1338 case GE:
1339 case GT:
1340 return !TARGET_IEEE_FP;
1341 default:
1342 return 0;
1343 }
915119a5 1344}
9076b9c1 1345/* Return 1 if OP is a valid comparison operator in valid mode. */
e075ae69 1346int
9076b9c1
JH
1347ix86_comparison_operator (op, mode)
1348 register rtx op;
1349 enum machine_mode mode;
e075ae69 1350{
9076b9c1 1351 enum machine_mode inmode;
9a915772 1352 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
1353 if (mode != VOIDmode && GET_MODE (op) != mode)
1354 return 0;
9a915772
JH
1355 if (GET_RTX_CLASS (code) != '<')
1356 return 0;
1357 inmode = GET_MODE (XEXP (op, 0));
1358
1359 if (inmode == CCFPmode || inmode == CCFPUmode)
1360 {
1361 enum rtx_code second_code, bypass_code;
1362 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1363 return (bypass_code == NIL && second_code == NIL);
1364 }
1365 switch (code)
3a3677ff
RH
1366 {
1367 case EQ: case NE:
3a3677ff 1368 return 1;
9076b9c1 1369 case LT: case GE:
7e08e190 1370 if (inmode == CCmode || inmode == CCGCmode
9076b9c1
JH
1371 || inmode == CCGOCmode || inmode == CCNOmode)
1372 return 1;
1373 return 0;
7e08e190 1374 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
7e08e190 1375 if (inmode == CCmode)
9076b9c1
JH
1376 return 1;
1377 return 0;
1378 case GT: case LE:
7e08e190 1379 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
9076b9c1
JH
1380 return 1;
1381 return 0;
3a3677ff
RH
1382 default:
1383 return 0;
1384 }
1385}
1386
9076b9c1 1387/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3a3677ff 1388
9076b9c1
JH
1389int
1390fcmov_comparison_operator (op, mode)
3a3677ff
RH
1391 register rtx op;
1392 enum machine_mode mode;
1393{
b62d22a2 1394 enum machine_mode inmode;
9a915772 1395 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
1396 if (mode != VOIDmode && GET_MODE (op) != mode)
1397 return 0;
9a915772
JH
1398 if (GET_RTX_CLASS (code) != '<')
1399 return 0;
1400 inmode = GET_MODE (XEXP (op, 0));
1401 if (inmode == CCFPmode || inmode == CCFPUmode)
3a3677ff 1402 {
9a915772
JH
1403 enum rtx_code second_code, bypass_code;
1404 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1405 if (bypass_code != NIL || second_code != NIL)
1406 return 0;
1407 code = ix86_fp_compare_code_to_integer (code);
1408 }
1409 /* i387 supports just limited amount of conditional codes. */
1410 switch (code)
1411 {
1412 case LTU: case GTU: case LEU: case GEU:
1413 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
9076b9c1
JH
1414 return 1;
1415 return 0;
9a915772
JH
1416 case ORDERED: case UNORDERED:
1417 case EQ: case NE:
1418 return 1;
3a3677ff
RH
1419 default:
1420 return 0;
1421 }
e075ae69 1422}
b840bfb0 1423
e9e80858
JH
1424/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1425
1426int
1427promotable_binary_operator (op, mode)
1428 register rtx op;
1429 enum machine_mode mode ATTRIBUTE_UNUSED;
1430{
1431 switch (GET_CODE (op))
1432 {
1433 case MULT:
1434 /* Modern CPUs have same latency for HImode and SImode multiply,
1435 but 386 and 486 do HImode multiply faster. */
1436 return ix86_cpu > PROCESSOR_I486;
1437 case PLUS:
1438 case AND:
1439 case IOR:
1440 case XOR:
1441 case ASHIFT:
1442 return 1;
1443 default:
1444 return 0;
1445 }
1446}
1447
e075ae69
RH
1448/* Nearly general operand, but accept any const_double, since we wish
1449 to be able to drop them into memory rather than have them get pulled
1450 into registers. */
b840bfb0 1451
2a2ab3f9 1452int
e075ae69
RH
1453cmp_fp_expander_operand (op, mode)
1454 register rtx op;
1455 enum machine_mode mode;
2a2ab3f9 1456{
e075ae69 1457 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 1458 return 0;
e075ae69 1459 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 1460 return 1;
e075ae69 1461 return general_operand (op, mode);
2a2ab3f9
JVA
1462}
1463
e075ae69 1464/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
1465
1466int
e075ae69 1467ext_register_operand (op, mode)
2a2ab3f9 1468 register rtx op;
bb5177ac 1469 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1470{
e075ae69
RH
1471 if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
1472 return 0;
1473 return register_operand (op, VOIDmode);
1474}
1475
1476/* Return 1 if this is a valid binary floating-point operation.
0f290768 1477 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
1478
1479int
1480binary_fp_operator (op, mode)
1481 register rtx op;
1482 enum machine_mode mode;
1483{
1484 if (mode != VOIDmode && mode != GET_MODE (op))
1485 return 0;
1486
2a2ab3f9
JVA
1487 switch (GET_CODE (op))
1488 {
e075ae69
RH
1489 case PLUS:
1490 case MINUS:
1491 case MULT:
1492 case DIV:
1493 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 1494
2a2ab3f9
JVA
1495 default:
1496 return 0;
1497 }
1498}
fee2770d 1499
e075ae69
RH
1500int
1501mult_operator(op, mode)
1502 register rtx op;
1503 enum machine_mode mode ATTRIBUTE_UNUSED;
1504{
1505 return GET_CODE (op) == MULT;
1506}
1507
1508int
1509div_operator(op, mode)
1510 register rtx op;
1511 enum machine_mode mode ATTRIBUTE_UNUSED;
1512{
1513 return GET_CODE (op) == DIV;
1514}
0a726ef1
JL
1515
1516int
e075ae69
RH
1517arith_or_logical_operator (op, mode)
1518 rtx op;
1519 enum machine_mode mode;
0a726ef1 1520{
e075ae69
RH
1521 return ((mode == VOIDmode || GET_MODE (op) == mode)
1522 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1523 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
1524}
1525
e075ae69 1526/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
1527
1528int
e075ae69
RH
1529memory_displacement_operand (op, mode)
1530 register rtx op;
1531 enum machine_mode mode;
4f2c8ebb 1532{
e075ae69 1533 struct ix86_address parts;
e9a25f70 1534
e075ae69
RH
1535 if (! memory_operand (op, mode))
1536 return 0;
1537
1538 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1539 abort ();
1540
1541 return parts.disp != NULL_RTX;
4f2c8ebb
RS
1542}
1543
16189740 1544/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
e075ae69
RH
1545 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1546
1547 ??? It seems likely that this will only work because cmpsi is an
1548 expander, and no actual insns use this. */
4f2c8ebb
RS
1549
1550int
e075ae69
RH
1551cmpsi_operand (op, mode)
1552 rtx op;
1553 enum machine_mode mode;
fee2770d 1554{
e075ae69
RH
1555 if (general_operand (op, mode))
1556 return 1;
1557
1558 if (GET_CODE (op) == AND
1559 && GET_MODE (op) == SImode
1560 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
1561 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
1562 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
1563 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
1564 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
1565 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 1566 return 1;
e9a25f70 1567
fee2770d
RS
1568 return 0;
1569}
d784886d 1570
e075ae69
RH
1571/* Returns 1 if OP is memory operand that can not be represented by the
1572 modRM array. */
d784886d
RK
1573
1574int
e075ae69 1575long_memory_operand (op, mode)
d784886d
RK
1576 register rtx op;
1577 enum machine_mode mode;
1578{
e075ae69 1579 if (! memory_operand (op, mode))
d784886d
RK
1580 return 0;
1581
e075ae69 1582 return memory_address_length (op) != 0;
d784886d 1583}
2247f6ed
JH
1584
1585/* Return nonzero if the rtx is known aligned. */
1586
1587int
1588aligned_operand (op, mode)
1589 rtx op;
1590 enum machine_mode mode;
1591{
1592 struct ix86_address parts;
1593
1594 if (!general_operand (op, mode))
1595 return 0;
1596
0f290768 1597 /* Registers and immediate operands are always "aligned". */
2247f6ed
JH
1598 if (GET_CODE (op) != MEM)
1599 return 1;
1600
0f290768 1601 /* Don't even try to do any aligned optimizations with volatiles. */
2247f6ed
JH
1602 if (MEM_VOLATILE_P (op))
1603 return 0;
1604
1605 op = XEXP (op, 0);
1606
1607 /* Pushes and pops are only valid on the stack pointer. */
1608 if (GET_CODE (op) == PRE_DEC
1609 || GET_CODE (op) == POST_INC)
1610 return 1;
1611
1612 /* Decode the address. */
1613 if (! ix86_decompose_address (op, &parts))
1614 abort ();
1615
1616 /* Look for some component that isn't known to be aligned. */
1617 if (parts.index)
1618 {
1619 if (parts.scale < 4
bdb429a5 1620 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
2247f6ed
JH
1621 return 0;
1622 }
1623 if (parts.base)
1624 {
bdb429a5 1625 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
2247f6ed
JH
1626 return 0;
1627 }
1628 if (parts.disp)
1629 {
1630 if (GET_CODE (parts.disp) != CONST_INT
1631 || (INTVAL (parts.disp) & 3) != 0)
1632 return 0;
1633 }
1634
1635 /* Didn't find one -- this must be an aligned address. */
1636 return 1;
1637}
e075ae69
RH
1638\f
1639/* Return true if the constant is something that can be loaded with
1640 a special instruction. Only handle 0.0 and 1.0; others are less
1641 worthwhile. */
57dbca5e
BS
1642
1643int
e075ae69
RH
1644standard_80387_constant_p (x)
1645 rtx x;
57dbca5e 1646{
2b04e52b 1647 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 1648 return -1;
2b04e52b
JH
1649 /* Note that on the 80387, other constants, such as pi, that we should support
1650 too. On some machines, these are much slower to load as standard constant,
1651 than to load from doubles in memory. */
1652 if (x == CONST0_RTX (GET_MODE (x)))
1653 return 1;
1654 if (x == CONST1_RTX (GET_MODE (x)))
1655 return 2;
e075ae69 1656 return 0;
57dbca5e
BS
1657}
1658
2b04e52b
JH
1659/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
1660 */
1661int
1662standard_sse_constant_p (x)
1663 rtx x;
1664{
1665 if (GET_CODE (x) != CONST_DOUBLE)
1666 return -1;
1667 return (x == CONST0_RTX (GET_MODE (x)));
1668}
1669
2a2ab3f9
JVA
1670/* Returns 1 if OP contains a symbol reference */
1671
1672int
1673symbolic_reference_mentioned_p (op)
1674 rtx op;
1675{
6f7d635c 1676 register const char *fmt;
2a2ab3f9
JVA
1677 register int i;
1678
1679 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
1680 return 1;
1681
1682 fmt = GET_RTX_FORMAT (GET_CODE (op));
1683 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
1684 {
1685 if (fmt[i] == 'E')
1686 {
1687 register int j;
1688
1689 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
1690 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
1691 return 1;
1692 }
e9a25f70 1693
2a2ab3f9
JVA
1694 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
1695 return 1;
1696 }
1697
1698 return 0;
1699}
e075ae69
RH
1700
1701/* Return 1 if it is appropriate to emit `ret' instructions in the
1702 body of a function. Do this only if the epilogue is simple, needing a
1703 couple of insns. Prior to reloading, we can't tell how many registers
1704 must be saved, so return 0 then. Return 0 if there is no frame
1705 marker to de-allocate.
1706
1707 If NON_SAVING_SETJMP is defined and true, then it is not possible
1708 for the epilogue to be simple, so return 0. This is a special case
1709 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1710 until final, but jump_optimize may need to know sooner if a
1711 `return' is OK. */
32b5b1aa
SC
1712
1713int
e075ae69 1714ix86_can_use_return_insn_p ()
32b5b1aa 1715{
4dd2ac2c 1716 struct ix86_frame frame;
9a7372d6 1717
e075ae69
RH
1718#ifdef NON_SAVING_SETJMP
1719 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
1720 return 0;
1721#endif
9a7372d6
RH
1722#ifdef FUNCTION_BLOCK_PROFILER_EXIT
1723 if (profile_block_flag == 2)
1724 return 0;
1725#endif
1726
1727 if (! reload_completed || frame_pointer_needed)
1728 return 0;
32b5b1aa 1729
9a7372d6
RH
1730 /* Don't allow more than 32 pop, since that's all we can do
1731 with one instruction. */
1732 if (current_function_pops_args
1733 && current_function_args_size >= 32768)
e075ae69 1734 return 0;
32b5b1aa 1735
4dd2ac2c
JH
1736 ix86_compute_frame_layout (&frame);
1737 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 1738}
6fca22eb
RH
1739
1740/* Value should be nonzero if functions must have frame pointers.
1741 Zero means the frame pointer need not be set up (and parms may
1742 be accessed via the stack pointer) in functions that seem suitable. */
1743
1744int
1745ix86_frame_pointer_required ()
1746{
1747 /* If we accessed previous frames, then the generated code expects
1748 to be able to access the saved ebp value in our frame. */
1749 if (cfun->machine->accesses_prev_frame)
1750 return 1;
1751
1752 /* Several x86 os'es need a frame pointer for other reasons,
1753 usually pertaining to setjmp. */
1754 if (SUBTARGET_FRAME_POINTER_REQUIRED)
1755 return 1;
1756
1757 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
1758 the frame pointer by default. Turn it back on now if we've not
1759 got a leaf function. */
1760 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
1761 return 1;
1762
1763 return 0;
1764}
1765
1766/* Record that the current function accesses previous call frames. */
1767
1768void
1769ix86_setup_frame_addresses ()
1770{
1771 cfun->machine->accesses_prev_frame = 1;
1772}
e075ae69 1773\f
4cf12e7e 1774static char pic_label_name[32];
e9a25f70 1775
e075ae69
RH
1776/* This function generates code for -fpic that loads %ebx with
1777 the return address of the caller and then returns. */
1778
1779void
4cf12e7e 1780ix86_asm_file_end (file)
e075ae69 1781 FILE *file;
e075ae69
RH
1782{
1783 rtx xops[2];
32b5b1aa 1784
4cf12e7e
RH
1785 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
1786 return;
32b5b1aa 1787
c7f0da1d
RH
1788 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
1789 to updating relocations to a section being discarded such that this
1790 doesn't work. Ought to detect this at configure time. */
1791#if 0 && defined (ASM_OUTPUT_SECTION_NAME)
4cf12e7e
RH
1792 /* The trick here is to create a linkonce section containing the
1793 pic label thunk, but to refer to it with an internal label.
1794 Because the label is internal, we don't have inter-dso name
1795 binding issues on hosts that don't support ".hidden".
e9a25f70 1796
4cf12e7e
RH
1797 In order to use these macros, however, we must create a fake
1798 function decl. */
1799 {
1800 tree decl = build_decl (FUNCTION_DECL,
1801 get_identifier ("i686.get_pc_thunk"),
1802 error_mark_node);
1803 DECL_ONE_ONLY (decl) = 1;
1804 UNIQUE_SECTION (decl, 0);
1805 named_section (decl, NULL, 0);
1806 }
1807#else
1808 text_section ();
1809#endif
0afeb08a 1810
4cf12e7e
RH
1811 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
1812 internal (non-global) label that's being emitted, it didn't make
1813 sense to have .type information for local labels. This caused
1814 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
1815 me debug info for a label that you're declaring non-global?) this
1816 was changed to call ASM_OUTPUT_LABEL() instead. */
1817
1818 ASM_OUTPUT_LABEL (file, pic_label_name);
1819
1820 xops[0] = pic_offset_table_rtx;
1821 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
1822 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
1823 output_asm_insn ("ret", xops);
32b5b1aa 1824}
32b5b1aa 1825
e075ae69
RH
1826void
1827load_pic_register ()
32b5b1aa 1828{
e075ae69 1829 rtx gotsym, pclab;
32b5b1aa 1830
a8a05998 1831 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
32b5b1aa 1832
e075ae69 1833 if (TARGET_DEEP_BRANCH_PREDICTION)
32b5b1aa 1834 {
4cf12e7e
RH
1835 if (! pic_label_name[0])
1836 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
e075ae69 1837 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
32b5b1aa 1838 }
e075ae69 1839 else
e5cb57e8 1840 {
e075ae69 1841 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
e5cb57e8 1842 }
e5cb57e8 1843
e075ae69 1844 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
2a2ab3f9 1845
e075ae69
RH
1846 if (! TARGET_DEEP_BRANCH_PREDICTION)
1847 emit_insn (gen_popsi1 (pic_offset_table_rtx));
79325812 1848
e075ae69 1849 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
e9a25f70 1850}
8dfe5673 1851
e075ae69 1852/* Generate an SImode "push" pattern for input ARG. */
e9a25f70 1853
e075ae69
RH
1854static rtx
1855gen_push (arg)
1856 rtx arg;
e9a25f70 1857{
c5c76735
JL
1858 return gen_rtx_SET (VOIDmode,
1859 gen_rtx_MEM (SImode,
1860 gen_rtx_PRE_DEC (SImode,
1861 stack_pointer_rtx)),
1862 arg);
e9a25f70
JL
1863}
1864
4dd2ac2c
JH
1865/* Return 1 if we need to save REGNO. */
1866static int
1867ix86_save_reg (regno)
1868 int regno;
1869{
1870 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1871 || current_function_uses_const_pool);
1872 return ((regs_ever_live[regno] && !call_used_regs[regno]
1873 && !fixed_regs[regno]
1874 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed))
1875 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used));
1876
1877}
1878
0903fcab
JH
1879/* Return number of registers to be saved on the stack. */
1880
1881static int
1882ix86_nsaved_regs ()
1883{
1884 int nregs = 0;
0903fcab
JH
1885 int regno;
1886
4dd2ac2c
JH
1887 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1888 if (ix86_save_reg (regno))
1889 nregs++;
0903fcab
JH
1890 return nregs;
1891}
1892
1893/* Return the offset between two registers, one to be eliminated, and the other
1894 its replacement, at the start of a routine. */
1895
1896HOST_WIDE_INT
1897ix86_initial_elimination_offset (from, to)
1898 int from;
1899 int to;
1900{
4dd2ac2c
JH
1901 struct ix86_frame frame;
1902 ix86_compute_frame_layout (&frame);
564d80f4
JH
1903
1904 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 1905 return frame.hard_frame_pointer_offset;
564d80f4
JH
1906 else if (from == FRAME_POINTER_REGNUM
1907 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 1908 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
1909 else
1910 {
564d80f4
JH
1911 if (to != STACK_POINTER_REGNUM)
1912 abort ();
1913 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 1914 return frame.stack_pointer_offset;
564d80f4
JH
1915 else if (from != FRAME_POINTER_REGNUM)
1916 abort ();
0903fcab 1917 else
4dd2ac2c 1918 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
1919 }
1920}
1921
4dd2ac2c 1922/* Fill structure ix86_frame about frame of currently computed function. */
0f290768 1923
4dd2ac2c
JH
1924static void
1925ix86_compute_frame_layout (frame)
1926 struct ix86_frame *frame;
65954bd8 1927{
65954bd8 1928 HOST_WIDE_INT total_size;
564d80f4 1929 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
44affdae
JH
1930 int offset;
1931 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4dd2ac2c 1932 HOST_WIDE_INT size = get_frame_size ();
65954bd8 1933
4dd2ac2c 1934 frame->nregs = ix86_nsaved_regs ();
564d80f4 1935 total_size = size;
65954bd8 1936
4dd2ac2c
JH
1937 /* Skip return value and save base pointer. */
1938 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
1939
1940 frame->hard_frame_pointer_offset = offset;
564d80f4 1941
fcbfaa65
RK
1942 /* Do some sanity checking of stack_alignment_needed and
1943 preferred_alignment, since i386 port is the only using those features
1944 that may break easilly. */
564d80f4 1945
44affdae
JH
1946 if (size && !stack_alignment_needed)
1947 abort ();
44affdae
JH
1948 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
1949 abort ();
1950 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1951 abort ();
1952 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1953 abort ();
564d80f4 1954
4dd2ac2c
JH
1955 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
1956 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 1957
4dd2ac2c
JH
1958 /* Register save area */
1959 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 1960
4dd2ac2c
JH
1961 /* Align start of frame for local function. */
1962 frame->padding1 = ((offset + stack_alignment_needed - 1)
1963 & -stack_alignment_needed) - offset;
f73ad30e 1964
4dd2ac2c 1965 offset += frame->padding1;
65954bd8 1966
4dd2ac2c
JH
1967 /* Frame pointer points here. */
1968 frame->frame_pointer_offset = offset;
54ff41b7 1969
4dd2ac2c 1970 offset += size;
65954bd8 1971
4dd2ac2c 1972 /* Add outgoing arguments area. */
f73ad30e 1973 if (ACCUMULATE_OUTGOING_ARGS)
4dd2ac2c
JH
1974 {
1975 offset += current_function_outgoing_args_size;
1976 frame->outgoing_arguments_size = current_function_outgoing_args_size;
1977 }
1978 else
1979 frame->outgoing_arguments_size = 0;
564d80f4 1980
4dd2ac2c
JH
1981 /* Align stack boundary. */
1982 frame->padding2 = ((offset + preferred_alignment - 1)
1983 & -preferred_alignment) - offset;
1984
1985 offset += frame->padding2;
1986
1987 /* We've reached end of stack frame. */
1988 frame->stack_pointer_offset = offset;
1989
1990 /* Size prologue needs to allocate. */
1991 frame->to_allocate =
1992 (size + frame->padding1 + frame->padding2
1993 + frame->outgoing_arguments_size);
1994
1995#if 0
1996 fprintf (stderr, "nregs: %i\n", frame->nregs);
1997 fprintf (stderr, "size: %i\n", size);
1998 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
1999 fprintf (stderr, "padding1: %i\n", frame->padding1);
2000 fprintf (stderr, "padding2: %i\n", frame->padding2);
2001 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
2002 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
2003 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
2004 frame->hard_frame_pointer_offset);
2005 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
2006#endif
65954bd8
JL
2007}
2008
0903fcab
JH
2009/* Emit code to save registers in the prologue. */
2010
2011static void
2012ix86_emit_save_regs ()
2013{
2014 register int regno;
0903fcab 2015 rtx insn;
0903fcab 2016
4dd2ac2c
JH
2017 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
2018 if (ix86_save_reg (regno))
0903fcab
JH
2019 {
2020 insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
2021 RTX_FRAME_RELATED_P (insn) = 1;
2022 }
2023}
2024
0f290768 2025/* Expand the prologue into a bunch of separate insns. */
e075ae69
RH
2026
2027void
2028ix86_expand_prologue ()
2a2ab3f9 2029{
564d80f4 2030 rtx insn;
aae75261
JVA
2031 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2032 || current_function_uses_const_pool);
4dd2ac2c
JH
2033 struct ix86_frame frame;
2034
2035 ix86_compute_frame_layout (&frame);
79325812 2036
e075ae69
RH
2037 /* Note: AT&T enter does NOT have reversed args. Enter is probably
2038 slower on all targets. Also sdb doesn't like it. */
e9a25f70 2039
2a2ab3f9
JVA
2040 if (frame_pointer_needed)
2041 {
564d80f4 2042 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
e075ae69 2043 RTX_FRAME_RELATED_P (insn) = 1;
e9a25f70 2044
564d80f4 2045 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
e075ae69 2046 RTX_FRAME_RELATED_P (insn) = 1;
2a2ab3f9
JVA
2047 }
2048
1c71e60e 2049 ix86_emit_save_regs ();
564d80f4 2050
4dd2ac2c 2051 if (frame.to_allocate == 0)
8dfe5673 2052 ;
4dd2ac2c 2053 else if (! TARGET_STACK_PROBE || frame.to_allocate < CHECK_STACK_LIMIT)
469ac993 2054 {
e075ae69 2055 if (frame_pointer_needed)
1c71e60e
JH
2056 insn = emit_insn (gen_pro_epilogue_adjust_stack
2057 (stack_pointer_rtx, stack_pointer_rtx,
4dd2ac2c 2058 GEN_INT (-frame.to_allocate), hard_frame_pointer_rtx));
79325812 2059 else
e075ae69 2060 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
4dd2ac2c 2061 GEN_INT (-frame.to_allocate)));
e075ae69 2062 RTX_FRAME_RELATED_P (insn) = 1;
469ac993 2063 }
79325812 2064 else
8dfe5673 2065 {
e075ae69 2066 /* ??? Is this only valid for Win32? */
e9a25f70 2067
e075ae69 2068 rtx arg0, sym;
e9a25f70 2069
e075ae69 2070 arg0 = gen_rtx_REG (SImode, 0);
4dd2ac2c 2071 emit_move_insn (arg0, GEN_INT (frame.to_allocate));
77a989d1 2072
e075ae69
RH
2073 sym = gen_rtx_MEM (FUNCTION_MODE,
2074 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
2075 insn = emit_call_insn (gen_call (sym, const0_rtx));
2076
2077 CALL_INSN_FUNCTION_USAGE (insn)
276ab4a4
RH
2078 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
2079 CALL_INSN_FUNCTION_USAGE (insn));
e075ae69 2080 }
e9a25f70 2081
84530511
SC
2082#ifdef SUBTARGET_PROLOGUE
2083 SUBTARGET_PROLOGUE;
0f290768 2084#endif
84530511 2085
e9a25f70 2086 if (pic_reg_used)
e075ae69 2087 load_pic_register ();
77a989d1 2088
e9a25f70
JL
2089 /* If we are profiling, make sure no instructions are scheduled before
2090 the call to mcount. However, if -fpic, the above call will have
2091 done that. */
e075ae69 2092 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
e9a25f70 2093 emit_insn (gen_blockage ());
77a989d1
SC
2094}
2095
0903fcab
JH
2096/* Emit code to add TSIZE to esp value. Use POP instruction when
2097 profitable. */
2098
2099static void
2100ix86_emit_epilogue_esp_adjustment (tsize)
2101 int tsize;
2102{
bdeb029c
JH
2103 /* If a frame pointer is present, we must be sure to tie the sp
2104 to the fp so that we don't mis-schedule. */
2105 if (frame_pointer_needed)
2106 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2107 stack_pointer_rtx,
2108 GEN_INT (tsize),
2109 hard_frame_pointer_rtx));
0903fcab 2110 else
bdeb029c
JH
2111 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2112 GEN_INT (tsize)));
0903fcab
JH
2113}
2114
da2d1d3a
JH
2115/* Emit code to restore saved registers using MOV insns. First register
2116 is restored from POINTER + OFFSET. */
2117static void
2118ix86_emit_restore_regs_using_mov (pointer, offset)
2119 rtx pointer;
2120 int offset;
2121{
2122 int regno;
da2d1d3a 2123
4dd2ac2c
JH
2124 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2125 if (ix86_save_reg (regno))
da2d1d3a 2126 {
4dd2ac2c
JH
2127 emit_move_insn (gen_rtx_REG (Pmode, regno),
2128 adj_offsettable_operand (gen_rtx_MEM (Pmode,
da2d1d3a
JH
2129 pointer),
2130 offset));
4dd2ac2c 2131 offset += UNITS_PER_WORD;
da2d1d3a
JH
2132 }
2133}
2134
0f290768 2135/* Restore function stack, frame, and registers. */
e9a25f70 2136
2a2ab3f9 2137void
cbbf65e0
RH
2138ix86_expand_epilogue (emit_return)
2139 int emit_return;
2a2ab3f9 2140{
1c71e60e 2141 int regno;
fdb8a883 2142 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4dd2ac2c 2143 struct ix86_frame frame;
65954bd8 2144 HOST_WIDE_INT offset;
4dd2ac2c
JH
2145
2146 ix86_compute_frame_layout (&frame);
2a2ab3f9 2147
1c71e60e 2148 /* Calculate start of saved registers relative to ebp. */
4dd2ac2c 2149 offset = -frame.nregs * UNITS_PER_WORD;
2a2ab3f9 2150
1c71e60e
JH
2151#ifdef FUNCTION_BLOCK_PROFILER_EXIT
2152 if (profile_block_flag == 2)
564d80f4 2153 {
1c71e60e 2154 FUNCTION_BLOCK_PROFILER_EXIT;
564d80f4 2155 }
1c71e60e 2156#endif
564d80f4 2157
fdb8a883
JW
2158 /* If we're only restoring one register and sp is not valid then
2159 using a move instruction to restore the register since it's
0f290768 2160 less work than reloading sp and popping the register.
da2d1d3a
JH
2161
2162 The default code result in stack adjustment using add/lea instruction,
2163 while this code results in LEAVE instruction (or discrete equivalent),
2164 so it is profitable in some other cases as well. Especially when there
2165 are no registers to restore. We also use this code when TARGET_USE_LEAVE
2166 and there is exactly one register to pop. This heruistic may need some
2167 tuning in future. */
4dd2ac2c
JH
2168 if ((!sp_valid && frame.nregs <= 1)
2169 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
da2d1d3a 2170 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
4dd2ac2c 2171 && frame.nregs == 1))
2a2ab3f9 2172 {
da2d1d3a
JH
2173 /* Restore registers. We can use ebp or esp to address the memory
2174 locations. If both are available, default to ebp, since offsets
2175 are known to be small. Only exception is esp pointing directly to the
2176 end of block of saved registers, where we may simplify addressing
2177 mode. */
2178
4dd2ac2c
JH
2179 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
2180 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
da2d1d3a
JH
2181 else
2182 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);
2a2ab3f9 2183
da2d1d3a 2184 if (!frame_pointer_needed)
4dd2ac2c
JH
2185 ix86_emit_epilogue_esp_adjustment (frame.to_allocate
2186 + frame.nregs * UNITS_PER_WORD);
0f290768 2187 /* If not an i386, mov & pop is faster than "leave". */
da2d1d3a 2188 else if (TARGET_USE_LEAVE || optimize_size)
564d80f4 2189 emit_insn (gen_leave ());
c8c5cb99 2190 else
2a2ab3f9 2191 {
1c71e60e
JH
2192 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2193 hard_frame_pointer_rtx,
2194 const0_rtx,
2195 hard_frame_pointer_rtx));
564d80f4 2196 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
e9a25f70
JL
2197 }
2198 }
1c71e60e 2199 else
68f654ec 2200 {
1c71e60e
JH
2201 /* First step is to deallocate the stack frame so that we can
2202 pop the registers. */
2203 if (!sp_valid)
2204 {
2205 if (!frame_pointer_needed)
2206 abort ();
2207 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2208 hard_frame_pointer_rtx,
2209 GEN_INT (offset),
2210 hard_frame_pointer_rtx));
2211 }
4dd2ac2c
JH
2212 else if (frame.to_allocate)
2213 ix86_emit_epilogue_esp_adjustment (frame.to_allocate);
1c71e60e 2214
4dd2ac2c
JH
2215 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2216 if (ix86_save_reg (regno))
1c71e60e 2217 emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
4dd2ac2c
JH
2218 if (frame_pointer_needed)
2219 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
68f654ec 2220 }
68f654ec 2221
cbbf65e0
RH
2222 /* Sibcall epilogues don't want a return instruction. */
2223 if (! emit_return)
2224 return;
2225
2a2ab3f9
JVA
2226 if (current_function_pops_args && current_function_args_size)
2227 {
e075ae69 2228 rtx popc = GEN_INT (current_function_pops_args);
2a2ab3f9 2229
b8c752c8
UD
2230 /* i386 can only pop 64K bytes. If asked to pop more, pop
2231 return address, do explicit add, and jump indirectly to the
0f290768 2232 caller. */
2a2ab3f9 2233
b8c752c8 2234 if (current_function_pops_args >= 65536)
2a2ab3f9 2235 {
e075ae69 2236 rtx ecx = gen_rtx_REG (SImode, 2);
e9a25f70 2237
e075ae69
RH
2238 emit_insn (gen_popsi1 (ecx));
2239 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 2240 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 2241 }
79325812 2242 else
e075ae69
RH
2243 emit_jump_insn (gen_return_pop_internal (popc));
2244 }
2245 else
2246 emit_jump_insn (gen_return_internal ());
2247}
2248\f
2249/* Extract the parts of an RTL expression that is a valid memory address
2250 for an instruction. Return false if the structure of the address is
2251 grossly off. */
2252
2253static int
2254ix86_decompose_address (addr, out)
2255 register rtx addr;
2256 struct ix86_address *out;
2257{
2258 rtx base = NULL_RTX;
2259 rtx index = NULL_RTX;
2260 rtx disp = NULL_RTX;
2261 HOST_WIDE_INT scale = 1;
2262 rtx scale_rtx = NULL_RTX;
2263
2264 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2265 base = addr;
2266 else if (GET_CODE (addr) == PLUS)
2267 {
2268 rtx op0 = XEXP (addr, 0);
2269 rtx op1 = XEXP (addr, 1);
2270 enum rtx_code code0 = GET_CODE (op0);
2271 enum rtx_code code1 = GET_CODE (op1);
2272
2273 if (code0 == REG || code0 == SUBREG)
2274 {
2275 if (code1 == REG || code1 == SUBREG)
2276 index = op0, base = op1; /* index + base */
2277 else
2278 base = op0, disp = op1; /* base + displacement */
2279 }
2280 else if (code0 == MULT)
e9a25f70 2281 {
e075ae69
RH
2282 index = XEXP (op0, 0);
2283 scale_rtx = XEXP (op0, 1);
2284 if (code1 == REG || code1 == SUBREG)
2285 base = op1; /* index*scale + base */
e9a25f70 2286 else
e075ae69
RH
2287 disp = op1; /* index*scale + disp */
2288 }
2289 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2290 {
2291 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2292 scale_rtx = XEXP (XEXP (op0, 0), 1);
2293 base = XEXP (op0, 1);
2294 disp = op1;
2a2ab3f9 2295 }
e075ae69
RH
2296 else if (code0 == PLUS)
2297 {
2298 index = XEXP (op0, 0); /* index + base + disp */
2299 base = XEXP (op0, 1);
2300 disp = op1;
2301 }
2302 else
2303 return FALSE;
2304 }
2305 else if (GET_CODE (addr) == MULT)
2306 {
2307 index = XEXP (addr, 0); /* index*scale */
2308 scale_rtx = XEXP (addr, 1);
2309 }
2310 else if (GET_CODE (addr) == ASHIFT)
2311 {
2312 rtx tmp;
2313
2314 /* We're called for lea too, which implements ashift on occasion. */
2315 index = XEXP (addr, 0);
2316 tmp = XEXP (addr, 1);
2317 if (GET_CODE (tmp) != CONST_INT)
2318 return FALSE;
2319 scale = INTVAL (tmp);
2320 if ((unsigned HOST_WIDE_INT) scale > 3)
2321 return FALSE;
2322 scale = 1 << scale;
2a2ab3f9 2323 }
2a2ab3f9 2324 else
e075ae69
RH
2325 disp = addr; /* displacement */
2326
2327 /* Extract the integral value of scale. */
2328 if (scale_rtx)
e9a25f70 2329 {
e075ae69
RH
2330 if (GET_CODE (scale_rtx) != CONST_INT)
2331 return FALSE;
2332 scale = INTVAL (scale_rtx);
e9a25f70 2333 }
3b3c6a3f 2334
e075ae69
RH
2335 /* Allow arg pointer and stack pointer as index if there is not scaling */
2336 if (base && index && scale == 1
564d80f4
JH
2337 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2338 || index == stack_pointer_rtx))
e075ae69
RH
2339 {
2340 rtx tmp = base;
2341 base = index;
2342 index = tmp;
2343 }
2344
2345 /* Special case: %ebp cannot be encoded as a base without a displacement. */
564d80f4
JH
2346 if ((base == hard_frame_pointer_rtx
2347 || base == frame_pointer_rtx
2348 || base == arg_pointer_rtx) && !disp)
e075ae69
RH
2349 disp = const0_rtx;
2350
2351 /* Special case: on K6, [%esi] makes the instruction vector decoded.
2352 Avoid this by transforming to [%esi+0]. */
2353 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
2354 && base && !index && !disp
329e1d01 2355 && REG_P (base)
e075ae69
RH
2356 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
2357 disp = const0_rtx;
2358
2359 /* Special case: encode reg+reg instead of reg*2. */
2360 if (!base && index && scale && scale == 2)
2361 base = index, scale = 1;
0f290768 2362
e075ae69
RH
2363 /* Special case: scaling cannot be encoded without base or displacement. */
2364 if (!base && !disp && index && scale != 1)
2365 disp = const0_rtx;
2366
2367 out->base = base;
2368 out->index = index;
2369 out->disp = disp;
2370 out->scale = scale;
3b3c6a3f 2371
e075ae69
RH
2372 return TRUE;
2373}
01329426
JH
2374\f
2375/* Return cost of the memory address x.
2376 For i386, it is better to use a complex address than let gcc copy
2377 the address into a reg and make a new pseudo. But not if the address
2378 requires to two regs - that would mean more pseudos with longer
2379 lifetimes. */
2380int
2381ix86_address_cost (x)
2382 rtx x;
2383{
2384 struct ix86_address parts;
2385 int cost = 1;
3b3c6a3f 2386
01329426
JH
2387 if (!ix86_decompose_address (x, &parts))
2388 abort ();
2389
2390 /* More complex memory references are better. */
2391 if (parts.disp && parts.disp != const0_rtx)
2392 cost--;
2393
2394 /* Attempt to minimize number of registers in the address. */
2395 if ((parts.base
2396 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2397 || (parts.index
2398 && (!REG_P (parts.index)
2399 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2400 cost++;
2401
2402 if (parts.base
2403 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2404 && parts.index
2405 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2406 && parts.base != parts.index)
2407 cost++;
2408
2409 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
2410 since it's predecode logic can't detect the length of instructions
2411 and it degenerates to vector decoded. Increase cost of such
2412 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 2413 to split such addresses or even refuse such addresses at all.
01329426
JH
2414
2415 Following addressing modes are affected:
2416 [base+scale*index]
2417 [scale*index+disp]
2418 [base+index]
0f290768 2419
01329426
JH
2420 The first and last case may be avoidable by explicitly coding the zero in
2421 memory address, but I don't have AMD-K6 machine handy to check this
2422 theory. */
2423
2424 if (TARGET_K6
2425 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2426 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2427 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2428 cost += 10;
0f290768 2429
01329426
JH
2430 return cost;
2431}
2432\f
b949ea8b
JW
2433/* If X is a machine specific address (i.e. a symbol or label being
2434 referenced as a displacement from the GOT implemented using an
2435 UNSPEC), then return the base term. Otherwise return X. */
2436
2437rtx
2438ix86_find_base_term (x)
2439 rtx x;
2440{
2441 rtx term;
2442
2443 if (GET_CODE (x) != PLUS
2444 || XEXP (x, 0) != pic_offset_table_rtx
2445 || GET_CODE (XEXP (x, 1)) != CONST)
2446 return x;
2447
2448 term = XEXP (XEXP (x, 1), 0);
2449
2450 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
2451 term = XEXP (term, 0);
2452
2453 if (GET_CODE (term) != UNSPEC
2454 || XVECLEN (term, 0) != 1
2455 || XINT (term, 1) != 7)
2456 return x;
2457
2458 term = XVECEXP (term, 0, 0);
2459
2460 if (GET_CODE (term) != SYMBOL_REF
2461 && GET_CODE (term) != LABEL_REF)
2462 return x;
2463
2464 return term;
2465}
2466\f
e075ae69
RH
2467/* Determine if a given CONST RTX is a valid memory displacement
2468 in PIC mode. */
0f290768 2469
59be65f6 2470int
91bb873f
RH
2471legitimate_pic_address_disp_p (disp)
2472 register rtx disp;
2473{
2474 if (GET_CODE (disp) != CONST)
2475 return 0;
2476 disp = XEXP (disp, 0);
2477
2478 if (GET_CODE (disp) == PLUS)
2479 {
2480 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2481 return 0;
2482 disp = XEXP (disp, 0);
2483 }
2484
2485 if (GET_CODE (disp) != UNSPEC
2486 || XVECLEN (disp, 0) != 1)
2487 return 0;
2488
2489 /* Must be @GOT or @GOTOFF. */
2490 if (XINT (disp, 1) != 6
2491 && XINT (disp, 1) != 7)
2492 return 0;
2493
2494 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2495 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2496 return 0;
2497
2498 return 1;
2499}
2500
e075ae69
RH
2501/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2502 memory address for an instruction. The MODE argument is the machine mode
2503 for the MEM expression that wants to use this address.
2504
2505 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
2506 convert common non-canonical forms to canonical form so that they will
2507 be recognized. */
2508
3b3c6a3f
MM
2509int
2510legitimate_address_p (mode, addr, strict)
2511 enum machine_mode mode;
2512 register rtx addr;
2513 int strict;
2514{
e075ae69
RH
2515 struct ix86_address parts;
2516 rtx base, index, disp;
2517 HOST_WIDE_INT scale;
2518 const char *reason = NULL;
2519 rtx reason_rtx = NULL_RTX;
3b3c6a3f
MM
2520
2521 if (TARGET_DEBUG_ADDR)
2522 {
2523 fprintf (stderr,
e9a25f70 2524 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
3b3c6a3f 2525 GET_MODE_NAME (mode), strict);
3b3c6a3f
MM
2526 debug_rtx (addr);
2527 }
2528
e075ae69 2529 if (! ix86_decompose_address (addr, &parts))
3b3c6a3f 2530 {
e075ae69 2531 reason = "decomposition failed";
50e60bc3 2532 goto report_error;
3b3c6a3f
MM
2533 }
2534
e075ae69
RH
2535 base = parts.base;
2536 index = parts.index;
2537 disp = parts.disp;
2538 scale = parts.scale;
91f0226f 2539
e075ae69 2540 /* Validate base register.
e9a25f70
JL
2541
2542 Don't allow SUBREG's here, it can lead to spill failures when the base
3d771dfd
MM
2543 is one word out of a two word structure, which is represented internally
2544 as a DImode int. */
e9a25f70 2545
3b3c6a3f
MM
2546 if (base)
2547 {
e075ae69
RH
2548 reason_rtx = base;
2549
3d771dfd 2550 if (GET_CODE (base) != REG)
3b3c6a3f 2551 {
e075ae69 2552 reason = "base is not a register";
50e60bc3 2553 goto report_error;
3b3c6a3f
MM
2554 }
2555
c954bd01
RH
2556 if (GET_MODE (base) != Pmode)
2557 {
e075ae69 2558 reason = "base is not in Pmode";
50e60bc3 2559 goto report_error;
c954bd01
RH
2560 }
2561
e9a25f70
JL
2562 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
2563 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
3b3c6a3f 2564 {
e075ae69 2565 reason = "base is not valid";
50e60bc3 2566 goto report_error;
3b3c6a3f
MM
2567 }
2568 }
2569
e075ae69 2570 /* Validate index register.
e9a25f70
JL
2571
2572 Don't allow SUBREG's here, it can lead to spill failures when the index
3d771dfd
MM
2573 is one word out of a two word structure, which is represented internally
2574 as a DImode int. */
e075ae69
RH
2575
2576 if (index)
3b3c6a3f 2577 {
e075ae69
RH
2578 reason_rtx = index;
2579
2580 if (GET_CODE (index) != REG)
3b3c6a3f 2581 {
e075ae69 2582 reason = "index is not a register";
50e60bc3 2583 goto report_error;
3b3c6a3f
MM
2584 }
2585
e075ae69 2586 if (GET_MODE (index) != Pmode)
c954bd01 2587 {
e075ae69 2588 reason = "index is not in Pmode";
50e60bc3 2589 goto report_error;
c954bd01
RH
2590 }
2591
e075ae69
RH
2592 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
2593 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
3b3c6a3f 2594 {
e075ae69 2595 reason = "index is not valid";
50e60bc3 2596 goto report_error;
3b3c6a3f
MM
2597 }
2598 }
3b3c6a3f 2599
e075ae69
RH
2600 /* Validate scale factor. */
2601 if (scale != 1)
3b3c6a3f 2602 {
e075ae69
RH
2603 reason_rtx = GEN_INT (scale);
2604 if (!index)
3b3c6a3f 2605 {
e075ae69 2606 reason = "scale without index";
50e60bc3 2607 goto report_error;
3b3c6a3f
MM
2608 }
2609
e075ae69 2610 if (scale != 2 && scale != 4 && scale != 8)
3b3c6a3f 2611 {
e075ae69 2612 reason = "scale is not a valid multiplier";
50e60bc3 2613 goto report_error;
3b3c6a3f
MM
2614 }
2615 }
2616
91bb873f 2617 /* Validate displacement. */
3b3c6a3f
MM
2618 if (disp)
2619 {
e075ae69
RH
2620 reason_rtx = disp;
2621
91bb873f 2622 if (!CONSTANT_ADDRESS_P (disp))
3b3c6a3f 2623 {
e075ae69 2624 reason = "displacement is not constant";
50e60bc3 2625 goto report_error;
3b3c6a3f
MM
2626 }
2627
e075ae69 2628 if (GET_CODE (disp) == CONST_DOUBLE)
3b3c6a3f 2629 {
e075ae69 2630 reason = "displacement is a const_double";
50e60bc3 2631 goto report_error;
3b3c6a3f
MM
2632 }
2633
91bb873f 2634 if (flag_pic && SYMBOLIC_CONST (disp))
3b3c6a3f 2635 {
91bb873f
RH
2636 if (! legitimate_pic_address_disp_p (disp))
2637 {
e075ae69 2638 reason = "displacement is an invalid pic construct";
50e60bc3 2639 goto report_error;
91bb873f
RH
2640 }
2641
4e9efe54 2642 /* This code used to verify that a symbolic pic displacement
0f290768
KH
2643 includes the pic_offset_table_rtx register.
2644
4e9efe54
JH
2645 While this is good idea, unfortunately these constructs may
2646 be created by "adds using lea" optimization for incorrect
2647 code like:
2648
2649 int a;
2650 int foo(int i)
2651 {
2652 return *(&a+i);
2653 }
2654
50e60bc3 2655 This code is nonsensical, but results in addressing
4e9efe54
JH
2656 GOT table with pic_offset_table_rtx base. We can't
2657 just refuse it easilly, since it gets matched by
2658 "addsi3" pattern, that later gets split to lea in the
2659 case output register differs from input. While this
2660 can be handled by separate addsi pattern for this case
2661 that never results in lea, this seems to be easier and
2662 correct fix for crash to disable this test. */
3b3c6a3f 2663 }
91bb873f 2664 else if (HALF_PIC_P ())
3b3c6a3f 2665 {
91bb873f 2666 if (! HALF_PIC_ADDRESS_P (disp)
e075ae69 2667 || (base != NULL_RTX || index != NULL_RTX))
91bb873f 2668 {
e075ae69 2669 reason = "displacement is an invalid half-pic reference";
50e60bc3 2670 goto report_error;
91bb873f 2671 }
3b3c6a3f
MM
2672 }
2673 }
2674
e075ae69 2675 /* Everything looks valid. */
3b3c6a3f 2676 if (TARGET_DEBUG_ADDR)
e075ae69 2677 fprintf (stderr, "Success.\n");
3b3c6a3f 2678 return TRUE;
e075ae69 2679
50e60bc3 2680report_error:
e075ae69
RH
2681 if (TARGET_DEBUG_ADDR)
2682 {
2683 fprintf (stderr, "Error: %s\n", reason);
2684 debug_rtx (reason_rtx);
2685 }
2686 return FALSE;
3b3c6a3f 2687}
3b3c6a3f 2688\f
55efb413
JW
2689/* Return an unique alias set for the GOT. */
2690
0f290768 2691static HOST_WIDE_INT
55efb413
JW
2692ix86_GOT_alias_set ()
2693{
2694 static HOST_WIDE_INT set = -1;
2695 if (set == -1)
2696 set = new_alias_set ();
2697 return set;
0f290768 2698}
55efb413 2699
3b3c6a3f
MM
2700/* Return a legitimate reference for ORIG (an address) using the
2701 register REG. If REG is 0, a new pseudo is generated.
2702
91bb873f 2703 There are two types of references that must be handled:
3b3c6a3f
MM
2704
2705 1. Global data references must load the address from the GOT, via
2706 the PIC reg. An insn is emitted to do this load, and the reg is
2707 returned.
2708
91bb873f
RH
2709 2. Static data references, constant pool addresses, and code labels
2710 compute the address as an offset from the GOT, whose base is in
2711 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2712 differentiate them from global data objects. The returned
2713 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
2714
2715 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 2716 reg also appears in the address. */
3b3c6a3f
MM
2717
/* Legitimize ORIG, an address, for use under -fpic.  REG may be a
   register to load intermediate values into, or zero, in which case a
   scratch pseudo is allocated when a load is required.

   Three shapes are produced:
     - local symbols/labels: PIC register + @GOTOFF displacement
       (UNSPEC 7 -- see output_pic_addr_const);
     - global symbols: a load through the GOT (UNSPEC 6);
     - sums: each half legitimized recursively and recombined.

   Returns the legitimized address; any needed insns are emitted.  */
rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

  if (GET_CODE (addr) == LABEL_REF
      || (GET_CODE (addr) == SYMBOL_REF
	  && (CONSTANT_POOL_ADDRESS_P (addr)
	      || SYMBOL_REF_FLAG (addr))))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      current_function_uses_pic_offset_table = 1;
      /* UNSPEC 7 is rendered as "sym@GOTOFF" by output_pic_addr_const.  */
      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      /* This symbol must be referenced via a load from the
	 Global Offset Table (@GOT).  */

      current_function_uses_pic_offset_table = 1;
      /* UNSPEC 6 is rendered as "sym@GOT".  The GOT slot itself never
	 changes after relocation, hence RTX_UNCHANGING_P and the
	 dedicated alias set.  */
      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
      new = gen_rtx_MEM (Pmode, new);
      RTX_UNCHANGING_P (new) = 1;
      MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();

      /* The GOT load always goes through a register.  */
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      emit_move_insn (reg, new);
      new = reg;
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == UNSPEC)
	    {
	      /* Check that the unspec is one of the ones we generate?  */
	    }
	  else if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if ((GET_CODE (op0) == LABEL_REF
	       || (GET_CODE (op0) == SYMBOL_REF
		   && (CONSTANT_POOL_ADDRESS_P (op0)
		       || SYMBOL_REF_FLAG (op0))))
	      && GET_CODE (op1) == CONST_INT)
	    {
	      /* pic_reg + (const (sym@GOTOFF + offset)).  */
	      current_function_uses_pic_offset_table = 1;
	      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
	      new = gen_rtx_PLUS (Pmode, new, op1);
	      new = gen_rtx_CONST (Pmode, new);
	      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

	      if (reg != 0)
		{
		  emit_move_insn (reg, new);
		  new = reg;
		}
	    }
	  else
	    {
	      /* Legitimize each addend separately; avoid handing the
		 second recursion REG if the first one consumed it.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new = legitimize_pic_address (XEXP (addr, 1),
					    base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  /* Re-associate so the constant ends up outermost:
		     (base + (x + const)) => ((base + x) + const).  */
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}
2822\f
3b3c6a3f
MM
2823/* Try machine-dependent ways of modifying an illegitimate address
2824 to be legitimate. If we find one, return the new, valid address.
2825 This macro is used in only one place: `memory_address' in explow.c.
2826
2827 OLDX is the address as it was before break_out_memory_refs was called.
2828 In some cases it is useful to look at this to decide what needs to be done.
2829
2830 MODE and WIN are passed so that this macro can use
2831 GO_IF_LEGITIMATE_ADDRESS.
2832
2833 It is always safe for this macro to do nothing. It exists to recognize
2834 opportunities to optimize the output.
2835
2836 For the 80386, we handle X+REG by loading X into a register R and
2837 using R+REG. R will go in a general reg and indexing will be used.
2838 However, if REG is a broken-out memory address or multiplication,
2839 nothing needs to be done because REG can certainly go in a general reg.
2840
2841 When -fpic is used, special handling is needed for symbolic references.
2842 See comments by legitimize_pic_address in i386.c for details. */
2843
/* See the block comment above: canonicalize X into a form that
   GO_IF_LEGITIMATE_ADDRESS accepts, returning X (possibly rewritten
   in place) as soon as a legitimate form is reached.  */
rtx
legitimize_address (x, oldx, mode)
     register rtx x;
     register rtx oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  int changed = 0;	/* set whenever we rewrite X, gating re-checks */
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;		/* the CONST_INT to fold, if any */
	  rtx other = NULL_RTX;	/* the remaining (possibly symbolic) term */

	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Force remaining multiplies out into registers.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Last resort: force the non-register addend into a register.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
2a2ab3f9
JVA
3007\f
3008/* Print an integer constant expression in assembler syntax. Addition
3009 and subtraction are the only arithmetic that may appear in these
3010 expressions. FILE is the stdio stream to write to, X is the rtx, and
3011 CODE is the operand print code from the output string. */
3012
/* Print the constant expression X in assembler syntax, handling the
   PIC relocation suffixes (@GOT/@GOTOFF/@PLT) carried by our UNSPECs.
   CODE is the operand print code from the output template ('P'
   requests a @PLT suffix on plain symbols).  Recurses for compound
   expressions; only addition and subtraction may appear.  */
static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      /* "." (the current location) is only meaningful in PIC code.  */
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      if (code == 'P' && ! SYMBOL_REF_FLAG (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      /* Bracket the difference: "[a-b]" in AT&T dialect, "(a-b)" in
	 the other dialect.  */
      putc (ASSEMBLER_DIALECT ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      /* UNSPEC numbers match those created by legitimize_pic_address.  */
      switch (XINT (x, 1))
	{
	case 6:
	  fputs ("@GOT", file);
	  break;
	case 7:
	  fputs ("@GOTOFF", file);
	  break;
	case 8:
	  fputs ("@PLT", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
1865dbb5 3122
0f290768 3123/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
3124 We need to handle our special PIC relocations. */
3125
0f290768 3126void
1865dbb5
JM
3127i386_dwarf_output_addr_const (file, x)
3128 FILE *file;
3129 rtx x;
3130{
f0ca81d2 3131 fprintf (file, "%s", INT_ASM_OP);
1865dbb5
JM
3132 if (flag_pic)
3133 output_pic_addr_const (file, x, '\0');
3134 else
3135 output_addr_const (file, x);
3136 fputc ('\n', file);
3137}
3138
3139/* In the name of slightly smaller debug output, and to cater to
3140 general assembler losage, recognize PIC+GOTOFF and turn it back
3141 into a direct symbol reference. */
3142
3143rtx
3144i386_simplify_dwarf_addr (orig_x)
3145 rtx orig_x;
3146{
3147 rtx x = orig_x;
3148
3149 if (GET_CODE (x) != PLUS
3150 || GET_CODE (XEXP (x, 0)) != REG
3151 || GET_CODE (XEXP (x, 1)) != CONST)
3152 return orig_x;
3153
3154 x = XEXP (XEXP (x, 1), 0);
3155 if (GET_CODE (x) == UNSPEC
3adbce3d
RH
3156 && (XINT (x, 1) == 6
3157 || XINT (x, 1) == 7))
1865dbb5
JM
3158 return XVECEXP (x, 0, 0);
3159
3160 if (GET_CODE (x) == PLUS
3161 && GET_CODE (XEXP (x, 0)) == UNSPEC
3162 && GET_CODE (XEXP (x, 1)) == CONST_INT
3adbce3d
RH
3163 && (XINT (XEXP (x, 0), 1) == 6
3164 || XINT (XEXP (x, 0), 1) == 7))
1865dbb5
JM
3165 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
3166
3167 return orig_x;
3168}
2a2ab3f9 3169\f
/* Write to FILE the condition-suffix letters ("e", "g", "b", ...) for
   comparison CODE performed in flags mode MODE.  If REVERSE is
   nonzero the reversed condition is printed.  FP nonzero selects the
   alternate spellings needed for fcmov on some assemblers (see the
   GTU/GEU cases below).  Aborts when CODE is not representable in the
   given MODE.  */
static void
put_condition_code (code, mode, reverse, fp, file)
     enum rtx_code code;
     enum machine_mode mode;
     int reverse, fp;
     FILE *file;
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      /* FP comparisons must already be reduced to a single integer
	 condition here.  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	abort();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      /* Signed compare: needs full flags, no-overflow, or GC flags.  */
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
	 Those same assemblers have the same but opposite losage on cmov.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";		/* only the sign flag is valid */
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case LTU:
      if (mode != CCmode)
	abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";		/* only the sign flag is valid */
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
	abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
3259
/* Print register X to FILE using the name appropriate for print code
   CODE ('b' byte, 'w' word, 'k' dword, 'y' "st(0)" form, 'h' high
   byte, 'm' MMX); with no code the name is chosen from the mode size
   of X.  A '%' prefix is emitted for the AT&T dialect.  Aborts on
   registers that should never appear in output (arg/frame pointer,
   flags, fpsr).  */
void
print_reg (x, code, file)
     rtx x;
     int code;
     FILE *file;
{
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  /* Translate the print code into a size selector for the switch
     below; otherwise fall back on the operand's own mode size.  */
  if (code == 'w')
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else if (code == 'm' || MMX_REG_P (x))
    code = 5;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  switch (code)
    {
    case 5:			/* MMX */
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 3:			/* 'y': st(0) spelled out */
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 4:
    case 8:
    case 12:
      /* 32-bit integer registers get the "e" prefix.  */
      if (! ANY_FP_REG_P (x))
	putc ('e', file);
      /* FALLTHRU */
    case 16:
    case 2:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:			/* byte register */
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:			/* high byte: ah/bh/ch/dh */
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
3322
2a2ab3f9 3323/* Meaning of CODE:
fe25fea3 3324 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 3325 C -- print opcode suffix for set/cmov insn.
fe25fea3 3326 c -- like C, but print reversed condition
2a2ab3f9
JVA
3327 R -- print the prefix for register names.
3328 z -- print the opcode suffix for the size of the current operand.
3329 * -- print a star (in certain assembler syntax)
fb204271 3330 A -- print an absolute memory reference.
2a2ab3f9 3331 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
3332 s -- print a shift double count, followed by the assemblers argument
3333 delimiter.
fe25fea3
SC
3334 b -- print the QImode name of the register for the indicated operand.
3335 %b0 would print %al if operands[0] is reg 0.
3336 w -- likewise, print the HImode name of the register.
3337 k -- likewise, print the SImode name of the register.
3338 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
a7180f70 3339 y -- print "st(0)" instead of "st" as a register.
a46d1d38
JH
3340 m -- print "st(n)" as an mmx register.
3341 D -- print condition for SSE cmp instruction.
3342 */
2a2ab3f9
JVA
3343
/* Print operand X to FILE, interpreted according to the print code
   CODE (see the code table in the comment above).  With CODE == 0 the
   operand is printed in its natural form: register name, memory
   reference, float constant, or immediate.  */
void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  if (code)
    {
      switch (code)
	{
	case '*':
	  /* AT&T-only star (indirect call/jump marker).  */
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('*', file);
	  return;

	case 'A':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('*', file);
	  else if (ASSEMBLER_DIALECT == 1)
	    {
	      /* Intel syntax. For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (GET_CODE (x) != REG)
		{
		  putc ('[', file);
		  PRINT_OPERAND (file, x, 0);
		  putc (']', file);
		  return;
		}
	    }

	  PRINT_OPERAND (file, x, 0);
	  return;

	/* Explicit size suffixes, emitted only in AT&T syntax.  */
	case 'L':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('t', file);
	  return;

	case 'z':
	  /* 387 opcodes don't get size suffixes if the operands are
	     registers.  */

	  if (STACK_REG_P (x))
	    return;

	  /* this is the size of op from size of operand */
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 2:
#ifdef HAVE_GAS_FILDS_FISTS
	      putc ('s', file);
#endif
	      return;

	    case 4:
	      if (GET_MODE (x) == SFmode)
		{
		  putc ('s', file);
		  return;
		}
	      else
		putc ('l', file);
	      return;

	    case 12:
	    case 16:
	      putc ('t', file);
	      return;

	    case 8:
	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
		{
#ifdef GAS_MNEMONICS
		  putc ('q', file);
#else
		  /* Assemblers without "q" spell it "ll".  */
		  putc ('l', file);
		  putc ('l', file);
#endif
		}
	      else
		putc ('l', file);
	      return;

	    default:
	      abort ();
	    }

	/* These codes modify how the operand itself is printed below.  */
	case 'b':
	case 'w':
	case 'k':
	case 'h':
	case 'y':
	case 'm':
	case 'X':
	case 'P':
	  break;

	case 's':
	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      PRINT_OPERAND (file, x, 0);
	      putc (',', file);
	    }
	  return;

	case 'D':
	  /* Little bit of braindamage here.  The SSE compare instructions
	     does use completely different names for the comparisons that the
	     fp conditional moves.  */
	  switch (GET_CODE (x))
	    {
	    case EQ:
	    case UNEQ:
	      fputs ("eq", file);
	      break;
	    case LT:
	    case UNLT:
	      fputs ("lt", file);
	      break;
	    case LE:
	    case UNLE:
	      fputs ("le", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case NE:
	    case LTGT:
	      fputs ("neq", file);
	      break;
	    case UNGE:
	    case GE:
	      fputs ("nlt", file);
	      break;
	    case UNGT:
	    case GT:
	      fputs ("nle", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    default:
	      abort ();
	      break;
	    }
	  return;
	case 'C':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
	  return;
	case 'F':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
	  return;

	  /* Like above, but reverse condition */
	case 'c':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
	  return;
	case 'f':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
	  return;

	default:
	  {
	    char str[50];
	    sprintf (str, "invalid operand code `%c'", code);
	    output_operand_lossage (str);
	  }
	}
    }

  if (GET_CODE (x) == REG)
    {
      PRINT_REG (x, code, file);
    }

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
	{
	  const char * size;
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "XWORD"; break;
	    case 16: size = "XMMWORD"; break;
	    default:
	      abort ();
	    }

	  /* Check for explicit size override (codes 'b', 'w' and 'k') */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";

	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      if (flag_pic && CONSTANT_ADDRESS_P (x))
	output_pic_addr_const (file, x, code);
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      /* SFmode immediates are emitted as their 32-bit bit pattern.  */
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == 0)
	putc ('$', file);
      fprintf (file, "0x%lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
	   && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }
  else
    {
      /* Immediates and symbolic constants.  'P' suppresses the
	 immediate marker (used for call targets).  */
      if (code != 'P')
	{
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
3643\f
3644/* Print a memory operand whose address is ADDR. */
3645
/* Print a memory operand whose address is ADDR.  The address is first
   decomposed into base + index*scale + displacement, then printed in
   AT&T ("disp(base,index,scale)") or Intel ("[base+disp+index*scale]")
   syntax depending on ASSEMBLER_DIALECT.  */
void
print_operand_address (file, addr)
     FILE *file;
     register rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
	{
	  /* Intel syntax needs an explicit segment for a bare
	     absolute address.  */
	  if (ASSEMBLER_DIALECT != 0)
	    {
	      if (USER_LABEL_PREFIX[0] == 0)
		putc ('%', file);
	      fputs ("ds:", file);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
	}
      else if (flag_pic)
	output_pic_addr_const (file, addr, 0);
      else
	output_addr_const (file, addr);
    }
  else
    {
      if (ASSEMBLER_DIALECT == 0)
	{
	  /* AT&T syntax: disp(base,index,scale).  */
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    PRINT_REG (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  /* Intel syntax: [base+offset+index*scale].  OFFSET holds a
	     CONST_INT piece to print inside the brackets.  */
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      PRINT_REG (base, 0, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
3761\f
3762/* Split one or more DImode RTL references into pairs of SImode
3763 references. The RTL can be REG, offsettable MEM, integer constant, or
3764 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3765 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 3766 that parallel "operands". */
2a2ab3f9
JVA
3767
3768void
3769split_di (operands, num, lo_half, hi_half)
3770 rtx operands[];
3771 int num;
3772 rtx lo_half[], hi_half[];
3773{
3774 while (num--)
3775 {
57dbca5e 3776 rtx op = operands[num];
e075ae69
RH
3777 if (CONSTANT_P (op))
3778 split_double (op, &lo_half[num], &hi_half[num]);
3779 else if (! reload_completed)
a269a03c
JC
3780 {
3781 lo_half[num] = gen_lowpart (SImode, op);
3782 hi_half[num] = gen_highpart (SImode, op);
3783 }
3784 else if (GET_CODE (op) == REG)
2a2ab3f9 3785 {
57dbca5e
BS
3786 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3787 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
2a2ab3f9 3788 }
57dbca5e 3789 else if (offsettable_memref_p (op))
2a2ab3f9 3790 {
57dbca5e
BS
3791 rtx lo_addr = XEXP (op, 0);
3792 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3793 lo_half[num] = change_address (op, SImode, lo_addr);
3794 hi_half[num] = change_address (op, SImode, hi_addr);
2a2ab3f9
JVA
3795 }
3796 else
564d80f4 3797 abort ();
2a2ab3f9
JVA
3798 }
3799}
3800\f
2a2ab3f9
JVA
3801/* Output code to perform a 387 binary operation in INSN, one of PLUS,
3802 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3803 is the expression of the binary operation. The output may either be
3804 emitted here, or returned to the caller, like all output_* functions.
3805
3806 There is no guarantee that the operands are the same mode, as they
0f290768 3807 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 3808
e3c2afab
AM
3809#ifndef SYSV386_COMPAT
3810/* Set to 1 for compatibility with brain-damaged assemblers. No-one
3811 wants to fix the assemblers because that causes incompatibility
3812 with gcc. No-one wants to fix gcc because that causes
3813 incompatibility with assemblers... You can use the option of
3814 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
3815#define SYSV386_COMPAT 1
3816#endif
3817
/* Output the assembler template for a binary floating point operation.
   operands[3] holds the operator rtx (PLUS, MINUS, MULT or DIV);
   operands[0] is the destination and operands[1]/operands[2] are the
   sources.  Handles both 387 stack operands and, when any operand is
   an SSE register, the scalar SSE forms.  Returns a pointer to a
   static buffer that is overwritten on each call.  */

const char *
output_387_binary_op (insn, operands)
     rtx insn;
     rtx *operands;
{
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else if (!is_sse)
    abort ();
#endif

  /* Select the 387 mnemonic stem P and the SSE stem SSEP.  An integer
     (MODE_INT) source operand selects the fi* (integer memory) forms.  */
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "div";
      break;

    default:
      abort ();
    }

  if (is_sse)
    {
      /* Scalar SSE: "ss" suffix for SFmode, "sd" otherwise.  */
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
	strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
	strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
    }
  strcpy (buf, p);

  /* Append the 387 suffix and operand part to the mnemonic stem.  */
  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative: canonicalize so operands[0] == operands[1] and
	 operands[2] is the other source.  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative: a memory first source selects the reversed
	 (r) form so the operand order matches the hardware.  */
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}
e075ae69 4024
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (insn, operands)
     rtx insn;
     rtx *operands;
{
  /* Nonzero when st(0) dies in this insn, so popping forms may be used.  */
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  rtx xops[4];

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (! STACK_TOP_P (operands[1]))
    abort ();

  /* NOTE(review): operands[2] appears to be a memory slot holding the FP
     control word, with operands[3]/%4 as scratch — confirm against the
     fix_trunc insn patterns in i386.md.  The 0x0c byte ORed into the
     second byte of the control word sets round-toward-zero.  */
  xops[0] = GEN_INT (12);
  xops[1] = adj_offsettable_operand (operands[2], 1);
  xops[1] = change_address (xops[1], QImode, NULL_RTX);

  xops[2] = operands[0];
  if (GET_CODE (operands[0]) != MEM)
    xops[2] = operands[3];

  /* Save the control word, switch rounding mode, store, then restore.  */
  output_asm_insn ("fnstcw\t%2", operands);
  output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
  output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
  output_asm_insn ("fldcw\t%2", operands);
  output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);

  /* DImode always uses the popping form (fistp is the only 64-bit store).  */
  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z2\t%2", xops);
  else
    output_asm_insn ("fist%z2\t%2", xops);

  output_asm_insn ("fldcw\t%2", operands);

  /* If the destination is a register, the value was stored to the
     scratch memory slot; move it into place now.  */
  if (GET_CODE (operands[0]) != MEM)
    {
      if (dimode_p)
	{
	  split_di (operands+0, 1, xops+0, xops+1);
	  split_di (operands+3, 1, xops+2, xops+3);
	  output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
	  output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
	}
      else if (GET_MODE (operands[0]) == SImode)
	output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
      else
	output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
    }

  return "";
}
cda749b1 4085
e075ae69
RH
4086/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
4087 should be used and 2 when fnstsw should be used. UNORDERED_P is true
4088 when fucom should be used. */
4089
69ddee61 4090const char *
e075ae69 4091output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
4092 rtx insn;
4093 rtx *operands;
e075ae69 4094 int eflags_p, unordered_p;
cda749b1 4095{
e075ae69
RH
4096 int stack_top_dies;
4097 rtx cmp_op0 = operands[0];
4098 rtx cmp_op1 = operands[1];
0644b628 4099 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
e075ae69
RH
4100
4101 if (eflags_p == 2)
4102 {
4103 cmp_op0 = cmp_op1;
4104 cmp_op1 = operands[2];
4105 }
0644b628
JH
4106 if (is_sse)
4107 {
4108 if (GET_MODE (operands[0]) == SFmode)
4109 if (unordered_p)
4110 return "ucomiss\t{%1, %0|%0, %1}";
4111 else
4112 return "comiss\t{%1, %0|%0, %y}";
4113 else
4114 if (unordered_p)
4115 return "ucomisd\t{%1, %0|%0, %1}";
4116 else
4117 return "comisd\t{%1, %0|%0, %y}";
4118 }
cda749b1 4119
e075ae69 4120 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
4121 abort ();
4122
e075ae69 4123 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 4124
e075ae69
RH
4125 if (STACK_REG_P (cmp_op1)
4126 && stack_top_dies
4127 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
4128 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 4129 {
e075ae69
RH
4130 /* If both the top of the 387 stack dies, and the other operand
4131 is also a stack register that dies, then this must be a
4132 `fcompp' float compare */
4133
4134 if (eflags_p == 1)
4135 {
4136 /* There is no double popping fcomi variant. Fortunately,
4137 eflags is immune from the fstp's cc clobbering. */
4138 if (unordered_p)
4139 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
4140 else
4141 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
4142 return "fstp\t%y0";
4143 }
4144 else
cda749b1 4145 {
e075ae69
RH
4146 if (eflags_p == 2)
4147 {
4148 if (unordered_p)
4149 return "fucompp\n\tfnstsw\t%0";
4150 else
4151 return "fcompp\n\tfnstsw\t%0";
4152 }
cda749b1
JW
4153 else
4154 {
e075ae69
RH
4155 if (unordered_p)
4156 return "fucompp";
4157 else
4158 return "fcompp";
cda749b1
JW
4159 }
4160 }
cda749b1
JW
4161 }
4162 else
4163 {
e075ae69 4164 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 4165
0f290768 4166 static const char * const alt[24] =
e075ae69
RH
4167 {
4168 "fcom%z1\t%y1",
4169 "fcomp%z1\t%y1",
4170 "fucom%z1\t%y1",
4171 "fucomp%z1\t%y1",
0f290768 4172
e075ae69
RH
4173 "ficom%z1\t%y1",
4174 "ficomp%z1\t%y1",
4175 NULL,
4176 NULL,
4177
4178 "fcomi\t{%y1, %0|%0, %y1}",
4179 "fcomip\t{%y1, %0|%0, %y1}",
4180 "fucomi\t{%y1, %0|%0, %y1}",
4181 "fucomip\t{%y1, %0|%0, %y1}",
4182
4183 NULL,
4184 NULL,
4185 NULL,
4186 NULL,
4187
4188 "fcom%z2\t%y2\n\tfnstsw\t%0",
4189 "fcomp%z2\t%y2\n\tfnstsw\t%0",
4190 "fucom%z2\t%y2\n\tfnstsw\t%0",
4191 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 4192
e075ae69
RH
4193 "ficom%z2\t%y2\n\tfnstsw\t%0",
4194 "ficomp%z2\t%y2\n\tfnstsw\t%0",
4195 NULL,
4196 NULL
4197 };
4198
4199 int mask;
69ddee61 4200 const char *ret;
e075ae69
RH
4201
4202 mask = eflags_p << 3;
4203 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4204 mask |= unordered_p << 1;
4205 mask |= stack_top_dies;
4206
4207 if (mask >= 24)
4208 abort ();
4209 ret = alt[mask];
4210 if (ret == NULL)
4211 abort ();
cda749b1 4212
e075ae69 4213 return ret;
cda749b1
JW
4214 }
4215}
2a2ab3f9 4216
e075ae69 4217/* Output assembler code to FILE to initialize basic-block profiling.
2a2ab3f9 4218
e075ae69 4219 If profile_block_flag == 2
2a2ab3f9 4220
e075ae69
RH
4221 Output code to call the subroutine `__bb_init_trace_func'
4222 and pass two parameters to it. The first parameter is
4223 the address of a block allocated in the object module.
4224 The second parameter is the number of the first basic block
4225 of the function.
2a2ab3f9 4226
e075ae69 4227 The name of the block is a local symbol made with this statement:
0f290768 4228
e075ae69 4229 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
2a2ab3f9 4230
e075ae69
RH
4231 Of course, since you are writing the definition of
4232 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4233 can take a short cut in the definition of this macro and use the
4234 name that you know will result.
2a2ab3f9 4235
e075ae69
RH
4236 The number of the first basic block of the function is
4237 passed to the macro in BLOCK_OR_LABEL.
2a2ab3f9 4238
e075ae69
RH
4239 If described in a virtual assembler language the code to be
4240 output looks like:
2a2ab3f9 4241
e075ae69
RH
4242 parameter1 <- LPBX0
4243 parameter2 <- BLOCK_OR_LABEL
4244 call __bb_init_trace_func
2a2ab3f9 4245
e075ae69 4246 else if profile_block_flag != 0
e74389ff 4247
e075ae69
RH
4248 Output code to call the subroutine `__bb_init_func'
4249 and pass one single parameter to it, which is the same
4250 as the first parameter to `__bb_init_trace_func'.
e74389ff 4251
e075ae69
RH
4252 The first word of this parameter is a flag which will be nonzero if
4253 the object module has already been initialized. So test this word
4254 first, and do not call `__bb_init_func' if the flag is nonzero.
4255 Note: When profile_block_flag == 2 the test need not be done
4256 but `__bb_init_trace_func' *must* be called.
e74389ff 4257
e075ae69
RH
4258 BLOCK_OR_LABEL may be used to generate a label number as a
4259 branch destination in case `__bb_init_func' will not be called.
e74389ff 4260
e075ae69
RH
4261 If described in a virtual assembler language the code to be
4262 output looks like:
2a2ab3f9 4263
e075ae69
RH
4264 cmp (LPBX0),0
4265 jne local_label
4266 parameter1 <- LPBX0
4267 call __bb_init_func
4268 local_label:
4269*/
c572e5ba 4270
e075ae69
RH
4271void
4272ix86_output_function_block_profiler (file, block_or_label)
4273 FILE *file;
4274 int block_or_label;
c572e5ba 4275{
e075ae69
RH
4276 static int num_func = 0;
4277 rtx xops[8];
4278 char block_table[80], false_label[80];
c572e5ba 4279
e075ae69 4280 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
e9a25f70 4281
e075ae69
RH
4282 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4283 xops[5] = stack_pointer_rtx;
4284 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
2a2ab3f9 4285
e075ae69 4286 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
c572e5ba 4287
e075ae69 4288 switch (profile_block_flag)
c572e5ba 4289 {
e075ae69
RH
4290 case 2:
4291 xops[2] = GEN_INT (block_or_label);
4292 xops[3] = gen_rtx_MEM (Pmode,
4293 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4294 xops[6] = GEN_INT (8);
e9a25f70 4295
e075ae69
RH
4296 output_asm_insn ("push{l}\t%2", xops);
4297 if (!flag_pic)
4298 output_asm_insn ("push{l}\t%1", xops);
e9a25f70 4299 else
870a0c2c 4300 {
e075ae69
RH
4301 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4302 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 4303 }
e075ae69
RH
4304 output_asm_insn ("call\t%P3", xops);
4305 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4306 break;
c572e5ba 4307
e075ae69
RH
4308 default:
4309 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
c572e5ba 4310
e075ae69
RH
4311 xops[0] = const0_rtx;
4312 xops[2] = gen_rtx_MEM (Pmode,
4313 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4314 xops[3] = gen_rtx_MEM (Pmode,
4315 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4316 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4317 xops[6] = GEN_INT (4);
a14003ee 4318
e075ae69 4319 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
446ba526 4320
e075ae69
RH
4321 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4322 output_asm_insn ("jne\t%2", xops);
870a0c2c 4323
e075ae69
RH
4324 if (!flag_pic)
4325 output_asm_insn ("push{l}\t%1", xops);
4326 else
4327 {
4328 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
4329 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 4330 }
e075ae69
RH
4331 output_asm_insn ("call\t%P3", xops);
4332 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4333 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4334 num_func++;
4335 break;
c572e5ba 4336 }
2a2ab3f9 4337}
305f097e 4338
e075ae69
RH
4339/* Output assembler code to FILE to increment a counter associated
4340 with basic block number BLOCKNO.
305f097e 4341
e075ae69 4342 If profile_block_flag == 2
ecbc4695 4343
e075ae69
RH
4344 Output code to initialize the global structure `__bb' and
4345 call the function `__bb_trace_func' which will increment the
4346 counter.
ecbc4695 4347
e075ae69
RH
4348 `__bb' consists of two words. In the first word the number
4349 of the basic block has to be stored. In the second word
0f290768 4350 the address of a block allocated in the object module
e075ae69 4351 has to be stored.
ecbc4695 4352
e075ae69 4353 The basic block number is given by BLOCKNO.
ecbc4695 4354
0f290768 4355 The address of the block is given by the label created with
305f097e 4356
e075ae69 4357 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
305f097e 4358
e075ae69 4359 by FUNCTION_BLOCK_PROFILER.
ecbc4695 4360
e075ae69
RH
4361 Of course, since you are writing the definition of
4362 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4363 can take a short cut in the definition of this macro and use the
4364 name that you know will result.
305f097e 4365
e075ae69
RH
4366 If described in a virtual assembler language the code to be
4367 output looks like:
305f097e 4368
e075ae69
RH
4369 move BLOCKNO -> (__bb)
4370 move LPBX0 -> (__bb+4)
4371 call __bb_trace_func
305f097e 4372
e075ae69
RH
4373 Note that function `__bb_trace_func' must not change the
4374 machine state, especially the flag register. To grant
4375 this, you must output code to save and restore registers
4376 either in this macro or in the macros MACHINE_STATE_SAVE
4377 and MACHINE_STATE_RESTORE. The last two macros will be
4378 used in the function `__bb_trace_func', so you must make
0f290768 4379 sure that the function prologue does not change any
e075ae69 4380 register prior to saving it with MACHINE_STATE_SAVE.
305f097e 4381
e075ae69 4382 else if profile_block_flag != 0
305f097e 4383
e075ae69
RH
4384 Output code to increment the counter directly.
4385 Basic blocks are numbered separately from zero within each
4386 compiled object module. The count associated with block number
0f290768 4387 BLOCKNO is at index BLOCKNO in an array of words; the name of
e075ae69 4388 this array is a local symbol made with this statement:
32b5b1aa 4389
e075ae69 4390 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
32b5b1aa 4391
e075ae69
RH
4392 Of course, since you are writing the definition of
4393 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4394 can take a short cut in the definition of this macro and use the
0f290768 4395 name that you know will result.
32b5b1aa 4396
e075ae69
RH
4397 If described in a virtual assembler language the code to be
4398 output looks like:
32b5b1aa 4399
e075ae69
RH
4400 inc (LPBX2+4*BLOCKNO)
4401*/
32b5b1aa 4402
void
ix86_output_block_profiler (file, blockno)
     FILE *file ATTRIBUTE_UNUSED;
     int blockno;
{
  rtx xops[8], cnt_rtx;
  char counts[80];
  char *block_table = counts;

  switch (profile_block_flag)
    {
    case 2:
      /* Trace variant: fill in the global `__bb' struct (block number
	 and LPBX0 address) and call __bb_trace_func, preserving the
	 flags around the whole sequence.  */
      ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);

      xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
      xops[2] = GEN_INT (blockno);
      xops[3] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
      xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
      xops[5] = plus_constant (xops[4], 4);
      xops[0] = gen_rtx_MEM (SImode, xops[4]);	/* __bb word 0: block no.  */
      xops[6] = gen_rtx_MEM (SImode, xops[5]);	/* __bb word 1: LPBX0.  */

      CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;

      output_asm_insn ("pushf", xops);
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      if (flag_pic)
	{
	  /* Under PIC, compute the LPBX0 address in eax, saving and
	     restoring eax around its use.  */
	  xops[7] = gen_rtx_REG (Pmode, 0);	/* eax */
	  output_asm_insn ("push{l}\t%7", xops);
	  output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
	  output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
	  output_asm_insn ("pop{l}\t%7", xops);
	}
      else
	output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
      output_asm_insn ("call\t%P3", xops);
      output_asm_insn ("popf", xops);

      break;

    default:
      /* Direct variant: a single inc{l} on the counter word at
	 LPBX2 + 4*BLOCKNO (indirected through the GOT under PIC).  */
      ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
      cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
      SYMBOL_REF_FLAG (cnt_rtx) = TRUE;

      if (blockno)
	cnt_rtx = plus_constant (cnt_rtx, blockno*4);

      if (flag_pic)
	cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);

      xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
      output_asm_insn ("inc{l}\t%0", xops);

      break;
    }
}
32b5b1aa 4462\f
/* Expand a move of OPERANDS[1] into OPERANDS[0] in mode MODE, massaging
   the operands (PIC symbol loads, mem-to-mem moves, push operands, FP
   constants) so that the emitted SET is valid for the machine.  */

void
ix86_expand_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  /* Nonzero once register allocation has started; new pseudos and
     constant-pool entries may not be created then.  */
  int strict = (reload_in_progress || reload_completed);
  rtx insn;

  if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
    {
      /* Emit insns to move operands[1] into operands[0].  */

      if (GET_CODE (operands[0]) == MEM)
	operands[1] = force_reg (Pmode, operands[1]);
      else
	{
	  /* Legitimize through a register destination when possible;
	     legitimize_pic_address may emit the whole move itself, in
	     which case we are done.  */
	  rtx temp = operands[0];
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (operands[1], temp);
	  if (temp == operands[0])
	    return;
	  operands[1] = temp;
	}
    }
  else
    {
      /* mem-to-mem moves are not directly representable; copy the
	 source through a register (pushes of non-QImode are OK).  */
      if (GET_CODE (operands[0]) == MEM
	  && (GET_MODE (operands[0]) == QImode
	      || !push_operand (operands[0], mode))
	  && GET_CODE (operands[1]) == MEM)
	operands[1] = force_reg (mode, operands[1]);

      if (push_operand (operands[0], mode)
	  && ! general_no_elim_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (operands[1]) == CONST_DOUBLE
		   && register_operand (operands[0], mode))
	    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
	}
    }

  insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);

  emit_insn (insn);
}
e9a25f70 4518
e075ae69
RH
4519/* Attempt to expand a binary operator. Make the expansion closer to the
4520 actual machine, then just general_operand, which will allow 3 separate
9d81fc27 4521 memory references (one output, two input) in a single insn. */
e9a25f70 4522
e075ae69
RH
4523void
4524ix86_expand_binary_operator (code, mode, operands)
4525 enum rtx_code code;
4526 enum machine_mode mode;
4527 rtx operands[];
4528{
4529 int matching_memory;
4530 rtx src1, src2, dst, op, clob;
4531
4532 dst = operands[0];
4533 src1 = operands[1];
4534 src2 = operands[2];
4535
4536 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
4537 if (GET_RTX_CLASS (code) == 'c'
4538 && (rtx_equal_p (dst, src2)
4539 || immediate_operand (src1, mode)))
4540 {
4541 rtx temp = src1;
4542 src1 = src2;
4543 src2 = temp;
32b5b1aa 4544 }
e9a25f70 4545
e075ae69
RH
4546 /* If the destination is memory, and we do not have matching source
4547 operands, do things in registers. */
4548 matching_memory = 0;
4549 if (GET_CODE (dst) == MEM)
32b5b1aa 4550 {
e075ae69
RH
4551 if (rtx_equal_p (dst, src1))
4552 matching_memory = 1;
4553 else if (GET_RTX_CLASS (code) == 'c'
4554 && rtx_equal_p (dst, src2))
4555 matching_memory = 2;
4556 else
4557 dst = gen_reg_rtx (mode);
4558 }
0f290768 4559
e075ae69
RH
4560 /* Both source operands cannot be in memory. */
4561 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
4562 {
4563 if (matching_memory != 2)
4564 src2 = force_reg (mode, src2);
4565 else
4566 src1 = force_reg (mode, src1);
32b5b1aa 4567 }
e9a25f70 4568
06a964de
JH
4569 /* If the operation is not commutable, source 1 cannot be a constant
4570 or non-matching memory. */
0f290768 4571 if ((CONSTANT_P (src1)
06a964de
JH
4572 || (!matching_memory && GET_CODE (src1) == MEM))
4573 && GET_RTX_CLASS (code) != 'c')
e075ae69 4574 src1 = force_reg (mode, src1);
0f290768 4575
e075ae69 4576 /* If optimizing, copy to regs to improve CSE */
fe577e58 4577 if (optimize && ! no_new_pseudos)
32b5b1aa 4578 {
e075ae69
RH
4579 if (GET_CODE (dst) == MEM)
4580 dst = gen_reg_rtx (mode);
4581 if (GET_CODE (src1) == MEM)
4582 src1 = force_reg (mode, src1);
4583 if (GET_CODE (src2) == MEM)
4584 src2 = force_reg (mode, src2);
32b5b1aa 4585 }
e9a25f70 4586
e075ae69
RH
4587 /* Emit the instruction. */
4588
4589 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
4590 if (reload_in_progress)
4591 {
4592 /* Reload doesn't know about the flags register, and doesn't know that
4593 it doesn't want to clobber it. We can only do this with PLUS. */
4594 if (code != PLUS)
4595 abort ();
4596 emit_insn (op);
4597 }
4598 else
32b5b1aa 4599 {
e075ae69
RH
4600 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4601 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 4602 }
e9a25f70 4603
e075ae69
RH
4604 /* Fix up the destination if needed. */
4605 if (dst != operands[0])
4606 emit_move_insn (operands[0], dst);
4607}
4608
4609/* Return TRUE or FALSE depending on whether the binary operator meets the
4610 appropriate constraints. */
4611
4612int
4613ix86_binary_operator_ok (code, mode, operands)
4614 enum rtx_code code;
4615 enum machine_mode mode ATTRIBUTE_UNUSED;
4616 rtx operands[3];
4617{
4618 /* Both source operands cannot be in memory. */
4619 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4620 return 0;
4621 /* If the operation is not commutable, source 1 cannot be a constant. */
4622 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4623 return 0;
4624 /* If the destination is memory, we must have a matching source operand. */
4625 if (GET_CODE (operands[0]) == MEM
4626 && ! (rtx_equal_p (operands[0], operands[1])
4627 || (GET_RTX_CLASS (code) == 'c'
4628 && rtx_equal_p (operands[0], operands[2]))))
4629 return 0;
06a964de
JH
4630 /* If the operation is not commutable and the source 1 is memory, we must
4631 have a matching destionation. */
4632 if (GET_CODE (operands[1]) == MEM
4633 && GET_RTX_CLASS (code) != 'c'
4634 && ! rtx_equal_p (operands[0], operands[1]))
4635 return 0;
e075ae69
RH
4636 return 1;
4637}
4638
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE.  */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  NOT is the one unary operation emitted
     without a flags clobber; everything else gets one attached, except
     during reload where the clobber cannot be added.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
4700
4701/* Return TRUE or FALSE depending on whether the unary operator meets the
4702 appropriate constraints. */
4703
4704int
4705ix86_unary_operator_ok (code, mode, operands)
4706 enum rtx_code code ATTRIBUTE_UNUSED;
4707 enum machine_mode mode ATTRIBUTE_UNUSED;
4708 rtx operands[2] ATTRIBUTE_UNUSED;
4709{
06a964de
JH
4710 /* If one of operands is memory, source and destination must match. */
4711 if ((GET_CODE (operands[0]) == MEM
4712 || GET_CODE (operands[1]) == MEM)
4713 && ! rtx_equal_p (operands[0], operands[1]))
4714 return FALSE;
e075ae69
RH
4715 return TRUE;
4716}
4717
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (insn, req_mode)
     rtx insn;
     enum machine_mode req_mode;
{
  rtx set;
  enum machine_mode set_mode;

  /* The compare may be wrapped in a PARALLEL (e.g. with a clobber);
     the SET of the flags register is then the first element.  */
  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  /* The fall-through ladder below encodes the strictness ordering
     CCmode > CCGCmode > CCGOCmode > CCZmode: a mode satisfies every
     request at or below its own level.  CCNOmode additionally stands
     in for CCmode, but only when comparing against zero.  */
  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  /* Finally, source and destination CC modes must agree.  */
  return (GET_MODE (SET_SRC (set)) == set_mode);
}
4768
e075ae69
RH
4769/* Generate insn patterns to do an integer compare of OPERANDS. */
4770
4771static rtx
4772ix86_expand_int_compare (code, op0, op1)
4773 enum rtx_code code;
4774 rtx op0, op1;
4775{
4776 enum machine_mode cmpmode;
4777 rtx tmp, flags;
4778
4779 cmpmode = SELECT_CC_MODE (code, op0, op1);
4780 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
4781
4782 /* This is very simple, but making the interface the same as in the
4783 FP case makes the rest of the code easier. */
4784 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
4785 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
4786
4787 /* Return the test that should be put into the flags user, i.e.
4788 the bcc, scc, or cmov instruction. */
4789 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
4790}
4791
3a3677ff
RH
4792/* Figure out whether to use ordered or unordered fp comparisons.
4793 Return the appropriate mode to use. */
e075ae69 4794
b1cdafbb 4795enum machine_mode
3a3677ff 4796ix86_fp_compare_mode (code)
8752c357 4797 enum rtx_code code ATTRIBUTE_UNUSED;
e075ae69 4798{
9e7adcb3
JH
4799 /* ??? In order to make all comparisons reversible, we do all comparisons
4800 non-trapping when compiling for IEEE. Once gcc is able to distinguish
4801 all forms trapping and nontrapping comparisons, we can make inequality
4802 comparisons trapping again, since it results in better code when using
4803 FCOM based compares. */
4804 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
4805}
4806
9076b9c1
JH
4807enum machine_mode
4808ix86_cc_mode (code, op0, op1)
4809 enum rtx_code code;
4810 rtx op0, op1;
4811{
4812 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4813 return ix86_fp_compare_mode (code);
4814 switch (code)
4815 {
4816 /* Only zero flag is needed. */
4817 case EQ: /* ZF=0 */
4818 case NE: /* ZF!=0 */
4819 return CCZmode;
4820 /* Codes needing carry flag. */
265dab10
JH
4821 case GEU: /* CF=0 */
4822 case GTU: /* CF=0 & ZF=0 */
7e08e190
JH
4823 case LTU: /* CF=1 */
4824 case LEU: /* CF=1 | ZF=1 */
265dab10 4825 return CCmode;
9076b9c1
JH
4826 /* Codes possibly doable only with sign flag when
4827 comparing against zero. */
4828 case GE: /* SF=OF or SF=0 */
7e08e190 4829 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
4830 if (op1 == const0_rtx)
4831 return CCGOCmode;
4832 else
4833 /* For other cases Carry flag is not required. */
4834 return CCGCmode;
4835 /* Codes doable only with sign flag when comparing
4836 against zero, but we miss jump instruction for it
4837 so we need to use relational tests agains overflow
4838 that thus needs to be zero. */
4839 case GT: /* ZF=0 & SF=OF */
4840 case LE: /* ZF=1 | SF<>OF */
4841 if (op1 == const0_rtx)
4842 return CCNOmode;
4843 else
4844 return CCGCmode;
4845 default:
0f290768 4846 abort ();
9076b9c1
JH
4847 }
4848}
4849
3a3677ff
RH
4850/* Return true if we should use an FCOMI instruction for this fp comparison. */
4851
a940d8bd 4852int
3a3677ff 4853ix86_use_fcomi_compare (code)
9e7adcb3 4854 enum rtx_code code ATTRIBUTE_UNUSED;
3a3677ff 4855{
9e7adcb3
JH
4856 enum rtx_code swapped_code = swap_condition (code);
4857 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
4858 || (ix86_fp_comparison_cost (swapped_code)
4859 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
4860}
4861
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (code, pop0, pop1)
     enum rtx_code code;
     rtx *pop0, *pop1;
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  /* Nonzero when at least one operand lives in an SSE register.  */
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || op_mode == XFmode
	  || op_mode == TFmode
	  || ix86_use_fcomi_compare (code)))
    {
      /* Register-only forms: force both operands into registers.  */
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      /* NOTE(review): the swap condition below keys on
	 standard_80387_constant_p () == 0 (i.e. op0 is NOT a constant
	 loadable by fld1/fldz etc.) combined with the MEM tests —
	 presumably so that the "nicer" operand ends up as op0; verify
	 against standard_80387_constant_p's return convention.  */
      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  /* Constants the 387 can materialize directly go to a register;
	     anything else is spilled to the constant pool and used as a
	     memory operand.  */
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  Only swap after
     reload if op0 is already a register, since we may no longer create
     new pseudos.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op0) == REG || !reload_completed))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
4933
c0c102a9
JH
4934/* Convert comparison codes we use to represent FP comparison to integer
4935 code that will result in proper branch. Return UNKNOWN if no such code
4936 is available. */
4937static enum rtx_code
4938ix86_fp_compare_code_to_integer (code)
4939 enum rtx_code code;
4940{
4941 switch (code)
4942 {
4943 case GT:
4944 return GTU;
4945 case GE:
4946 return GEU;
4947 case ORDERED:
4948 case UNORDERED:
4949 return code;
4950 break;
4951 case UNEQ:
4952 return EQ;
4953 break;
4954 case UNLT:
4955 return LTU;
4956 break;
4957 case UNLE:
4958 return LEU;
4959 break;
4960 case LTGT:
4961 return NE;
4962 break;
4963 default:
4964 return UNKNOWN;
4965 }
4966}
4967
/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for a branch that will
   branch around FIRST_CODE and SECOND_CODE.  If one of the branches
   is not required, its value is set to NIL.
   We never require more than two branches.  */
static void
ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
     enum rtx_code code, *bypass_code, *first_code, *second_code;
{
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

       cmp    ZF PF CF
       >      0  0  0
       <      0  0  1
       =      1  0  0
       un     1  1  1 */

  switch (code)
    {
      /* These codes already test correctly even when the operands are
	 unordered (NaN), so a single branch suffices.  */
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;

      /* These would branch wrongly on unordered operands, so first
	 branch around the real test when unordered (PF=1).  */
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;

      /* These must additionally branch to the target when the operands
	 are unordered, hence a second branch.  */
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  /* Without IEEE conformance we do not care about NaN outcomes, so the
     extra branches can be dropped.  */
  if (!TARGET_IEEE_FP)
    {
      *second_code = NIL;
      *bypass_code = NIL;
    }
}
5033
9e7adcb3
JH
5034/* Return cost of comparison done fcom + arithmetics operations on AX.
5035 All following functions do use number of instructions as an cost metrics.
5036 In future this should be tweaked to compute bytes for optimize_size and
5037 take into account performance of various instructions on various CPUs. */
5038static int
5039ix86_fp_comparison_arithmetics_cost (code)
5040 enum rtx_code code;
5041{
5042 if (!TARGET_IEEE_FP)
5043 return 4;
5044 /* The cost of code output by ix86_expand_fp_compare. */
5045 switch (code)
5046 {
5047 case UNLE:
5048 case UNLT:
5049 case LTGT:
5050 case GT:
5051 case GE:
5052 case UNORDERED:
5053 case ORDERED:
5054 case UNEQ:
5055 return 4;
5056 break;
5057 case LT:
5058 case NE:
5059 case EQ:
5060 case UNGE:
5061 return 5;
5062 break;
5063 case LE:
5064 case UNGT:
5065 return 6;
5066 break;
5067 default:
5068 abort ();
5069 }
5070}
5071
5072/* Return cost of comparison done using fcomi operation.
5073 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5074static int
5075ix86_fp_comparison_fcomi_cost (code)
5076 enum rtx_code code;
5077{
5078 enum rtx_code bypass_code, first_code, second_code;
5079 /* Return arbitarily high cost when instruction is not supported - this
5080 prevents gcc from using it. */
5081 if (!TARGET_CMOVE)
5082 return 1024;
5083 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5084 return (bypass_code != NIL || second_code != NIL) + 2;
5085}
5086
5087/* Return cost of comparison done using sahf operation.
5088 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5089static int
5090ix86_fp_comparison_sahf_cost (code)
5091 enum rtx_code code;
5092{
5093 enum rtx_code bypass_code, first_code, second_code;
5094 /* Return arbitarily high cost when instruction is not preferred - this
5095 avoids gcc from using it. */
5096 if (!TARGET_USE_SAHF && !optimize_size)
5097 return 1024;
5098 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5099 return (bypass_code != NIL || second_code != NIL) + 3;
5100}
5101
5102/* Compute cost of the comparison done using any method.
5103 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5104static int
5105ix86_fp_comparison_cost (code)
5106 enum rtx_code code;
5107{
5108 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
5109 int min;
5110
5111 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
5112 sahf_cost = ix86_fp_comparison_sahf_cost (code);
5113
5114 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
5115 if (min > sahf_cost)
5116 min = sahf_cost;
5117 if (min > fcomi_cost)
5118 min = fcomi_cost;
5119 return min;
5120}
c0c102a9 5121
/* Generate insn patterns to do a floating point compare of OPERANDS.
   Emits the comparison insns and returns the flags-test rtx to hand to
   the flags user (bcc, scc, or cmov).  SCRATCH is a QImode-accessible
   register used for the fnstsw result in the non-fcomi paths.  When the
   comparison needs extra branches, *SECOND_TEST / *BYPASS_TEST receive
   the additional flag tests (see ix86_fp_comparison_codes).  */

static rtx
ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
     enum rtx_code code;
     rtx op0, op1, scratch;
     rtx *second_test;
     rtx *bypass_test;
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  Only usable when the
     caller supplied slots for whatever extra tests the split needs.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
	{
	  /* fcomi: compare straight into the flags register.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			     tmp);
	  emit_insn (tmp);
	}
      else
	{
	  /* fcom + fnstsw into SCRATCH, then sahf to move the FP status
	     bits into the integer flags.  Unspec 9 is the fnstsw pattern.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
	  emit_insn (gen_x86_sahf_1 (scratch));
	}

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
      if (second_code != NIL)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.

	 The masks below select x87 condition bits in the FPSW high byte:
	 0x01 = C0, 0x04 = C2, 0x40 = C3, 0x45 = C0|C2|C3.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      /* ">" holds iff C0, C2 and C3 are all clear.  */
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      /* IEEE GT must fail on NaN: isolate C0|C2|C3, then use
		 dec + compare so the unordered pattern is excluded.  */
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      /* IEEE "<": exactly C0 set (C2/C3 clear excludes NaN).  */
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      /* Non-IEEE / UNLT: just test C0.  */
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      /* ">=" holds iff C0 and C2 are both clear.  */
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      /* UNGE under IEEE: flip C0 and test it.  */
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      /* IEEE "<=": dec + compare against C3 to exclude NaN.  */
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      /* Non-IEEE / UNLE: C0 or C3 set.  */
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      /* IEEE "==": exactly C3 set (excludes the NaN pattern where
		 C0 and C2 are also set).  */
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      /* Non-IEEE / UNEQ: just test C3.  */
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	      break;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      /* IEEE "!=": flip C3 within C0|C2|C3 so NaN counts as
		 unequal.  */
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      /* Non-IEEE / LTGT: C3 clear.  */
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  /* Unordered iff C2 set.  */
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  /* Ordered iff C2 clear.  */
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  abort ();
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
5307
9e3e266c 5308rtx
a1b8572c 5309ix86_expand_compare (code, second_test, bypass_test)
e075ae69 5310 enum rtx_code code;
a1b8572c 5311 rtx *second_test, *bypass_test;
e075ae69
RH
5312{
5313 rtx op0, op1, ret;
5314 op0 = ix86_compare_op0;
5315 op1 = ix86_compare_op1;
5316
a1b8572c
JH
5317 if (second_test)
5318 *second_test = NULL_RTX;
5319 if (bypass_test)
5320 *bypass_test = NULL_RTX;
5321
e075ae69 5322 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9e7adcb3 5323 ret = ix86_expand_fp_compare (code, op0, op1, gen_reg_rtx (HImode),
77ebd435 5324 second_test, bypass_test);
32b5b1aa 5325 else
e075ae69
RH
5326 ret = ix86_expand_int_compare (code, op0, op1);
5327
5328 return ret;
5329}
5330
/* Expand a conditional branch to LABEL, comparing ix86_compare_op0
   against ix86_compare_op1 with operator CODE.  Dispatches on the
   comparison mode: integer modes get a straight compare+jump, FP modes
   are left as a combined pattern for later splitting, and DImode is
   decomposed into word-sized compares.  */
void
ix86_expand_branch (code, label)
     enum rtx_code code;
     rtx label;
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      /* Plain integer compare: emit (set pc (if_then_else ...)).  */
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
    case TFmode:
      /* Don't expand the comparison early, so that we get better code
	 when jump or whoever decides to reverse the comparison.  */
      {
	rtvec vec;
	int use_fcomi;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	tmp = gen_rtx_fmt_ee (code, VOIDmode,
			      ix86_compare_op0, ix86_compare_op1);
	tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				    gen_rtx_LABEL_REF (VOIDmode, label),
				    pc_rtx);
	tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	/* The combined pattern clobbers both FP flag registers (hard
	   regs 17 and 18 here), plus an HImode scratch for the fnstsw
	   result when fcomi is not used.  */
	use_fcomi = ix86_use_fcomi_compare (code);
	vec = rtvec_alloc (3 + !use_fcomi);
	RTVEC_ELT (vec, 0) = tmp;
	RTVEC_ELT (vec, 1)
	  = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	RTVEC_ELT (vec, 2)
	  = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	if (! use_fcomi)
	  RTVEC_ELT (vec, 3)
	    = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	return;
      }

    case DImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	/* Canonicalize a constant into the second operand.  */
	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    /* Recurse: compare the combined word against zero.  */
	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	/* code1 tests the high words for the branch taken, code2 tests
	   them for the branch around, and code3 (the unsigned variant)
	   tests the low words.  NIL marks an unneeded branch.  */
	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = NIL; code2 = NE;  break;
	  case NE:   code2 = NIL; break;

	  default:
	    abort ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != NIL)
	  ix86_expand_branch (code1, label);
	if (code2 != NIL)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != NIL)
	  emit_label (label2);
	return;
      }

    default:
      abort ();
    }
}
e075ae69 5500
/* Split branch based on floating point condition.  CONDITION is the
   comparison rtx; OP1/OP2 its operands; TARGET1/TARGET2 the (label, pc)
   pair for taken/fallthrough; TMP an HImode scratch for fnstsw.  Emits
   up to three jump insns: an optional bypass jump, the main test, and
   an optional second test.  */
void
ix86_split_fp_branch (condition, op1, op2, target1, target2, tmp)
     rtx condition, op1, op2, target1, target2, tmp;
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  enum rtx_code code = GET_CODE (condition);

  /* Canonicalize so that target1 is the branch target: reverse the
     condition (preserving unordered semantics) if the pattern was
     written with the label in the else arm.  */
  if (target2 != pc_rtx)
    {
      /* NOTE(review): this inner `tmp' shadows the scratch parameter
	 `tmp'; harmless here since the scratch is only used below, but
	 worth renaming.  */
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);
  /* When a bypass test exists, branch around the main test on it.  */
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     bypass,
					     gen_rtx_LABEL_REF (VOIDmode,
								label),
					     pc_rtx)));
    }
  /* AMD Athlon and probably other CPUs too have fast bypass path between the
     comparison and first branch.  The second branch takes longer to execute
     so place first branch the worse predicable one if possible.  */
  if (second != NULL_RTX
      && (GET_CODE (second) == UNORDERED || GET_CODE (second) == ORDERED))
    {
      rtx tmp = condition;
      condition = second;
      second = tmp;
    }
  emit_jump_insn (gen_rtx_SET
		  (VOIDmode, pc_rtx,
		   gen_rtx_IF_THEN_ELSE (VOIDmode,
					 condition, target1, target2)));
  if (second != NULL_RTX)
    emit_jump_insn (gen_rtx_SET
		    (VOIDmode, pc_rtx,
		     gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, target2)));
  if (label != NULL_RTX)
    emit_label (label);
}
5552
/* Expand a setcc of DEST from the comparison recorded in
   ix86_compare_op0/op1 with operator CODE.  Returns 1 on success,
   0 (FAIL) when the expander cannot handle the mode and the caller
   should fall back.  */
int
ix86_expand_setcc (code, dest)
     enum rtx_code code;
     rtx dest;
{
  rtx ret, tmp, tmpreg;
  rtx second_test, bypass_test;
  int type;

  /* DImode comparisons are not handled here.  */
  if (GET_MODE (ix86_compare_op0) == DImode)
    return 0; /* FAIL */

  /* Three modes of generation:
     0 -- destination does not overlap compare sources:
	  clear dest first, emit strict_low_part setcc.
     1 -- destination does overlap compare sources:
	  emit subreg setcc, zero extend.
     2 -- destination is in QImode:
	  emit setcc only.
  */

  type = 0;

  if (GET_MODE (dest) == QImode)
    type = 2;
  else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
	   || reg_overlap_mentioned_p (dest, ix86_compare_op1))
    type = 1;

  /* Clearing before the compare preserves the flags for the setcc.  */
  if (type == 0)
    emit_move_insn (dest, const0_rtx);

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  /* tmp is the rtx the setcc stores into (possibly wrapped in
     STRICT_LOW_PART); tmpreg is the plain QImode register used as a
     source by the and/ior combining below.  */
  tmp = dest;
  tmpreg = dest;
  if (type == 0)
    {
      tmp = gen_lowpart (QImode, dest);
      tmpreg = tmp;
      tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
    }
  else if (type == 1)
    {
      if (!cse_not_expected)
	tmp = gen_reg_rtx (QImode);
      else
	tmp = gen_lowpart (QImode, dest);
      tmpreg = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  /* FP comparisons may need a second setcc combined in: AND for a
     bypass (reversed) test, IOR for a second test.  At most one of the
     two can be present.  */
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  if (second_test)
	    abort();
	  test = bypass_test;
	  bypass = 1;
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  /* Mode 1: zero-extend the QImode result into the wider destination;
     the extension clobbers the flags.  */
  if (type == 1)
    {
      rtx clob;

      tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
      tmp = gen_rtx_SET (VOIDmode, dest, tmp);
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
      emit_insn (tmp);
    }

  return 1; /* DONE */
}
e075ae69 5641
32b5b1aa 5642int
e075ae69
RH
5643ix86_expand_int_movcc (operands)
5644 rtx operands[];
32b5b1aa 5645{
e075ae69
RH
5646 enum rtx_code code = GET_CODE (operands[1]), compare_code;
5647 rtx compare_seq, compare_op;
a1b8572c 5648 rtx second_test, bypass_test;
32b5b1aa 5649
36583fea
JH
5650 /* When the compare code is not LTU or GEU, we can not use sbbl case.
5651 In case comparsion is done with immediate, we can convert it to LTU or
5652 GEU by altering the integer. */
5653
5654 if ((code == LEU || code == GTU)
5655 && GET_CODE (ix86_compare_op1) == CONST_INT
5656 && GET_MODE (operands[0]) != HImode
5657 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
0f290768 5658 && GET_CODE (operands[2]) == CONST_INT
36583fea
JH
5659 && GET_CODE (operands[3]) == CONST_INT)
5660 {
5661 if (code == LEU)
5662 code = LTU;
5663 else
5664 code = GEU;
5665 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
5666 }
3a3677ff 5667
e075ae69 5668 start_sequence ();
a1b8572c 5669 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
5670 compare_seq = gen_sequence ();
5671 end_sequence ();
5672
5673 compare_code = GET_CODE (compare_op);
5674
5675 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
5676 HImode insns, we'd be swallowed in word prefix ops. */
5677
5678 if (GET_MODE (operands[0]) != HImode
0f290768 5679 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
5680 && GET_CODE (operands[3]) == CONST_INT)
5681 {
5682 rtx out = operands[0];
5683 HOST_WIDE_INT ct = INTVAL (operands[2]);
5684 HOST_WIDE_INT cf = INTVAL (operands[3]);
5685 HOST_WIDE_INT diff;
5686
a1b8572c
JH
5687 if ((compare_code == LTU || compare_code == GEU)
5688 && !second_test && !bypass_test)
e075ae69 5689 {
e075ae69
RH
5690
5691 /* Detect overlap between destination and compare sources. */
5692 rtx tmp = out;
5693
0f290768 5694 /* To simplify rest of code, restrict to the GEU case. */
36583fea
JH
5695 if (compare_code == LTU)
5696 {
5697 int tmp = ct;
5698 ct = cf;
5699 cf = tmp;
5700 compare_code = reverse_condition (compare_code);
5701 code = reverse_condition (code);
5702 }
5703 diff = ct - cf;
5704
e075ae69 5705 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
a500c31b 5706 || reg_overlap_mentioned_p (out, ix86_compare_op1))
e075ae69
RH
5707 tmp = gen_reg_rtx (SImode);
5708
5709 emit_insn (compare_seq);
5710 emit_insn (gen_x86_movsicc_0_m1 (tmp));
5711
36583fea
JH
5712 if (diff == 1)
5713 {
5714 /*
5715 * cmpl op0,op1
5716 * sbbl dest,dest
5717 * [addl dest, ct]
5718 *
5719 * Size 5 - 8.
5720 */
5721 if (ct)
5722 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5723 }
5724 else if (cf == -1)
5725 {
5726 /*
5727 * cmpl op0,op1
5728 * sbbl dest,dest
5729 * orl $ct, dest
5730 *
5731 * Size 8.
5732 */
5733 emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
5734 }
5735 else if (diff == -1 && ct)
5736 {
5737 /*
5738 * cmpl op0,op1
5739 * sbbl dest,dest
5740 * xorl $-1, dest
5741 * [addl dest, cf]
5742 *
5743 * Size 8 - 11.
5744 */
5745 emit_insn (gen_one_cmplsi2 (tmp, tmp));
5746 if (cf)
5747 emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
5748 }
5749 else
5750 {
5751 /*
5752 * cmpl op0,op1
5753 * sbbl dest,dest
5754 * andl cf - ct, dest
5755 * [addl dest, ct]
5756 *
5757 * Size 8 - 11.
5758 */
5759 emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
5760 if (ct)
5761 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5762 }
e075ae69
RH
5763
5764 if (tmp != out)
5765 emit_move_insn (out, tmp);
5766
5767 return 1; /* DONE */
5768 }
5769
5770 diff = ct - cf;
5771 if (diff < 0)
5772 {
5773 HOST_WIDE_INT tmp;
5774 tmp = ct, ct = cf, cf = tmp;
5775 diff = -diff;
734dba19
JH
5776 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
5777 {
5778 /* We may be reversing unordered compare to normal compare, that
5779 is not valid in general (we may convert non-trapping condition
5780 to trapping one), however on i386 we currently emit all
5781 comparisons unordered. */
5782 compare_code = reverse_condition_maybe_unordered (compare_code);
5783 code = reverse_condition_maybe_unordered (code);
5784 }
5785 else
5786 {
5787 compare_code = reverse_condition (compare_code);
5788 code = reverse_condition (code);
5789 }
e075ae69
RH
5790 }
5791 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
5792 || diff == 3 || diff == 5 || diff == 9)
5793 {
5794 /*
5795 * xorl dest,dest
5796 * cmpl op1,op2
5797 * setcc dest
5798 * lea cf(dest*(ct-cf)),dest
5799 *
5800 * Size 14.
5801 *
5802 * This also catches the degenerate setcc-only case.
5803 */
5804
5805 rtx tmp;
5806 int nops;
5807
5808 out = emit_store_flag (out, code, ix86_compare_op0,
5809 ix86_compare_op1, VOIDmode, 0, 1);
5810
5811 nops = 0;
5812 if (diff == 1)
5813 tmp = out;
5814 else
5815 {
5816 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
5817 nops++;
5818 if (diff & 1)
5819 {
5820 tmp = gen_rtx_PLUS (SImode, tmp, out);
5821 nops++;
5822 }
5823 }
5824 if (cf != 0)
5825 {
5826 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
5827 nops++;
5828 }
5829 if (tmp != out)
5830 {
5831 if (nops == 0)
5832 emit_move_insn (out, tmp);
5833 else if (nops == 1)
5834 {
5835 rtx clob;
5836
5837 clob = gen_rtx_REG (CCmode, FLAGS_REG);
5838 clob = gen_rtx_CLOBBER (VOIDmode, clob);
5839
5840 tmp = gen_rtx_SET (VOIDmode, out, tmp);
5841 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5842 emit_insn (tmp);
5843 }
5844 else
5845 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
5846 }
5847 if (out != operands[0])
5848 emit_move_insn (operands[0], out);
5849
5850 return 1; /* DONE */
5851 }
5852
5853 /*
5854 * General case: Jumpful:
5855 * xorl dest,dest cmpl op1, op2
5856 * cmpl op1, op2 movl ct, dest
5857 * setcc dest jcc 1f
5858 * decl dest movl cf, dest
5859 * andl (cf-ct),dest 1:
5860 * addl ct,dest
0f290768 5861 *
e075ae69
RH
5862 * Size 20. Size 14.
5863 *
5864 * This is reasonably steep, but branch mispredict costs are
5865 * high on modern cpus, so consider failing only if optimizing
5866 * for space.
5867 *
5868 * %%% Parameterize branch_cost on the tuning architecture, then
5869 * use that. The 80386 couldn't care less about mispredicts.
5870 */
5871
5872 if (!optimize_size && !TARGET_CMOVE)
5873 {
5874 if (ct == 0)
5875 {
5876 ct = cf;
5877 cf = 0;
734dba19
JH
5878 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
5879 {
5880 /* We may be reversing unordered compare to normal compare,
5881 that is not valid in general (we may convert non-trapping
5882 condition to trapping one), however on i386 we currently
5883 emit all comparisons unordered. */
5884 compare_code = reverse_condition_maybe_unordered (compare_code);
5885 code = reverse_condition_maybe_unordered (code);
5886 }
5887 else
5888 {
5889 compare_code = reverse_condition (compare_code);
5890 code = reverse_condition (code);
5891 }
e075ae69
RH
5892 }
5893
5894 out = emit_store_flag (out, code, ix86_compare_op0,
5895 ix86_compare_op1, VOIDmode, 0, 1);
5896
5897 emit_insn (gen_addsi3 (out, out, constm1_rtx));
5898 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
5899 if (ct != 0)
5900 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5901 if (out != operands[0])
5902 emit_move_insn (operands[0], out);
5903
5904 return 1; /* DONE */
5905 }
5906 }
5907
5908 if (!TARGET_CMOVE)
5909 {
5910 /* Try a few things more with specific constants and a variable. */
5911
78a0d70c 5912 optab op;
e075ae69
RH
5913 rtx var, orig_out, out, tmp;
5914
5915 if (optimize_size)
5916 return 0; /* FAIL */
5917
0f290768 5918 /* If one of the two operands is an interesting constant, load a
e075ae69 5919 constant with the above and mask it in with a logical operation. */
0f290768 5920
e075ae69
RH
5921 if (GET_CODE (operands[2]) == CONST_INT)
5922 {
5923 var = operands[3];
5924 if (INTVAL (operands[2]) == 0)
5925 operands[3] = constm1_rtx, op = and_optab;
5926 else if (INTVAL (operands[2]) == -1)
5927 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
5928 else
5929 return 0; /* FAIL */
e075ae69
RH
5930 }
5931 else if (GET_CODE (operands[3]) == CONST_INT)
5932 {
5933 var = operands[2];
5934 if (INTVAL (operands[3]) == 0)
5935 operands[2] = constm1_rtx, op = and_optab;
5936 else if (INTVAL (operands[3]) == -1)
5937 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
5938 else
5939 return 0; /* FAIL */
e075ae69 5940 }
78a0d70c 5941 else
e075ae69
RH
5942 return 0; /* FAIL */
5943
5944 orig_out = operands[0];
5945 tmp = gen_reg_rtx (GET_MODE (orig_out));
5946 operands[0] = tmp;
5947
5948 /* Recurse to get the constant loaded. */
5949 if (ix86_expand_int_movcc (operands) == 0)
5950 return 0; /* FAIL */
5951
5952 /* Mask in the interesting variable. */
5953 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
5954 OPTAB_WIDEN);
5955 if (out != orig_out)
5956 emit_move_insn (orig_out, out);
5957
5958 return 1; /* DONE */
5959 }
5960
5961 /*
5962 * For comparison with above,
5963 *
5964 * movl cf,dest
5965 * movl ct,tmp
5966 * cmpl op1,op2
5967 * cmovcc tmp,dest
5968 *
5969 * Size 15.
5970 */
5971
5972 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
5973 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
5974 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
5975 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
5976
a1b8572c
JH
5977 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
5978 {
5979 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
5980 emit_move_insn (tmp, operands[3]);
5981 operands[3] = tmp;
5982 }
5983 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
5984 {
5985 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
5986 emit_move_insn (tmp, operands[2]);
5987 operands[2] = tmp;
5988 }
5989
e075ae69
RH
5990 emit_insn (compare_seq);
5991 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5992 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5993 compare_op, operands[2],
5994 operands[3])));
a1b8572c
JH
5995 if (bypass_test)
5996 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5997 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5998 bypass_test,
5999 operands[3],
6000 operands[0])));
6001 if (second_test)
6002 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6003 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6004 second_test,
6005 operands[2],
6006 operands[0])));
e075ae69
RH
6007
6008 return 1; /* DONE */
e9a25f70 6009}
e075ae69 6010
32b5b1aa 6011int
e075ae69
RH
6012ix86_expand_fp_movcc (operands)
6013 rtx operands[];
32b5b1aa 6014{
e075ae69 6015 enum rtx_code code;
e075ae69 6016 rtx tmp;
a1b8572c 6017 rtx compare_op, second_test, bypass_test;
32b5b1aa 6018
0073023d
JH
6019 /* For SF/DFmode conditional moves based on comparisons
6020 in same mode, we may want to use SSE min/max instructions. */
6021 if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
6022 || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
6023 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
6024 /* We may be called from the post-reload splitter. */
6025 && (!REG_P (operands[0])
6026 || SSE_REG_P (operands[0])
6027 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
6028 {
6029 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
6030 code = GET_CODE (operands[1]);
6031
6032 /* See if we have (cross) match between comparison operands and
6033 conditional move operands. */
6034 if (rtx_equal_p (operands[2], op1))
6035 {
6036 rtx tmp = op0;
6037 op0 = op1;
6038 op1 = tmp;
6039 code = reverse_condition_maybe_unordered (code);
6040 }
6041 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
6042 {
6043 /* Check for min operation. */
6044 if (code == LT)
6045 {
6046 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6047 if (memory_operand (op0, VOIDmode))
6048 op0 = force_reg (GET_MODE (operands[0]), op0);
6049 if (GET_MODE (operands[0]) == SFmode)
6050 emit_insn (gen_minsf3 (operands[0], op0, op1));
6051 else
6052 emit_insn (gen_mindf3 (operands[0], op0, op1));
6053 return 1;
6054 }
6055 /* Check for max operation. */
6056 if (code == GT)
6057 {
6058 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6059 if (memory_operand (op0, VOIDmode))
6060 op0 = force_reg (GET_MODE (operands[0]), op0);
6061 if (GET_MODE (operands[0]) == SFmode)
6062 emit_insn (gen_maxsf3 (operands[0], op0, op1));
6063 else
6064 emit_insn (gen_maxdf3 (operands[0], op0, op1));
6065 return 1;
6066 }
6067 }
6068 /* Manage condition to be sse_comparison_operator. In case we are
6069 in non-ieee mode, try to canonicalize the destination operand
6070 to be first in the comparison - this helps reload to avoid extra
6071 moves. */
6072 if (!sse_comparison_operator (operands[1], VOIDmode)
6073 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
6074 {
6075 rtx tmp = ix86_compare_op0;
6076 ix86_compare_op0 = ix86_compare_op1;
6077 ix86_compare_op1 = tmp;
6078 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
6079 VOIDmode, ix86_compare_op0,
6080 ix86_compare_op1);
6081 }
6082 /* Similary try to manage result to be first operand of conditional
6083 move. */
6084 if (rtx_equal_p (operands[0], operands[3]))
6085 {
6086 rtx tmp = operands[2];
6087 operands[2] = operands[3];
6088 operands[2] = tmp;
6089 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
6090 (GET_CODE (operands[1])),
6091 VOIDmode, ix86_compare_op0,
6092 ix86_compare_op1);
6093 }
6094 if (GET_MODE (operands[0]) == SFmode)
6095 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
6096 operands[2], operands[3],
6097 ix86_compare_op0, ix86_compare_op1));
6098 else
6099 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
6100 operands[2], operands[3],
6101 ix86_compare_op0, ix86_compare_op1));
6102 return 1;
6103 }
6104
e075ae69 6105 /* The floating point conditional move instructions don't directly
0f290768 6106 support conditions resulting from a signed integer comparison. */
32b5b1aa 6107
e075ae69 6108 code = GET_CODE (operands[1]);
a1b8572c 6109 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9e7adcb3
JH
6110
6111 /* The floating point conditional move instructions don't directly
6112 support signed integer comparisons. */
6113
a1b8572c 6114 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 6115 {
a1b8572c
JH
6116 if (second_test != NULL || bypass_test != NULL)
6117 abort();
e075ae69 6118 tmp = gen_reg_rtx (QImode);
3a3677ff 6119 ix86_expand_setcc (code, tmp);
e075ae69
RH
6120 code = NE;
6121 ix86_compare_op0 = tmp;
6122 ix86_compare_op1 = const0_rtx;
a1b8572c
JH
6123 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
6124 }
6125 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6126 {
6127 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6128 emit_move_insn (tmp, operands[3]);
6129 operands[3] = tmp;
6130 }
6131 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6132 {
6133 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6134 emit_move_insn (tmp, operands[2]);
6135 operands[2] = tmp;
e075ae69 6136 }
e9a25f70 6137
e075ae69
RH
6138 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6139 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9e7adcb3 6140 compare_op,
e075ae69
RH
6141 operands[2],
6142 operands[3])));
a1b8572c
JH
6143 if (bypass_test)
6144 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6145 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6146 bypass_test,
6147 operands[3],
6148 operands[0])));
6149 if (second_test)
6150 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6151 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6152 second_test,
6153 operands[2],
6154 operands[0])));
32b5b1aa 6155
e075ae69 6156 return 1;
32b5b1aa
SC
6157}
6158
2450a057
JH
6159/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
6160 works for floating pointer parameters and nonoffsetable memories.
6161 For pushes, it returns just stack offsets; the values will be saved
6162 in the right order. Maximally three parts are generated. */
6163
2b589241 6164static int
2450a057
JH
6165ix86_split_to_parts (operand, parts, mode)
6166 rtx operand;
6167 rtx *parts;
6168 enum machine_mode mode;
32b5b1aa 6169{
2b589241 6170 int size = mode == TFmode ? 3 : GET_MODE_SIZE (mode) / 4;
2450a057 6171
a7180f70
BS
6172 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
6173 abort ();
2450a057
JH
6174 if (size < 2 || size > 3)
6175 abort ();
6176
d7a29404
JH
6177 /* Optimize constant pool reference to immediates. This is used by fp moves,
6178 that force all constants to memory to allow combining. */
6179
6180 if (GET_CODE (operand) == MEM
6181 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
6182 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
6183 operand = get_pool_constant (XEXP (operand, 0));
6184
2450a057 6185 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
e075ae69 6186 {
2450a057
JH
6187 /* The only non-offsetable memories we handle are pushes. */
6188 if (! push_operand (operand, VOIDmode))
6189 abort ();
6190
6191 PUT_MODE (operand, SImode);
6192 parts[0] = parts[1] = parts[2] = operand;
6193 }
6194 else
6195 {
6196 if (mode == DImode)
6197 split_di (&operand, 1, &parts[0], &parts[1]);
6198 else
e075ae69 6199 {
2450a057
JH
6200 if (REG_P (operand))
6201 {
6202 if (!reload_completed)
6203 abort ();
6204 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
6205 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
6206 if (size == 3)
6207 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
6208 }
6209 else if (offsettable_memref_p (operand))
6210 {
6211 PUT_MODE (operand, SImode);
6212 parts[0] = operand;
6213 parts[1] = adj_offsettable_operand (operand, 4);
6214 if (size == 3)
6215 parts[2] = adj_offsettable_operand (operand, 8);
6216 }
6217 else if (GET_CODE (operand) == CONST_DOUBLE)
6218 {
6219 REAL_VALUE_TYPE r;
2b589241 6220 long l[4];
2450a057
JH
6221
6222 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
6223 switch (mode)
6224 {
6225 case XFmode:
2b589241 6226 case TFmode:
2450a057
JH
6227 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
6228 parts[2] = GEN_INT (l[2]);
6229 break;
6230 case DFmode:
6231 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
6232 break;
6233 default:
6234 abort ();
6235 }
6236 parts[1] = GEN_INT (l[1]);
6237 parts[0] = GEN_INT (l[0]);
6238 }
6239 else
6240 abort ();
e075ae69 6241 }
2450a057
JH
6242 }
6243
2b589241 6244 return size;
2450a057
JH
6245}
6246
6247/* Emit insns to perform a move or push of DI, DF, and XF values.
6248 Return false when normal moves are needed; true when all required
6249 insns have been emitted. Operands 2-4 contain the input values
6250 int the correct order; operands 5-7 contain the output values. */
6251
0f290768 6252int
2450a057
JH
6253ix86_split_long_move (operands1)
6254 rtx operands1[];
6255{
6256 rtx part[2][3];
6257 rtx operands[2];
2b589241 6258 int size;
2450a057
JH
6259 int push = 0;
6260 int collisions = 0;
6261
6262 /* Make our own copy to avoid clobbering the operands. */
6263 operands[0] = copy_rtx (operands1[0]);
6264 operands[1] = copy_rtx (operands1[1]);
6265
2450a057
JH
6266 /* The only non-offsettable memory we handle is push. */
6267 if (push_operand (operands[0], VOIDmode))
6268 push = 1;
6269 else if (GET_CODE (operands[0]) == MEM
6270 && ! offsettable_memref_p (operands[0]))
6271 abort ();
6272
2b589241 6273 size = ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
2450a057
JH
6274 ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
6275
6276 /* When emitting push, take care for source operands on the stack. */
6277 if (push && GET_CODE (operands[1]) == MEM
6278 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
6279 {
6280 if (size == 3)
6281 part[1][1] = part[1][2];
6282 part[1][0] = part[1][1];
6283 }
6284
0f290768 6285 /* We need to do copy in the right order in case an address register
2450a057
JH
6286 of the source overlaps the destination. */
6287 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
6288 {
6289 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
6290 collisions++;
6291 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
6292 collisions++;
6293 if (size == 3
6294 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
6295 collisions++;
6296
6297 /* Collision in the middle part can be handled by reordering. */
6298 if (collisions == 1 && size == 3
6299 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
e075ae69 6300 {
2450a057
JH
6301 rtx tmp;
6302 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
6303 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
6304 }
e075ae69 6305
2450a057
JH
6306 /* If there are more collisions, we can't handle it by reordering.
6307 Do an lea to the last part and use only one colliding move. */
6308 else if (collisions > 1)
6309 {
6310 collisions = 1;
6311 emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
6312 XEXP (part[1][0], 0)));
6313 part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
6314 part[1][1] = adj_offsettable_operand (part[1][0], 4);
6315 if (size == 3)
6316 part[1][2] = adj_offsettable_operand (part[1][0], 8);
6317 }
6318 }
6319
6320 if (push)
6321 {
6322 if (size == 3)
2b589241
JH
6323 {
6324 /* We use only first 12 bytes of TFmode value, but for pushing we
6325 are required to adjust stack as if we were pushing real 16byte
6326 value. */
6327 if (GET_MODE (operands1[0]) == TFmode)
6328 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
6329 GEN_INT (-4)));
6330 emit_insn (gen_push (part[1][2]));
6331 }
2450a057
JH
6332 emit_insn (gen_push (part[1][1]));
6333 emit_insn (gen_push (part[1][0]));
6334 return 1;
6335 }
6336
6337 /* Choose correct order to not overwrite the source before it is copied. */
6338 if ((REG_P (part[0][0])
6339 && REG_P (part[1][1])
6340 && (REGNO (part[0][0]) == REGNO (part[1][1])
6341 || (size == 3
6342 && REGNO (part[0][0]) == REGNO (part[1][2]))))
6343 || (collisions > 0
6344 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
6345 {
6346 if (size == 3)
6347 {
6348 operands1[2] = part[0][2];
6349 operands1[3] = part[0][1];
6350 operands1[4] = part[0][0];
6351 operands1[5] = part[1][2];
6352 operands1[6] = part[1][1];
6353 operands1[7] = part[1][0];
6354 }
6355 else
6356 {
6357 operands1[2] = part[0][1];
6358 operands1[3] = part[0][0];
6359 operands1[5] = part[1][1];
6360 operands1[6] = part[1][0];
6361 }
6362 }
6363 else
6364 {
6365 if (size == 3)
6366 {
6367 operands1[2] = part[0][0];
6368 operands1[3] = part[0][1];
6369 operands1[4] = part[0][2];
6370 operands1[5] = part[1][0];
6371 operands1[6] = part[1][1];
6372 operands1[7] = part[1][2];
6373 }
6374 else
6375 {
6376 operands1[2] = part[0][0];
6377 operands1[3] = part[0][1];
6378 operands1[5] = part[1][0];
6379 operands1[6] = part[1][1];
e075ae69
RH
6380 }
6381 }
32b5b1aa 6382
e9a25f70 6383 return 0;
32b5b1aa 6384}
32b5b1aa 6385
e075ae69
RH
6386void
6387ix86_split_ashldi (operands, scratch)
6388 rtx *operands, scratch;
32b5b1aa 6389{
e075ae69
RH
6390 rtx low[2], high[2];
6391 int count;
b985a30f 6392
e075ae69
RH
6393 if (GET_CODE (operands[2]) == CONST_INT)
6394 {
6395 split_di (operands, 2, low, high);
6396 count = INTVAL (operands[2]) & 63;
32b5b1aa 6397
e075ae69
RH
6398 if (count >= 32)
6399 {
6400 emit_move_insn (high[0], low[1]);
6401 emit_move_insn (low[0], const0_rtx);
b985a30f 6402
e075ae69
RH
6403 if (count > 32)
6404 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
6405 }
6406 else
6407 {
6408 if (!rtx_equal_p (operands[0], operands[1]))
6409 emit_move_insn (operands[0], operands[1]);
6410 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
6411 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
6412 }
6413 }
6414 else
6415 {
6416 if (!rtx_equal_p (operands[0], operands[1]))
6417 emit_move_insn (operands[0], operands[1]);
b985a30f 6418
e075ae69 6419 split_di (operands, 1, low, high);
b985a30f 6420
e075ae69
RH
6421 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
6422 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 6423
fe577e58 6424 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 6425 {
fe577e58 6426 if (! no_new_pseudos)
e075ae69
RH
6427 scratch = force_reg (SImode, const0_rtx);
6428 else
6429 emit_move_insn (scratch, const0_rtx);
6430
6431 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
6432 scratch));
6433 }
6434 else
6435 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
6436 }
e9a25f70 6437}
32b5b1aa 6438
e075ae69
RH
6439void
6440ix86_split_ashrdi (operands, scratch)
6441 rtx *operands, scratch;
32b5b1aa 6442{
e075ae69
RH
6443 rtx low[2], high[2];
6444 int count;
32b5b1aa 6445
e075ae69
RH
6446 if (GET_CODE (operands[2]) == CONST_INT)
6447 {
6448 split_di (operands, 2, low, high);
6449 count = INTVAL (operands[2]) & 63;
32b5b1aa 6450
e075ae69
RH
6451 if (count >= 32)
6452 {
6453 emit_move_insn (low[0], high[1]);
32b5b1aa 6454
e075ae69
RH
6455 if (! reload_completed)
6456 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
6457 else
6458 {
6459 emit_move_insn (high[0], low[0]);
6460 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
6461 }
6462
6463 if (count > 32)
6464 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
6465 }
6466 else
6467 {
6468 if (!rtx_equal_p (operands[0], operands[1]))
6469 emit_move_insn (operands[0], operands[1]);
6470 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
6471 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
6472 }
6473 }
6474 else
32b5b1aa 6475 {
e075ae69
RH
6476 if (!rtx_equal_p (operands[0], operands[1]))
6477 emit_move_insn (operands[0], operands[1]);
6478
6479 split_di (operands, 1, low, high);
6480
6481 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
6482 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
6483
fe577e58 6484 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 6485 {
fe577e58 6486 if (! no_new_pseudos)
e075ae69
RH
6487 scratch = gen_reg_rtx (SImode);
6488 emit_move_insn (scratch, high[0]);
6489 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
6490 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
6491 scratch));
6492 }
6493 else
6494 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 6495 }
e075ae69 6496}
32b5b1aa 6497
e075ae69
RH
6498void
6499ix86_split_lshrdi (operands, scratch)
6500 rtx *operands, scratch;
6501{
6502 rtx low[2], high[2];
6503 int count;
32b5b1aa 6504
e075ae69 6505 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 6506 {
e075ae69
RH
6507 split_di (operands, 2, low, high);
6508 count = INTVAL (operands[2]) & 63;
6509
6510 if (count >= 32)
c7271385 6511 {
e075ae69
RH
6512 emit_move_insn (low[0], high[1]);
6513 emit_move_insn (high[0], const0_rtx);
32b5b1aa 6514
e075ae69
RH
6515 if (count > 32)
6516 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
6517 }
6518 else
6519 {
6520 if (!rtx_equal_p (operands[0], operands[1]))
6521 emit_move_insn (operands[0], operands[1]);
6522 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
6523 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
6524 }
32b5b1aa 6525 }
e075ae69
RH
6526 else
6527 {
6528 if (!rtx_equal_p (operands[0], operands[1]))
6529 emit_move_insn (operands[0], operands[1]);
32b5b1aa 6530
e075ae69
RH
6531 split_di (operands, 1, low, high);
6532
6533 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
6534 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
6535
6536 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 6537 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 6538 {
fe577e58 6539 if (! no_new_pseudos)
e075ae69
RH
6540 scratch = force_reg (SImode, const0_rtx);
6541 else
6542 emit_move_insn (scratch, const0_rtx);
6543
6544 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
6545 scratch));
6546 }
6547 else
6548 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
6549 }
32b5b1aa 6550}
3f803cd9 6551
e075ae69
RH
6552/* Expand the appropriate insns for doing strlen if not just doing
6553 repnz; scasb
6554
6555 out = result, initialized with the start address
6556 align_rtx = alignment of the address.
6557 scratch = scratch register, initialized with the startaddress when
77ebd435 6558 not aligned, otherwise undefined
3f803cd9
SC
6559
6560 This is just the body. It needs the initialisations mentioned above and
6561 some address computing at the end. These things are done in i386.md. */
6562
e075ae69
RH
6563void
6564ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
6565 rtx out, align_rtx, scratch;
3f803cd9 6566{
e075ae69
RH
6567 int align;
6568 rtx tmp;
6569 rtx align_2_label = NULL_RTX;
6570 rtx align_3_label = NULL_RTX;
6571 rtx align_4_label = gen_label_rtx ();
6572 rtx end_0_label = gen_label_rtx ();
e075ae69 6573 rtx mem;
e2e52e1b 6574 rtx tmpreg = gen_reg_rtx (SImode);
e075ae69
RH
6575
6576 align = 0;
6577 if (GET_CODE (align_rtx) == CONST_INT)
6578 align = INTVAL (align_rtx);
3f803cd9 6579
e9a25f70 6580 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 6581
e9a25f70 6582 /* Is there a known alignment and is it less than 4? */
e075ae69 6583 if (align < 4)
3f803cd9 6584 {
e9a25f70 6585 /* Is there a known alignment and is it not 2? */
e075ae69 6586 if (align != 2)
3f803cd9 6587 {
e075ae69
RH
6588 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
6589 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
6590
6591 /* Leave just the 3 lower bits. */
6592 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
6593 NULL_RTX, 0, OPTAB_WIDEN);
6594
9076b9c1 6595 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
77ebd435 6596 SImode, 1, 0, align_4_label);
9076b9c1
JH
6597 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
6598 SImode, 1, 0, align_2_label);
6599 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
6600 SImode, 1, 0, align_3_label);
3f803cd9
SC
6601 }
6602 else
6603 {
e9a25f70
JL
6604 /* Since the alignment is 2, we have to check 2 or 0 bytes;
6605 check if is aligned to 4 - byte. */
e9a25f70 6606
e075ae69
RH
6607 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
6608 NULL_RTX, 0, OPTAB_WIDEN);
6609
9076b9c1
JH
6610 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6611 SImode, 1, 0, align_4_label);
3f803cd9
SC
6612 }
6613
e075ae69 6614 mem = gen_rtx_MEM (QImode, out);
e9a25f70 6615
e075ae69 6616 /* Now compare the bytes. */
e9a25f70 6617
0f290768 6618 /* Compare the first n unaligned byte on a byte per byte basis. */
9076b9c1
JH
6619 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6620 QImode, 1, 0, end_0_label);
3f803cd9 6621
0f290768 6622 /* Increment the address. */
e075ae69 6623 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 6624
e075ae69
RH
6625 /* Not needed with an alignment of 2 */
6626 if (align != 2)
6627 {
6628 emit_label (align_2_label);
3f803cd9 6629
9076b9c1
JH
6630 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6631 QImode, 1, 0, end_0_label);
e075ae69
RH
6632
6633 emit_insn (gen_addsi3 (out, out, const1_rtx));
6634
6635 emit_label (align_3_label);
6636 }
6637
9076b9c1
JH
6638 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6639 QImode, 1, 0, end_0_label);
e075ae69
RH
6640
6641 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
6642 }
6643
e075ae69
RH
6644 /* Generate loop to check 4 bytes at a time. It is not a good idea to
6645 align this loop. It gives only huge programs, but does not help to
6646 speed up. */
6647 emit_label (align_4_label);
3f803cd9 6648
e075ae69
RH
6649 mem = gen_rtx_MEM (SImode, out);
6650 emit_move_insn (scratch, mem);
e075ae69 6651 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 6652
e2e52e1b
JH
6653 /* This formula yields a nonzero result iff one of the bytes is zero.
6654 This saves three branches inside loop and many cycles. */
6655
6656 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
6657 emit_insn (gen_one_cmplsi2 (scratch, scratch));
6658 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
6659 emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
9076b9c1
JH
6660 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
6661 SImode, 1, 0, align_4_label);
e2e52e1b
JH
6662
6663 if (TARGET_CMOVE)
6664 {
6665 rtx reg = gen_reg_rtx (SImode);
6666 emit_move_insn (reg, tmpreg);
6667 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
6668
0f290768 6669 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 6670 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
6671 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6672 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6673 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
6674 gen_rtx_IF_THEN_ELSE (SImode, tmp,
77ebd435
AJ
6675 reg,
6676 tmpreg)));
e2e52e1b
JH
6677 /* Emit lea manually to avoid clobbering of flags. */
6678 emit_insn (gen_rtx_SET (SImode, reg,
6679 gen_rtx_PLUS (SImode, out, GEN_INT (2))));
6680
6681 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6682 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6683 emit_insn (gen_rtx_SET (VOIDmode, out,
6684 gen_rtx_IF_THEN_ELSE (SImode, tmp,
77ebd435
AJ
6685 reg,
6686 out)));
e2e52e1b
JH
6687
6688 }
6689 else
6690 {
6691 rtx end_2_label = gen_label_rtx ();
6692 /* Is zero in the first two bytes? */
6693
16189740 6694 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
6695 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6696 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
6697 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6698 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
6699 pc_rtx);
6700 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
6701 JUMP_LABEL (tmp) = end_2_label;
6702
0f290768 6703 /* Not in the first two. Move two bytes forward. */
e2e52e1b
JH
6704 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
6705 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
6706
6707 emit_label (end_2_label);
6708
6709 }
6710
0f290768 6711 /* Avoid branch in fixing the byte. */
e2e52e1b 6712 tmpreg = gen_lowpart (QImode, tmpreg);
7e08e190
JH
6713 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
6714 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
e075ae69
RH
6715
6716 emit_label (end_0_label);
6717}
6718\f
e075ae69
RH
6719/* Clear stack slot assignments remembered from previous functions.
6720 This is called from INIT_EXPANDERS once before RTL is emitted for each
6721 function. */
6722
36edd3cc
BS
6723static void
6724ix86_init_machine_status (p)
1526a060 6725 struct function *p;
e075ae69 6726{
37b15744
RH
6727 p->machine = (struct machine_function *)
6728 xcalloc (1, sizeof (struct machine_function));
e075ae69
RH
6729}
6730
1526a060
BS
6731/* Mark machine specific bits of P for GC. */
6732static void
6733ix86_mark_machine_status (p)
6734 struct function *p;
6735{
37b15744 6736 struct machine_function *machine = p->machine;
1526a060
BS
6737 enum machine_mode mode;
6738 int n;
6739
37b15744
RH
6740 if (! machine)
6741 return;
6742
1526a060
BS
6743 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6744 mode = (enum machine_mode) ((int) mode + 1))
6745 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
37b15744
RH
6746 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
6747}
6748
6749static void
6750ix86_free_machine_status (p)
6751 struct function *p;
6752{
6753 free (p->machine);
6754 p->machine = NULL;
1526a060
BS
6755}
6756
e075ae69
RH
6757/* Return a MEM corresponding to a stack slot with mode MODE.
6758 Allocate a new slot if necessary.
6759
6760 The RTL for a function can have several slots available: N is
6761 which slot to use. */
6762
6763rtx
6764assign_386_stack_local (mode, n)
6765 enum machine_mode mode;
6766 int n;
6767{
6768 if (n < 0 || n >= MAX_386_STACK_LOCALS)
6769 abort ();
6770
6771 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
6772 ix86_stack_locals[(int) mode][n]
6773 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
6774
6775 return ix86_stack_locals[(int) mode][n];
6776}
6777\f
6778/* Calculate the length of the memory address in the instruction
6779 encoding. Does not include the one-byte modrm, opcode, or prefix. */
6780
6781static int
6782memory_address_length (addr)
6783 rtx addr;
6784{
6785 struct ix86_address parts;
6786 rtx base, index, disp;
6787 int len;
6788
6789 if (GET_CODE (addr) == PRE_DEC
6790 || GET_CODE (addr) == POST_INC)
6791 return 0;
3f803cd9 6792
e075ae69
RH
6793 if (! ix86_decompose_address (addr, &parts))
6794 abort ();
3f803cd9 6795
e075ae69
RH
6796 base = parts.base;
6797 index = parts.index;
6798 disp = parts.disp;
6799 len = 0;
3f803cd9 6800
e075ae69
RH
6801 /* Register Indirect. */
6802 if (base && !index && !disp)
6803 {
6804 /* Special cases: ebp and esp need the two-byte modrm form. */
6805 if (addr == stack_pointer_rtx
6806 || addr == arg_pointer_rtx
564d80f4
JH
6807 || addr == frame_pointer_rtx
6808 || addr == hard_frame_pointer_rtx)
e075ae69 6809 len = 1;
3f803cd9 6810 }
e9a25f70 6811
e075ae69
RH
6812 /* Direct Addressing. */
6813 else if (disp && !base && !index)
6814 len = 4;
6815
3f803cd9
SC
6816 else
6817 {
e075ae69
RH
6818 /* Find the length of the displacement constant. */
6819 if (disp)
6820 {
6821 if (GET_CODE (disp) == CONST_INT
6822 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
6823 len = 1;
6824 else
6825 len = 4;
6826 }
3f803cd9 6827
e075ae69
RH
6828 /* An index requires the two-byte modrm form. */
6829 if (index)
6830 len += 1;
3f803cd9
SC
6831 }
6832
e075ae69
RH
6833 return len;
6834}
79325812 6835
6ef67412
JH
6836/* Compute default value for "length_immediate" attribute. When SHORTFORM is set
6837 expect that insn have 8bit immediate alternative. */
e075ae69 6838int
6ef67412 6839ix86_attr_length_immediate_default (insn, shortform)
e075ae69 6840 rtx insn;
6ef67412 6841 int shortform;
e075ae69 6842{
6ef67412
JH
6843 int len = 0;
6844 int i;
6c698a6d 6845 extract_insn_cached (insn);
6ef67412
JH
6846 for (i = recog_data.n_operands - 1; i >= 0; --i)
6847 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 6848 {
6ef67412 6849 if (len)
3071fab5 6850 abort ();
6ef67412
JH
6851 if (shortform
6852 && GET_CODE (recog_data.operand[i]) == CONST_INT
6853 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
6854 len = 1;
6855 else
6856 {
6857 switch (get_attr_mode (insn))
6858 {
6859 case MODE_QI:
6860 len+=1;
6861 break;
6862 case MODE_HI:
6863 len+=2;
6864 break;
6865 case MODE_SI:
6866 len+=4;
6867 break;
6868 default:
6869 fatal_insn ("Unknown insn mode", insn);
6870 }
6871 }
3071fab5 6872 }
6ef67412
JH
6873 return len;
6874}
6875/* Compute default value for "length_address" attribute. */
6876int
6877ix86_attr_length_address_default (insn)
6878 rtx insn;
6879{
6880 int i;
6c698a6d 6881 extract_insn_cached (insn);
1ccbefce
RH
6882 for (i = recog_data.n_operands - 1; i >= 0; --i)
6883 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 6884 {
6ef67412 6885 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
6886 break;
6887 }
6ef67412 6888 return 0;
3f803cd9 6889}
e075ae69
RH
6890\f
6891/* Return the maximum number of instructions a cpu can issue. */
b657fc39 6892
e075ae69
RH
6893int
6894ix86_issue_rate ()
b657fc39 6895{
e075ae69 6896 switch (ix86_cpu)
b657fc39 6897 {
e075ae69
RH
6898 case PROCESSOR_PENTIUM:
6899 case PROCESSOR_K6:
6900 return 2;
79325812 6901
e075ae69
RH
6902 case PROCESSOR_PENTIUMPRO:
6903 return 3;
b657fc39 6904
b657fc39 6905 default:
e075ae69 6906 return 1;
b657fc39 6907 }
b657fc39
L
6908}
6909
e075ae69
RH
6910/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
6911 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 6912
e075ae69
RH
6913static int
6914ix86_flags_dependant (insn, dep_insn, insn_type)
6915 rtx insn, dep_insn;
6916 enum attr_type insn_type;
6917{
6918 rtx set, set2;
b657fc39 6919
e075ae69
RH
6920 /* Simplify the test for uninteresting insns. */
6921 if (insn_type != TYPE_SETCC
6922 && insn_type != TYPE_ICMOV
6923 && insn_type != TYPE_FCMOV
6924 && insn_type != TYPE_IBR)
6925 return 0;
b657fc39 6926
e075ae69
RH
6927 if ((set = single_set (dep_insn)) != 0)
6928 {
6929 set = SET_DEST (set);
6930 set2 = NULL_RTX;
6931 }
6932 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
6933 && XVECLEN (PATTERN (dep_insn), 0) == 2
6934 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
6935 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
6936 {
6937 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6938 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6939 }
78a0d70c
ZW
6940 else
6941 return 0;
b657fc39 6942
78a0d70c
ZW
6943 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
6944 return 0;
b657fc39 6945
78a0d70c
ZW
6946 /* This test is true if the dependant insn reads the flags but
6947 not any other potentially set register. */
6948 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
6949 return 0;
6950
6951 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
6952 return 0;
6953
6954 return 1;
e075ae69 6955}
b657fc39 6956
e075ae69
RH
6957/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
6958 address with operands set by DEP_INSN. */
6959
6960static int
6961ix86_agi_dependant (insn, dep_insn, insn_type)
6962 rtx insn, dep_insn;
6963 enum attr_type insn_type;
6964{
6965 rtx addr;
6966
6967 if (insn_type == TYPE_LEA)
5fbdde42
RH
6968 {
6969 addr = PATTERN (insn);
6970 if (GET_CODE (addr) == SET)
6971 ;
6972 else if (GET_CODE (addr) == PARALLEL
6973 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
6974 addr = XVECEXP (addr, 0, 0);
6975 else
6976 abort ();
6977 addr = SET_SRC (addr);
6978 }
e075ae69
RH
6979 else
6980 {
6981 int i;
6c698a6d 6982 extract_insn_cached (insn);
1ccbefce
RH
6983 for (i = recog_data.n_operands - 1; i >= 0; --i)
6984 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 6985 {
1ccbefce 6986 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
6987 goto found;
6988 }
6989 return 0;
6990 found:;
b657fc39
L
6991 }
6992
e075ae69 6993 return modified_in_p (addr, dep_insn);
b657fc39 6994}
a269a03c
JC
6995
6996int
e075ae69 6997ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
6998 rtx insn, link, dep_insn;
6999 int cost;
7000{
e075ae69 7001 enum attr_type insn_type, dep_insn_type;
0b5107cf 7002 enum attr_memory memory;
e075ae69 7003 rtx set, set2;
9b00189f 7004 int dep_insn_code_number;
a269a03c 7005
309ada50 7006 /* Anti and output depenancies have zero cost on all CPUs. */
e075ae69 7007 if (REG_NOTE_KIND (link) != 0)
309ada50 7008 return 0;
a269a03c 7009
9b00189f
JH
7010 dep_insn_code_number = recog_memoized (dep_insn);
7011
e075ae69 7012 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 7013 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 7014 return cost;
a269a03c 7015
1c71e60e
JH
7016 insn_type = get_attr_type (insn);
7017 dep_insn_type = get_attr_type (dep_insn);
9b00189f 7018
1c71e60e
JH
7019 /* Prologue and epilogue allocators can have a false dependency on ebp.
7020 This results in one cycle extra stall on Pentium prologue scheduling,
7021 so handle this important case manually. */
7022 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
7023 && dep_insn_type == TYPE_ALU
9b00189f
JH
7024 && !reg_mentioned_p (stack_pointer_rtx, insn))
7025 return 0;
7026
a269a03c
JC
7027 switch (ix86_cpu)
7028 {
7029 case PROCESSOR_PENTIUM:
e075ae69
RH
7030 /* Address Generation Interlock adds a cycle of latency. */
7031 if (ix86_agi_dependant (insn, dep_insn, insn_type))
7032 cost += 1;
7033
7034 /* ??? Compares pair with jump/setcc. */
7035 if (ix86_flags_dependant (insn, dep_insn, insn_type))
7036 cost = 0;
7037
7038 /* Floating point stores require value to be ready one cycle ealier. */
0f290768 7039 if (insn_type == TYPE_FMOV
e075ae69
RH
7040 && get_attr_memory (insn) == MEMORY_STORE
7041 && !ix86_agi_dependant (insn, dep_insn, insn_type))
7042 cost += 1;
7043 break;
a269a03c 7044
e075ae69 7045 case PROCESSOR_PENTIUMPRO:
0f290768 7046 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
7047 increase the cost here for non-imov insns. */
7048 if (dep_insn_type != TYPE_IMOV
7049 && dep_insn_type != TYPE_FMOV
0b5107cf
JH
7050 && ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
7051 || memory == MEMORY_BOTH))
e075ae69
RH
7052 cost += 1;
7053
7054 /* INT->FP conversion is expensive. */
7055 if (get_attr_fp_int_src (dep_insn))
7056 cost += 5;
7057
7058 /* There is one cycle extra latency between an FP op and a store. */
7059 if (insn_type == TYPE_FMOV
7060 && (set = single_set (dep_insn)) != NULL_RTX
7061 && (set2 = single_set (insn)) != NULL_RTX
7062 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
7063 && GET_CODE (SET_DEST (set2)) == MEM)
7064 cost += 1;
7065 break;
a269a03c 7066
e075ae69
RH
7067 case PROCESSOR_K6:
7068 /* The esp dependency is resolved before the instruction is really
7069 finished. */
7070 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
7071 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
7072 return 1;
a269a03c 7073
0f290768 7074 /* Since we can't represent delayed latencies of load+operation,
e075ae69 7075 increase the cost here for non-imov insns. */
0b5107cf
JH
7076 if ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
7077 || memory == MEMORY_BOTH)
e075ae69
RH
7078 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
7079
7080 /* INT->FP conversion is expensive. */
7081 if (get_attr_fp_int_src (dep_insn))
7082 cost += 5;
a14003ee 7083 break;
e075ae69 7084
309ada50 7085 case PROCESSOR_ATHLON:
0b5107cf
JH
7086 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
7087 || memory == MEMORY_BOTH)
7088 {
7089 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
7090 cost += 2;
7091 else
7092 cost += 3;
7093 }
309ada50 7094
a269a03c 7095 default:
a269a03c
JC
7096 break;
7097 }
7098
7099 return cost;
7100}
0a726ef1 7101
e075ae69
RH
/* Per-basic-block scheduling state, cleared by ix86_sched_init.
   Only the PPro model currently keeps any state.  */
static union
{
  struct ppro_sched_data
  {
    /* The insns occupying the three decoder slots this cycle (or
       NULL for an empty slot).  */
    rtx decode[3];
    /* Number of insns issued in the current cycle.  */
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;
0a726ef1 7110
e075ae69
RH
7111static int
7112ix86_safe_length (insn)
7113 rtx insn;
7114{
7115 if (recog_memoized (insn) >= 0)
7116 return get_attr_length(insn);
7117 else
7118 return 128;
7119}
0a726ef1 7120
e075ae69
RH
/* Like ix86_safe_length but used where a prefix allowance is wanted;
   returns 0 for unrecognizable insns.
   NOTE(review): despite the name, this returns the full "length"
   attribute, identical to ix86_safe_length -- confirm whether a
   separate prefix-length attribute was intended here.  */
static int
ix86_safe_length_prefix (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_length(insn);
  else
    return 0;
}
7130
7131static enum attr_memory
7132ix86_safe_memory (insn)
7133 rtx insn;
7134{
7135 if (recog_memoized (insn) >= 0)
7136 return get_attr_memory(insn);
7137 else
7138 return MEMORY_UNKNOWN;
7139}
0a726ef1 7140
e075ae69
RH
7141static enum attr_pent_pair
7142ix86_safe_pent_pair (insn)
7143 rtx insn;
7144{
7145 if (recog_memoized (insn) >= 0)
7146 return get_attr_pent_pair(insn);
7147 else
7148 return PENT_PAIR_NP;
7149}
0a726ef1 7150
e075ae69
RH
7151static enum attr_ppro_uops
7152ix86_safe_ppro_uops (insn)
7153 rtx insn;
7154{
7155 if (recog_memoized (insn) >= 0)
7156 return get_attr_ppro_uops (insn);
7157 else
7158 return PPRO_UOPS_MANY;
7159}
0a726ef1 7160
e075ae69
RH
7161static void
7162ix86_dump_ppro_packet (dump)
7163 FILE *dump;
0a726ef1 7164{
e075ae69 7165 if (ix86_sched_data.ppro.decode[0])
0a726ef1 7166 {
e075ae69
RH
7167 fprintf (dump, "PPRO packet: %d",
7168 INSN_UID (ix86_sched_data.ppro.decode[0]));
7169 if (ix86_sched_data.ppro.decode[1])
7170 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
7171 if (ix86_sched_data.ppro.decode[2])
7172 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
7173 fputc ('\n', dump);
7174 }
7175}
0a726ef1 7176
e075ae69 7177/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 7178
e075ae69
RH
7179void
7180ix86_sched_init (dump, sched_verbose)
7181 FILE *dump ATTRIBUTE_UNUSED;
7182 int sched_verbose ATTRIBUTE_UNUSED;
7183{
7184 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
7185}
7186
7187/* Shift INSN to SLOT, and shift everything else down. */
7188
7189static void
7190ix86_reorder_insn (insnp, slot)
7191 rtx *insnp, *slot;
7192{
7193 if (insnp != slot)
7194 {
7195 rtx insn = *insnp;
0f290768 7196 do
e075ae69
RH
7197 insnp[0] = insnp[1];
7198 while (++insnp != slot);
7199 *insnp = insn;
0a726ef1 7200 }
e075ae69
RH
7201}
7202
/* Find an instruction with given pairability and minimal amount of cycles
   lost by the fact that the CPU waits for both pipelines to finish before
   reading next instructions.  Also take care that both instructions together
   can not exceed 7 bytes.

   E_READY points at the last insn of the ready list, READY at its start,
   TYPE is the wanted pairing class, and FIRST is the insn the candidate
   would pair with.  Returns a pointer into the ready list, or NULL when
   no suitable partner exists.  */

static rtx *
ix86_pent_find_pair (e_ready, ready, type, first)
     rtx *e_ready;
     rtx *ready;
     enum attr_pent_pair type;
     rtx first;
{
  int mincycles, cycles;
  enum attr_pent_pair tmp;
  enum attr_memory memory;
  rtx *insnp, *bestinsnp = NULL;

  /* FIRST itself must leave room for a 7-byte pair partner.  */
  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
    return NULL;

  memory = ix86_safe_memory (first);
  cycles = result_ready_cost (first);
  mincycles = INT_MAX;

  /* Scan backwards; stop early if a zero-penalty pairing is found
     (mincycles == 0 terminates the loop).  */
  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
      {
	enum attr_memory second_memory;
	int secondcycles, currentcycles;

	second_memory = ix86_safe_memory (*insnp);
	secondcycles = result_ready_cost (*insnp);
	/* Base penalty: the pipes wait for each other, so the loss is
	   the difference of the two latencies.  */
	currentcycles = abs (cycles - secondcycles);

	if (secondcycles >= 1 && cycles >= 1)
	  {
	    /* Two read/modify/write instructions together takes two
	       cycles longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
	      currentcycles += 2;

	    /* Read modify/write instruction followed by read/modify
	       takes one cycle longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
		&& tmp != PENT_PAIR_UV
		&& ix86_safe_pent_pair (first) != PENT_PAIR_UV)
	      currentcycles += 1;
	  }
	if (currentcycles < mincycles)
	  bestinsnp = insnp, mincycles = currentcycles;
      }

  return bestinsnp;
}
7258
78a0d70c 7259/* Subroutines of ix86_sched_reorder. */
e075ae69 7260
/* Reorder the ready list (READY .. E_READY, E_READY being the next insn
   to issue) so that the Pentium's U/V pipe pairing rules are satisfied
   for the first two insns issued.  */
static void
ix86_sched_reorder_pentium (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  enum attr_pent_pair pair1, pair2;
  rtx *insnp;

  /* This wouldn't be necessary if Haifa knew that static insn ordering
     is important to which pipe an insn is issued to.  So we have to make
     some minor rearrangements.  */

  pair1 = ix86_safe_pent_pair (*e_ready);

  /* If the first insn is non-pairable, let it be.  */
  if (pair1 == PENT_PAIR_NP)
    return;

  pair2 = PENT_PAIR_NP;
  insnp = 0;

  /* If the first insn is UV or PV pairable, search for a PU
     insn to go with.  */
  if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PU, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PU;
    }

  /* If the first insn is PU or UV pairable, search for a PV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP
      && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PV;
    }

  /* If the first insn is pairable, search for a UV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP)
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_UV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_UV;
    }

  if (pair2 == PENT_PAIR_NP)
    return;

  /* Found something!  Decide if we need to swap the order.  */
  if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
      || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
	  && ix86_safe_memory (*e_ready) == MEMORY_BOTH
	  && ix86_safe_memory (*insnp) == MEMORY_LOAD))
    ix86_reorder_insn (insnp, e_ready);
  else
    ix86_reorder_insn (insnp, e_ready - 1);
}
e075ae69 7325
/* Reorder the ready list (READY .. E_READY) to pack insns into the
   PPro's 4-1-1 decoder template, and record how many were issued in
   ix86_sched_data.ppro.issued_this_cycle.  */
static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready >= e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}
fb693d44 7410
0f290768 7411/* We are about to being issuing insns for this clock cycle.
78a0d70c
ZW
7412 Override the default sort algorithm to better slot instructions. */
7413int
7414ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
7415 FILE *dump ATTRIBUTE_UNUSED;
7416 int sched_verbose ATTRIBUTE_UNUSED;
7417 rtx *ready;
7418 int n_ready;
7419 int clock_var ATTRIBUTE_UNUSED;
7420{
7421 rtx *e_ready = ready + n_ready - 1;
fb693d44 7422
78a0d70c
ZW
7423 if (n_ready < 2)
7424 goto out;
e075ae69 7425
78a0d70c
ZW
7426 switch (ix86_cpu)
7427 {
7428 default:
7429 break;
e075ae69 7430
78a0d70c
ZW
7431 case PROCESSOR_PENTIUM:
7432 ix86_sched_reorder_pentium (ready, e_ready);
7433 break;
e075ae69 7434
78a0d70c
ZW
7435 case PROCESSOR_PENTIUMPRO:
7436 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 7437 break;
fb693d44
RH
7438 }
7439
e075ae69
RH
7440out:
7441 return ix86_issue_rate ();
7442}
fb693d44 7443
e075ae69
RH
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    /* A many-uop insn occupies the whole decode group by
	       itself: dump the packet formed so far, then dump INSN
	       as its own packet, and leave the decoders empty.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    /* A 2-4 uop insn must go to decoder 0; it starts a new
	       packet.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    /* Single-uop insn: place it in the first free decoder
	       slot; filling slot 2 completes the packet.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}
a7180f70 7506\f
0e4970d7
RK
7507/* Walk through INSNS and look for MEM references whose address is DSTREG or
7508 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
7509 appropriate. */
7510
7511void
7512ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
7513 rtx insns;
7514 rtx dstref, srcref, dstreg, srcreg;
7515{
7516 rtx insn;
7517
7518 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
7519 if (INSN_P (insn))
7520 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
7521 dstreg, srcreg);
7522}
7523
/* Subroutine of above to actually do the updating by recursively walking
   the rtx.  X is the rtx being walked; DSTREG/SRCREG are the address
   registers to match, DSTREF/SRCREF the MEMs whose attributes are
   copied onto matching MEMs.  */

static void
ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx x;
     rtx dstref, srcref, dstreg, srcreg;
{
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);
  int i, j;

  /* Pointer equality is intentional: we match the exact register rtx
     used as the MEM address.  */
  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  /* Recurse into all rtx ('e') and rtx-vector ('E') operands.  */
  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
    {
      if (*format_ptr == 'e')
	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
				   dstreg, srcreg);
      else if (*format_ptr == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
				     dstreg, srcreg);
    }
}
7552\f
a7180f70
BS
7553/* Compute the alignment given to a constant that is being placed in memory.
7554 EXP is the constant and ALIGN is the alignment that the object would
7555 ordinarily have.
7556 The value of this function is used instead of that alignment to align
7557 the object. */
7558
7559int
7560ix86_constant_alignment (exp, align)
7561 tree exp;
7562 int align;
7563{
7564 if (TREE_CODE (exp) == REAL_CST)
7565 {
7566 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
7567 return 64;
7568 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
7569 return 128;
7570 }
7571 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
7572 && align < 256)
7573 return 256;
7574
7575 return align;
7576}
7577
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (type, align)
     tree type;
     int align;
{
  /* Aggregates of 256 bits or more get 256-bit alignment.  */
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* For the remaining cases, raise alignment based on the element or
     field mode: DFmode data to 64 bits, 128-bit modes to 128 bits.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Structures/unions are judged by their first field's mode.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
7631
/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.

   Same mode-based rules as ix86_data_alignment, minus the 256-bit
   large-aggregate case.  */

int
ix86_local_alignment (type, align)
     tree type;
     int align;
{
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Structures/unions are judged by their first field's mode.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
bd793c65
BS
7677
/* Convenience wrapper for registering a machine-dependent builtin.  */
#define def_builtin(NAME, TYPE, CODE) \
  builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL_PTR)
/* Describes one MMX/SSE builtin: the insn code used to expand it, its
   user-visible name (0 when expanded specially), its IX86_BUILTIN_*
   enumerator, and for comparison builtins the rtx comparison code plus
   a flag.  NOTE(review): in the tables below GT/GE entries use LT/LE
   with flag 1, so the flag apparently requests swapped operands --
   confirm against the expander.  */
struct builtin_description
{
  enum insn_code icode;
  const char * name;
  enum ix86_builtins code;
  enum rtx_code comparison;
  unsigned int flag;
};
7688
/* SSE scalar compare-and-set-EFLAGS builtins (comiss/ucomiss).  */
static struct builtin_description bdesc_comi[] =
{
  { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
};
7704
/* Two-operand MMX/SSE builtins.  Entries with a null name are expanded
   specially rather than registered by this table's generic walker.  */
static struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { CODE_FOR_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { CODE_FOR_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { CODE_FOR_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { CODE_FOR_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },

  { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },

  { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  */
  { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },

  { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }

};
7837
7838static struct builtin_description bdesc_1arg[] =
7839{
7840 { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
7841 { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
7842
7843 { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
7844 { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
7845 { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
7846
7847 { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
7848 { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
7849 { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
7850 { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
7851
7852};
7853
7854/* Expand all the target specific builtins. This is not called if TARGET_MMX
7855 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
7856 builtins. */
7857void
7858ix86_init_builtins ()
7859{
7860 struct builtin_description * d;
77ebd435 7861 size_t i;
cbd5937a 7862 tree endlink = void_list_node;
bd793c65
BS
7863
7864 tree pchar_type_node = build_pointer_type (char_type_node);
7865 tree pfloat_type_node = build_pointer_type (float_type_node);
7866 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
7867 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
7868
7869 /* Comparisons. */
7870 tree int_ftype_v4sf_v4sf
7871 = build_function_type (integer_type_node,
7872 tree_cons (NULL_TREE, V4SF_type_node,
7873 tree_cons (NULL_TREE,
7874 V4SF_type_node,
7875 endlink)));
7876 tree v4si_ftype_v4sf_v4sf
7877 = build_function_type (V4SI_type_node,
7878 tree_cons (NULL_TREE, V4SF_type_node,
7879 tree_cons (NULL_TREE,
7880 V4SF_type_node,
7881 endlink)));
7882 /* MMX/SSE/integer conversions. */
7883 tree int_ftype_v4sf_int
7884 = build_function_type (integer_type_node,
7885 tree_cons (NULL_TREE, V4SF_type_node,
7886 tree_cons (NULL_TREE,
7887 integer_type_node,
7888 endlink)));
7889 tree int_ftype_v4sf
7890 = build_function_type (integer_type_node,
7891 tree_cons (NULL_TREE, V4SF_type_node,
7892 endlink));
7893 tree int_ftype_v8qi
7894 = build_function_type (integer_type_node,
7895 tree_cons (NULL_TREE, V8QI_type_node,
7896 endlink));
7897 tree int_ftype_v2si
7898 = build_function_type (integer_type_node,
7899 tree_cons (NULL_TREE, V2SI_type_node,
7900 endlink));
7901 tree v2si_ftype_int
7902 = build_function_type (V2SI_type_node,
7903 tree_cons (NULL_TREE, integer_type_node,
7904 endlink));
7905 tree v4sf_ftype_v4sf_int
7906 = build_function_type (integer_type_node,
7907 tree_cons (NULL_TREE, V4SF_type_node,
7908 tree_cons (NULL_TREE, integer_type_node,
7909 endlink)));
7910 tree v4sf_ftype_v4sf_v2si
7911 = build_function_type (V4SF_type_node,
7912 tree_cons (NULL_TREE, V4SF_type_node,
7913 tree_cons (NULL_TREE, V2SI_type_node,
7914 endlink)));
7915 tree int_ftype_v4hi_int
7916 = build_function_type (integer_type_node,
7917 tree_cons (NULL_TREE, V4HI_type_node,
7918 tree_cons (NULL_TREE, integer_type_node,
7919 endlink)));
7920 tree v4hi_ftype_v4hi_int_int
332316cd 7921 = build_function_type (V4HI_type_node,
bd793c65
BS
7922 tree_cons (NULL_TREE, V4HI_type_node,
7923 tree_cons (NULL_TREE, integer_type_node,
7924 tree_cons (NULL_TREE,
7925 integer_type_node,
7926 endlink))));
7927 /* Miscellaneous. */
7928 tree v8qi_ftype_v4hi_v4hi
7929 = build_function_type (V8QI_type_node,
7930 tree_cons (NULL_TREE, V4HI_type_node,
7931 tree_cons (NULL_TREE, V4HI_type_node,
7932 endlink)));
7933 tree v4hi_ftype_v2si_v2si
7934 = build_function_type (V4HI_type_node,
7935 tree_cons (NULL_TREE, V2SI_type_node,
7936 tree_cons (NULL_TREE, V2SI_type_node,
7937 endlink)));
7938 tree v4sf_ftype_v4sf_v4sf_int
7939 = build_function_type (V4SF_type_node,
7940 tree_cons (NULL_TREE, V4SF_type_node,
7941 tree_cons (NULL_TREE, V4SF_type_node,
7942 tree_cons (NULL_TREE,
7943 integer_type_node,
7944 endlink))));
7945 tree v4hi_ftype_v8qi_v8qi
7946 = build_function_type (V4HI_type_node,
7947 tree_cons (NULL_TREE, V8QI_type_node,
7948 tree_cons (NULL_TREE, V8QI_type_node,
7949 endlink)));
7950 tree v2si_ftype_v4hi_v4hi
7951 = build_function_type (V2SI_type_node,
7952 tree_cons (NULL_TREE, V4HI_type_node,
7953 tree_cons (NULL_TREE, V4HI_type_node,
7954 endlink)));
7955 tree v4hi_ftype_v4hi_int
7956 = build_function_type (V4HI_type_node,
7957 tree_cons (NULL_TREE, V4HI_type_node,
7958 tree_cons (NULL_TREE, integer_type_node,
7959 endlink)));
7960 tree di_ftype_di_int
7961 = build_function_type (long_long_unsigned_type_node,
7962 tree_cons (NULL_TREE, long_long_unsigned_type_node,
7963 tree_cons (NULL_TREE, integer_type_node,
7964 endlink)));
7965 tree v8qi_ftype_v8qi_di
7966 = build_function_type (V8QI_type_node,
7967 tree_cons (NULL_TREE, V8QI_type_node,
7968 tree_cons (NULL_TREE,
7969 long_long_integer_type_node,
7970 endlink)));
7971 tree v4hi_ftype_v4hi_di
7972 = build_function_type (V4HI_type_node,
7973 tree_cons (NULL_TREE, V4HI_type_node,
7974 tree_cons (NULL_TREE,
7975 long_long_integer_type_node,
7976 endlink)));
7977 tree v2si_ftype_v2si_di
7978 = build_function_type (V2SI_type_node,
7979 tree_cons (NULL_TREE, V2SI_type_node,
7980 tree_cons (NULL_TREE,
7981 long_long_integer_type_node,
7982 endlink)));
7983 tree void_ftype_void
7984 = build_function_type (void_type_node, endlink);
7985 tree void_ftype_pchar_int
7986 = build_function_type (void_type_node,
7987 tree_cons (NULL_TREE, pchar_type_node,
7988 tree_cons (NULL_TREE, integer_type_node,
7989 endlink)));
7990 tree void_ftype_unsigned
7991 = build_function_type (void_type_node,
7992 tree_cons (NULL_TREE, unsigned_type_node,
7993 endlink));
7994 tree unsigned_ftype_void
7995 = build_function_type (unsigned_type_node, endlink);
7996 tree di_ftype_void
7997 = build_function_type (long_long_unsigned_type_node, endlink);
7998 tree ti_ftype_void
7999 = build_function_type (intTI_type_node, endlink);
8000 tree v2si_ftype_v4sf
8001 = build_function_type (V2SI_type_node,
8002 tree_cons (NULL_TREE, V4SF_type_node,
8003 endlink));
8004 /* Loads/stores. */
8005 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
8006 tree_cons (NULL_TREE, V8QI_type_node,
8007 tree_cons (NULL_TREE,
8008 pchar_type_node,
8009 endlink)));
8010 tree void_ftype_v8qi_v8qi_pchar
8011 = build_function_type (void_type_node, maskmovq_args);
8012 tree v4sf_ftype_pfloat
8013 = build_function_type (V4SF_type_node,
8014 tree_cons (NULL_TREE, pfloat_type_node,
8015 endlink));
8016 tree v4sf_ftype_float
8017 = build_function_type (V4SF_type_node,
8018 tree_cons (NULL_TREE, float_type_node,
8019 endlink));
8020 tree v4sf_ftype_float_float_float_float
8021 = build_function_type (V4SF_type_node,
8022 tree_cons (NULL_TREE, float_type_node,
8023 tree_cons (NULL_TREE, float_type_node,
8024 tree_cons (NULL_TREE,
8025 float_type_node,
8026 tree_cons (NULL_TREE,
8027 float_type_node,
8028 endlink)))));
8029 /* @@@ the type is bogus */
8030 tree v4sf_ftype_v4sf_pv2si
8031 = build_function_type (V4SF_type_node,
8032 tree_cons (NULL_TREE, V4SF_type_node,
8033 tree_cons (NULL_TREE, pv2si_type_node,
8034 endlink)));
8035 tree v4sf_ftype_pv2si_v4sf
8036 = build_function_type (V4SF_type_node,
8037 tree_cons (NULL_TREE, V4SF_type_node,
8038 tree_cons (NULL_TREE, pv2si_type_node,
8039 endlink)));
8040 tree void_ftype_pfloat_v4sf
8041 = build_function_type (void_type_node,
8042 tree_cons (NULL_TREE, pfloat_type_node,
8043 tree_cons (NULL_TREE, V4SF_type_node,
8044 endlink)));
8045 tree void_ftype_pdi_di
8046 = build_function_type (void_type_node,
8047 tree_cons (NULL_TREE, pdi_type_node,
8048 tree_cons (NULL_TREE,
8049 long_long_unsigned_type_node,
8050 endlink)));
8051 /* Normal vector unops. */
8052 tree v4sf_ftype_v4sf
8053 = build_function_type (V4SF_type_node,
8054 tree_cons (NULL_TREE, V4SF_type_node,
8055 endlink));
0f290768 8056
bd793c65
BS
8057 /* Normal vector binops. */
8058 tree v4sf_ftype_v4sf_v4sf
8059 = build_function_type (V4SF_type_node,
8060 tree_cons (NULL_TREE, V4SF_type_node,
8061 tree_cons (NULL_TREE, V4SF_type_node,
8062 endlink)));
8063 tree v8qi_ftype_v8qi_v8qi
8064 = build_function_type (V8QI_type_node,
8065 tree_cons (NULL_TREE, V8QI_type_node,
8066 tree_cons (NULL_TREE, V8QI_type_node,
8067 endlink)));
8068 tree v4hi_ftype_v4hi_v4hi
8069 = build_function_type (V4HI_type_node,
8070 tree_cons (NULL_TREE, V4HI_type_node,
8071 tree_cons (NULL_TREE, V4HI_type_node,
8072 endlink)));
8073 tree v2si_ftype_v2si_v2si
8074 = build_function_type (V2SI_type_node,
8075 tree_cons (NULL_TREE, V2SI_type_node,
8076 tree_cons (NULL_TREE, V2SI_type_node,
8077 endlink)));
8078 tree ti_ftype_ti_ti
8079 = build_function_type (intTI_type_node,
8080 tree_cons (NULL_TREE, intTI_type_node,
8081 tree_cons (NULL_TREE, intTI_type_node,
8082 endlink)));
8083 tree di_ftype_di_di
8084 = build_function_type (long_long_unsigned_type_node,
8085 tree_cons (NULL_TREE, long_long_unsigned_type_node,
8086 tree_cons (NULL_TREE,
8087 long_long_unsigned_type_node,
8088 endlink)));
8089
8090 /* Add all builtins that are more or less simple operations on two
8091 operands. */
8092 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
8093 {
8094 /* Use one of the operands; the target can have a different mode for
8095 mask-generating compares. */
8096 enum machine_mode mode;
8097 tree type;
8098
8099 if (d->name == 0)
8100 continue;
8101 mode = insn_data[d->icode].operand[1].mode;
8102
8103 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
8104 continue;
8105
8106 switch (mode)
8107 {
8108 case V4SFmode:
8109 type = v4sf_ftype_v4sf_v4sf;
8110 break;
8111 case V8QImode:
8112 type = v8qi_ftype_v8qi_v8qi;
8113 break;
8114 case V4HImode:
8115 type = v4hi_ftype_v4hi_v4hi;
8116 break;
8117 case V2SImode:
8118 type = v2si_ftype_v2si_v2si;
8119 break;
8120 case TImode:
8121 type = ti_ftype_ti_ti;
8122 break;
8123 case DImode:
8124 type = di_ftype_di_di;
8125 break;
8126
8127 default:
8128 abort ();
8129 }
0f290768 8130
bd793c65
BS
8131 /* Override for comparisons. */
8132 if (d->icode == CODE_FOR_maskcmpv4sf3
8133 || d->icode == CODE_FOR_maskncmpv4sf3
8134 || d->icode == CODE_FOR_vmmaskcmpv4sf3
8135 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
8136 type = v4si_ftype_v4sf_v4sf;
8137
8138 def_builtin (d->name, type, d->code);
8139 }
8140
8141 /* Add the remaining MMX insns with somewhat more complicated types. */
8142 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
8143 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
8144 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
8145 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
8146 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
8147 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
8148 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
8149 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
8150 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
8151
8152 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
8153 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
8154 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
8155
8156 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
8157 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
8158
8159 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
8160 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
8161
8162 /* Everything beyond this point is SSE only. */
8163 if (! TARGET_SSE)
8164 return;
0f290768 8165
bd793c65
BS
8166 /* comi/ucomi insns. */
8167 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
8168 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
8169
8170 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
8171 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
8172 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
8173
8174 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
8175 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
8176 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
8177 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
8178 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
8179 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
8180
8181 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
8182 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
8183
8184 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
8185
8186 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
8187 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
8188 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
8189 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
8190 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
8191 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
8192
8193 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
8194 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
8195 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
8196 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
8197
8198 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
8199 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
8200 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
8201 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
8202
8203 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
8204 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
8205
8206 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
8207
8208 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
8209 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
8210 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
8211 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
8212 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
8213 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
8214
8215 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
8216
8217 /* Composite intrinsics. */
8218 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
8219 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
8220 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
8221 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
8222 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
8223 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
8224 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
8225}
8226
8227/* Errors in the source file can cause expand_expr to return const0_rtx
8228 where we expect a vector. To avoid crashing, use one of the vector
8229 clear instructions. */
8230static rtx
8231safe_vector_operand (x, mode)
8232 rtx x;
8233 enum machine_mode mode;
8234{
8235 if (x != const0_rtx)
8236 return x;
8237 x = gen_reg_rtx (mode);
8238
8239 if (VALID_MMX_REG_MODE (mode))
8240 emit_insn (gen_mmx_clrdi (mode == DImode ? x
8241 : gen_rtx_SUBREG (DImode, x, 0)));
8242 else
8243 emit_insn (gen_sse_clrti (mode == TImode ? x
8244 : gen_rtx_SUBREG (TImode, x, 0)));
8245 return x;
8246}
8247
8248/* Subroutine of ix86_expand_builtin to take care of binop insns. */
8249
8250static rtx
8251ix86_expand_binop_builtin (icode, arglist, target)
8252 enum insn_code icode;
8253 tree arglist;
8254 rtx target;
8255{
8256 rtx pat;
8257 tree arg0 = TREE_VALUE (arglist);
8258 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8259 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8260 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8261 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8262 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8263 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
8264
8265 if (VECTOR_MODE_P (mode0))
8266 op0 = safe_vector_operand (op0, mode0);
8267 if (VECTOR_MODE_P (mode1))
8268 op1 = safe_vector_operand (op1, mode1);
8269
8270 if (! target
8271 || GET_MODE (target) != tmode
8272 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8273 target = gen_reg_rtx (tmode);
8274
8275 /* In case the insn wants input operands in modes different from
8276 the result, abort. */
8277 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
8278 abort ();
8279
8280 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8281 op0 = copy_to_mode_reg (mode0, op0);
8282 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8283 op1 = copy_to_mode_reg (mode1, op1);
8284
8285 pat = GEN_FCN (icode) (target, op0, op1);
8286 if (! pat)
8287 return 0;
8288 emit_insn (pat);
8289 return target;
8290}
8291
8292/* Subroutine of ix86_expand_builtin to take care of stores. */
8293
8294static rtx
8295ix86_expand_store_builtin (icode, arglist, shuffle)
8296 enum insn_code icode;
8297 tree arglist;
8298 int shuffle;
8299{
8300 rtx pat;
8301 tree arg0 = TREE_VALUE (arglist);
8302 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8303 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8304 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8305 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
8306 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
8307
8308 if (VECTOR_MODE_P (mode1))
8309 op1 = safe_vector_operand (op1, mode1);
8310
8311 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8312 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8313 op1 = copy_to_mode_reg (mode1, op1);
8314 if (shuffle >= 0)
8315 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
8316 pat = GEN_FCN (icode) (op0, op1);
8317 if (pat)
8318 emit_insn (pat);
8319 return 0;
8320}
8321
8322/* Subroutine of ix86_expand_builtin to take care of unop insns. */
8323
8324static rtx
8325ix86_expand_unop_builtin (icode, arglist, target, do_load)
8326 enum insn_code icode;
8327 tree arglist;
8328 rtx target;
8329 int do_load;
8330{
8331 rtx pat;
8332 tree arg0 = TREE_VALUE (arglist);
8333 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8334 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8335 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8336
8337 if (! target
8338 || GET_MODE (target) != tmode
8339 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8340 target = gen_reg_rtx (tmode);
8341 if (do_load)
8342 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8343 else
8344 {
8345 if (VECTOR_MODE_P (mode0))
8346 op0 = safe_vector_operand (op0, mode0);
8347
8348 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8349 op0 = copy_to_mode_reg (mode0, op0);
8350 }
8351
8352 pat = GEN_FCN (icode) (target, op0);
8353 if (! pat)
8354 return 0;
8355 emit_insn (pat);
8356 return target;
8357}
8358
8359/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
8360 sqrtss, rsqrtss, rcpss. */
8361
8362static rtx
8363ix86_expand_unop1_builtin (icode, arglist, target)
8364 enum insn_code icode;
8365 tree arglist;
8366 rtx target;
8367{
8368 rtx pat;
8369 tree arg0 = TREE_VALUE (arglist);
8370 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8371 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8372 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8373
8374 if (! target
8375 || GET_MODE (target) != tmode
8376 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8377 target = gen_reg_rtx (tmode);
8378
8379 if (VECTOR_MODE_P (mode0))
8380 op0 = safe_vector_operand (op0, mode0);
8381
8382 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8383 op0 = copy_to_mode_reg (mode0, op0);
8384
8385 pat = GEN_FCN (icode) (target, op0, op0);
8386 if (! pat)
8387 return 0;
8388 emit_insn (pat);
8389 return target;
8390}
8391
8392/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
8393
8394static rtx
8395ix86_expand_sse_compare (d, arglist, target)
8396 struct builtin_description *d;
8397 tree arglist;
8398 rtx target;
8399{
8400 rtx pat;
8401 tree arg0 = TREE_VALUE (arglist);
8402 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8403 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8404 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8405 rtx op2;
8406 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
8407 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
8408 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
8409 enum rtx_code comparison = d->comparison;
8410
8411 if (VECTOR_MODE_P (mode0))
8412 op0 = safe_vector_operand (op0, mode0);
8413 if (VECTOR_MODE_P (mode1))
8414 op1 = safe_vector_operand (op1, mode1);
8415
8416 /* Swap operands if we have a comparison that isn't available in
8417 hardware. */
8418 if (d->flag)
8419 {
8420 target = gen_reg_rtx (tmode);
8421 emit_move_insn (target, op1);
8422 op1 = op0;
8423 op0 = target;
8424 comparison = swap_condition (comparison);
8425 }
8426 else if (! target
8427 || GET_MODE (target) != tmode
8428 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
8429 target = gen_reg_rtx (tmode);
8430
8431 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
8432 op0 = copy_to_mode_reg (mode0, op0);
8433 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
8434 op1 = copy_to_mode_reg (mode1, op1);
8435
8436 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
8437 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
8438 if (! pat)
8439 return 0;
8440 emit_insn (pat);
8441 return target;
8442}
8443
8444/* Subroutine of ix86_expand_builtin to take care of comi insns. */
8445
8446static rtx
8447ix86_expand_sse_comi (d, arglist, target)
8448 struct builtin_description *d;
8449 tree arglist;
8450 rtx target;
8451{
8452 rtx pat;
8453 tree arg0 = TREE_VALUE (arglist);
8454 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8455 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8456 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8457 rtx op2;
8458 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
8459 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
8460 enum rtx_code comparison = d->comparison;
8461
8462 if (VECTOR_MODE_P (mode0))
8463 op0 = safe_vector_operand (op0, mode0);
8464 if (VECTOR_MODE_P (mode1))
8465 op1 = safe_vector_operand (op1, mode1);
8466
8467 /* Swap operands if we have a comparison that isn't available in
8468 hardware. */
8469 if (d->flag)
8470 {
8471 rtx tmp = op1;
8472 op1 = op0;
8473 op0 = tmp;
8474 comparison = swap_condition (comparison);
8475 }
8476
8477 target = gen_reg_rtx (SImode);
8478 emit_move_insn (target, const0_rtx);
8479 target = gen_rtx_SUBREG (QImode, target, 0);
8480
8481 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
8482 op0 = copy_to_mode_reg (mode0, op0);
8483 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
8484 op1 = copy_to_mode_reg (mode1, op1);
8485
8486 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
8487 pat = GEN_FCN (d->icode) (op0, op1, op2);
8488 if (! pat)
8489 return 0;
8490 emit_insn (pat);
8491 emit_insn (gen_setcc_2 (target, op2));
8492
8493 return target;
8494}
8495
8496/* Expand an expression EXP that calls a built-in function,
8497 with result going to TARGET if that's convenient
8498 (and in mode MODE if that's convenient).
8499 SUBTARGET may be used as the target for computing one of EXP's operands.
8500 IGNORE is nonzero if the value is to be ignored. */
8501
8502rtx
8503ix86_expand_builtin (exp, target, subtarget, mode, ignore)
8504 tree exp;
8505 rtx target;
8506 rtx subtarget ATTRIBUTE_UNUSED;
8507 enum machine_mode mode ATTRIBUTE_UNUSED;
8508 int ignore ATTRIBUTE_UNUSED;
8509{
8510 struct builtin_description *d;
77ebd435 8511 size_t i;
bd793c65
BS
8512 enum insn_code icode;
8513 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8514 tree arglist = TREE_OPERAND (exp, 1);
8515 tree arg0, arg1, arg2, arg3;
8516 rtx op0, op1, op2, pat;
8517 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 8518 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
bd793c65
BS
8519
8520 switch (fcode)
8521 {
8522 case IX86_BUILTIN_EMMS:
8523 emit_insn (gen_emms ());
8524 return 0;
8525
8526 case IX86_BUILTIN_SFENCE:
8527 emit_insn (gen_sfence ());
8528 return 0;
8529
8530 case IX86_BUILTIN_M_FROM_INT:
8531 target = gen_reg_rtx (DImode);
8532 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8533 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
8534 return target;
8535
8536 case IX86_BUILTIN_M_TO_INT:
8537 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8538 op0 = copy_to_mode_reg (DImode, op0);
8539 target = gen_reg_rtx (SImode);
8540 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
8541 return target;
8542
8543 case IX86_BUILTIN_PEXTRW:
8544 icode = CODE_FOR_mmx_pextrw;
8545 arg0 = TREE_VALUE (arglist);
8546 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8547 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8548 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8549 tmode = insn_data[icode].operand[0].mode;
8550 mode0 = insn_data[icode].operand[1].mode;
8551 mode1 = insn_data[icode].operand[2].mode;
8552
8553 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8554 op0 = copy_to_mode_reg (mode0, op0);
8555 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8556 {
8557 /* @@@ better error message */
8558 error ("selector must be an immediate");
8559 return const0_rtx;
8560 }
8561 if (target == 0
8562 || GET_MODE (target) != tmode
8563 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8564 target = gen_reg_rtx (tmode);
8565 pat = GEN_FCN (icode) (target, op0, op1);
8566 if (! pat)
8567 return 0;
8568 emit_insn (pat);
8569 return target;
8570
8571 case IX86_BUILTIN_PINSRW:
8572 icode = CODE_FOR_mmx_pinsrw;
8573 arg0 = TREE_VALUE (arglist);
8574 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8575 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8576 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8577 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8578 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8579 tmode = insn_data[icode].operand[0].mode;
8580 mode0 = insn_data[icode].operand[1].mode;
8581 mode1 = insn_data[icode].operand[2].mode;
8582 mode2 = insn_data[icode].operand[3].mode;
8583
8584 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8585 op0 = copy_to_mode_reg (mode0, op0);
8586 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8587 op1 = copy_to_mode_reg (mode1, op1);
8588 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8589 {
8590 /* @@@ better error message */
8591 error ("selector must be an immediate");
8592 return const0_rtx;
8593 }
8594 if (target == 0
8595 || GET_MODE (target) != tmode
8596 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8597 target = gen_reg_rtx (tmode);
8598 pat = GEN_FCN (icode) (target, op0, op1, op2);
8599 if (! pat)
8600 return 0;
8601 emit_insn (pat);
8602 return target;
8603
8604 case IX86_BUILTIN_MASKMOVQ:
8605 icode = CODE_FOR_mmx_maskmovq;
8606 /* Note the arg order is different from the operand order. */
8607 arg1 = TREE_VALUE (arglist);
8608 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
8609 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8610 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8611 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8612 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8613 mode0 = insn_data[icode].operand[0].mode;
8614 mode1 = insn_data[icode].operand[1].mode;
8615 mode2 = insn_data[icode].operand[2].mode;
8616
8617 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8618 op0 = copy_to_mode_reg (mode0, op0);
8619 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8620 op1 = copy_to_mode_reg (mode1, op1);
8621 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
8622 op2 = copy_to_mode_reg (mode2, op2);
8623 pat = GEN_FCN (icode) (op0, op1, op2);
8624 if (! pat)
8625 return 0;
8626 emit_insn (pat);
8627 return 0;
8628
8629 case IX86_BUILTIN_SQRTSS:
8630 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
8631 case IX86_BUILTIN_RSQRTSS:
8632 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
8633 case IX86_BUILTIN_RCPSS:
8634 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
8635
8636 case IX86_BUILTIN_LOADAPS:
8637 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
8638
8639 case IX86_BUILTIN_LOADUPS:
8640 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
8641
8642 case IX86_BUILTIN_STOREAPS:
8643 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
8644 case IX86_BUILTIN_STOREUPS:
8645 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
8646
8647 case IX86_BUILTIN_LOADSS:
8648 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
8649
8650 case IX86_BUILTIN_STORESS:
8651 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
8652
0f290768 8653 case IX86_BUILTIN_LOADHPS:
bd793c65
BS
8654 case IX86_BUILTIN_LOADLPS:
8655 icode = (fcode == IX86_BUILTIN_LOADHPS
8656 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8657 arg0 = TREE_VALUE (arglist);
8658 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8659 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8660 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8661 tmode = insn_data[icode].operand[0].mode;
8662 mode0 = insn_data[icode].operand[1].mode;
8663 mode1 = insn_data[icode].operand[2].mode;
8664
8665 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8666 op0 = copy_to_mode_reg (mode0, op0);
8667 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
8668 if (target == 0
8669 || GET_MODE (target) != tmode
8670 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8671 target = gen_reg_rtx (tmode);
8672 pat = GEN_FCN (icode) (target, op0, op1);
8673 if (! pat)
8674 return 0;
8675 emit_insn (pat);
8676 return target;
0f290768 8677
bd793c65
BS
8678 case IX86_BUILTIN_STOREHPS:
8679 case IX86_BUILTIN_STORELPS:
8680 icode = (fcode == IX86_BUILTIN_STOREHPS
8681 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8682 arg0 = TREE_VALUE (arglist);
8683 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8684 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8685 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8686 mode0 = insn_data[icode].operand[1].mode;
8687 mode1 = insn_data[icode].operand[2].mode;
8688
8689 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8690 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8691 op1 = copy_to_mode_reg (mode1, op1);
8692
8693 pat = GEN_FCN (icode) (op0, op0, op1);
8694 if (! pat)
8695 return 0;
8696 emit_insn (pat);
8697 return 0;
8698
8699 case IX86_BUILTIN_MOVNTPS:
8700 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
8701 case IX86_BUILTIN_MOVNTQ:
8702 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
8703
8704 case IX86_BUILTIN_LDMXCSR:
8705 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8706 target = assign_386_stack_local (SImode, 0);
8707 emit_move_insn (target, op0);
8708 emit_insn (gen_ldmxcsr (target));
8709 return 0;
8710
8711 case IX86_BUILTIN_STMXCSR:
8712 target = assign_386_stack_local (SImode, 0);
8713 emit_insn (gen_stmxcsr (target));
8714 return copy_to_mode_reg (SImode, target);
8715
8716 case IX86_BUILTIN_PREFETCH:
8717 icode = CODE_FOR_prefetch;
8718 arg0 = TREE_VALUE (arglist);
8719 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8720 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8721 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
332316cd
BS
8722 mode0 = insn_data[icode].operand[0].mode;
8723 mode1 = insn_data[icode].operand[1].mode;
bd793c65 8724
332316cd 8725 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
bd793c65
BS
8726 {
8727 /* @@@ better error message */
8728 error ("selector must be an immediate");
8729 return const0_rtx;
8730 }
8731
332316cd 8732 op0 = copy_to_mode_reg (Pmode, op0);
bd793c65
BS
8733 pat = GEN_FCN (icode) (op0, op1);
8734 if (! pat)
8735 return 0;
8736 emit_insn (pat);
8737 return target;
0f290768 8738
bd793c65
BS
8739 case IX86_BUILTIN_SHUFPS:
8740 icode = CODE_FOR_sse_shufps;
8741 arg0 = TREE_VALUE (arglist);
8742 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8743 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8744 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8745 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8746 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8747 tmode = insn_data[icode].operand[0].mode;
8748 mode0 = insn_data[icode].operand[1].mode;
8749 mode1 = insn_data[icode].operand[2].mode;
8750 mode2 = insn_data[icode].operand[3].mode;
8751
8752 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8753 op0 = copy_to_mode_reg (mode0, op0);
8754 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8755 op1 = copy_to_mode_reg (mode1, op1);
8756 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8757 {
8758 /* @@@ better error message */
8759 error ("mask must be an immediate");
8760 return const0_rtx;
8761 }
8762 if (target == 0
8763 || GET_MODE (target) != tmode
8764 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8765 target = gen_reg_rtx (tmode);
8766 pat = GEN_FCN (icode) (target, op0, op1, op2);
8767 if (! pat)
8768 return 0;
8769 emit_insn (pat);
8770 return target;
8771
8772 case IX86_BUILTIN_PSHUFW:
8773 icode = CODE_FOR_mmx_pshufw;
8774 arg0 = TREE_VALUE (arglist);
8775 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8776 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8777 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8778 tmode = insn_data[icode].operand[0].mode;
8779 mode0 = insn_data[icode].operand[2].mode;
8780 mode1 = insn_data[icode].operand[3].mode;
8781
8782 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8783 op0 = copy_to_mode_reg (mode0, op0);
8784 if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
8785 {
8786 /* @@@ better error message */
8787 error ("mask must be an immediate");
8788 return const0_rtx;
8789 }
8790 if (target == 0
8791 || GET_MODE (target) != tmode
8792 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8793 target = gen_reg_rtx (tmode);
8794 pat = GEN_FCN (icode) (target, target, op0, op1);
8795 if (! pat)
8796 return 0;
8797 emit_insn (pat);
8798 return target;
8799
8800 /* Composite intrinsics. */
8801 case IX86_BUILTIN_SETPS1:
8802 target = assign_386_stack_local (SFmode, 0);
8803 arg0 = TREE_VALUE (arglist);
8804 emit_move_insn (change_address (target, SFmode, XEXP (target, 0)),
8805 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8806 op0 = gen_reg_rtx (V4SFmode);
8807 emit_insn (gen_sse_loadss (op0, change_address (target, V4SFmode,
8808 XEXP (target, 0))));
8809 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
8810 return op0;
0f290768 8811
bd793c65
BS
8812 case IX86_BUILTIN_SETPS:
8813 target = assign_386_stack_local (V4SFmode, 0);
8814 op0 = change_address (target, SFmode, XEXP (target, 0));
8815 arg0 = TREE_VALUE (arglist);
8816 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8817 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8818 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
8819 emit_move_insn (op0,
8820 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8821 emit_move_insn (adj_offsettable_operand (op0, 4),
8822 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
8823 emit_move_insn (adj_offsettable_operand (op0, 8),
8824 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
8825 emit_move_insn (adj_offsettable_operand (op0, 12),
8826 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
8827 op0 = gen_reg_rtx (V4SFmode);
8828 emit_insn (gen_sse_movaps (op0, target));
8829 return op0;
8830
8831 case IX86_BUILTIN_CLRPS:
8832 target = gen_reg_rtx (TImode);
8833 emit_insn (gen_sse_clrti (target));
8834 return target;
8835
8836 case IX86_BUILTIN_LOADRPS:
8837 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
8838 gen_reg_rtx (V4SFmode), 1);
8839 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
8840 return target;
8841
8842 case IX86_BUILTIN_LOADPS1:
8843 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
8844 gen_reg_rtx (V4SFmode), 1);
8845 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
8846 return target;
8847
8848 case IX86_BUILTIN_STOREPS1:
8849 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
8850 case IX86_BUILTIN_STORERPS:
8851 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
8852
8853 case IX86_BUILTIN_MMX_ZERO:
8854 target = gen_reg_rtx (DImode);
8855 emit_insn (gen_mmx_clrdi (target));
8856 return target;
8857
8858 default:
8859 break;
8860 }
8861
8862 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
8863 if (d->code == fcode)
8864 {
8865 /* Compares are treated specially. */
8866 if (d->icode == CODE_FOR_maskcmpv4sf3
8867 || d->icode == CODE_FOR_vmmaskcmpv4sf3
8868 || d->icode == CODE_FOR_maskncmpv4sf3
8869 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
8870 return ix86_expand_sse_compare (d, arglist, target);
8871
8872 return ix86_expand_binop_builtin (d->icode, arglist, target);
8873 }
8874
8875 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
8876 if (d->code == fcode)
8877 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
0f290768 8878
bd793c65
BS
8879 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
8880 if (d->code == fcode)
8881 return ix86_expand_sse_comi (d, arglist, target);
0f290768 8882
bd793c65
BS
8883 /* @@@ Should really do something sensible here. */
8884 return 0;
bd793c65 8885}
4211a8fb
JH
8886
8887/* Store OPERAND to the memory after reload is completed. This means
8888 that we can't easilly use assign_stack_local. */
8889rtx
8890ix86_force_to_memory (mode, operand)
8891 enum machine_mode mode;
8892 rtx operand;
8893{
8894 if (!reload_completed)
8895 abort ();
8896 switch (mode)
8897 {
8898 case DImode:
8899 {
8900 rtx operands[2];
8901 split_di (&operand, 1, operands, operands+1);
8902 emit_insn (
8903 gen_rtx_SET (VOIDmode,
8904 gen_rtx_MEM (SImode,
8905 gen_rtx_PRE_DEC (Pmode,
8906 stack_pointer_rtx)),
8907 operands[1]));
8908 emit_insn (
8909 gen_rtx_SET (VOIDmode,
8910 gen_rtx_MEM (SImode,
8911 gen_rtx_PRE_DEC (Pmode,
8912 stack_pointer_rtx)),
8913 operands[0]));
8914 }
8915 break;
8916 case HImode:
8917 /* It is better to store HImodes as SImodes. */
8918 if (!TARGET_PARTIAL_REG_STALL)
8919 operand = gen_lowpart (SImode, operand);
8920 /* FALLTHRU */
8921 case SImode:
8922 emit_insn (
8923 gen_rtx_SET (VOIDmode,
8924 gen_rtx_MEM (GET_MODE (operand),
8925 gen_rtx_PRE_DEC (SImode,
8926 stack_pointer_rtx)),
8927 operand));
8928 break;
8929 default:
8930 abort();
8931 }
8932 return gen_rtx_MEM (mode, stack_pointer_rtx);
8933}
8934
8935/* Free operand from the memory. */
8936void
8937ix86_free_from_memory (mode)
8938 enum machine_mode mode;
8939{
8940 /* Use LEA to deallocate stack space. In peephole2 it will be converted
8941 to pop or add instruction if registers are available. */
8942 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8943 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8944 GEN_INT (mode == DImode
8945 ? 8
8946 : mode == HImode && TARGET_PARTIAL_REG_STALL
8947 ? 2
8948 : 4))));
8949}
a946dd00 8950
f84aa48a
JH
8951/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
8952 QImode must go into class Q_REGS.
8953 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
8954 movdf to do mem-to-mem moves through integer regs. */
8955enum reg_class
8956ix86_preferred_reload_class (x, class)
8957 rtx x;
8958 enum reg_class class;
8959{
8960 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
8961 {
8962 /* SSE can't load any constant directly yet. */
8963 if (SSE_CLASS_P (class))
8964 return NO_REGS;
8965 /* Floats can load 0 and 1. */
8966 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
8967 {
8968 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
8969 if (MAYBE_SSE_CLASS_P (class))
8970 return (reg_class_subset_p (class, GENERAL_REGS)
8971 ? GENERAL_REGS : FLOAT_REGS);
8972 else
8973 return class;
8974 }
8975 /* General regs can load everything. */
8976 if (reg_class_subset_p (class, GENERAL_REGS))
8977 return GENERAL_REGS;
8978 /* In case we haven't resolved FLOAT or SSE yet, give up. */
8979 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
8980 return NO_REGS;
8981 }
8982 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
8983 return NO_REGS;
8984 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
8985 return Q_REGS;
8986 return class;
8987}
8988
8989/* If we are copying between general and FP registers, we need a memory
8990 location. The same is true for SSE and MMX registers.
8991
8992 The macro can't work reliably when one of the CLASSES is class containing
8993 registers from multiple units (SSE, MMX, integer). We avoid this by never
8994 combining those units in single alternative in the machine description.
8995 Ensure that this constraint holds to avoid unexpected surprises.
8996
8997 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
8998 enforce these sanity checks. */
8999int
9000ix86_secondary_memory_needed (class1, class2, mode, strict)
9001 enum reg_class class1, class2;
9002 enum machine_mode mode;
9003 int strict;
9004{
9005 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
9006 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
9007 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
9008 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
9009 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
9010 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
9011 {
9012 if (strict)
9013 abort ();
9014 else
9015 return 1;
9016 }
9017 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
9018 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
9019 && (mode) != SImode)
9020 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
9021 && (mode) != SImode));
9022}
9023/* Return the cost of moving data from a register in class CLASS1 to
9024 one in class CLASS2.
9025
9026 It is not required that the cost always equal 2 when FROM is the same as TO;
9027 on some machines it is expensive to move between registers if they are not
9028 general registers. */
9029int
9030ix86_register_move_cost (mode, class1, class2)
9031 enum machine_mode mode;
9032 enum reg_class class1, class2;
9033{
9034 /* In case we require secondary memory, compute cost of the store followed
9035 by load. In case of copying from general_purpose_register we may emit
9036 multiple stores followed by single load causing memory size mismatch
9037 stall. Count this as arbitarily high cost of 20. */
9038 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
9039 {
62415523 9040 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
f84aa48a 9041 return 10;
62415523
JH
9042 return (MEMORY_MOVE_COST (mode, class1, 0)
9043 + MEMORY_MOVE_COST (mode, class2, 1));
f84aa48a
JH
9044 }
9045 /* Moves between SSE/MMX and integer unit are expensive.
9046 ??? We should make this cost CPU specific. */
62415523
JH
9047 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
9048 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
fa79946e
JH
9049 return ix86_cost->mmxsse_to_integer;
9050 if (MAYBE_FLOAT_CLASS_P (class1))
9051 return ix86_cost->fp_move;
9052 if (MAYBE_SSE_CLASS_P (class1))
9053 return ix86_cost->sse_move;
9054 if (MAYBE_MMX_CLASS_P (class1))
9055 return ix86_cost->mmx_move;
f84aa48a
JH
9056 return 2;
9057}
9058
a946dd00
JH
9059/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
9060int
9061ix86_hard_regno_mode_ok (regno, mode)
9062 int regno;
9063 enum machine_mode mode;
9064{
9065 /* Flags and only flags can only hold CCmode values. */
9066 if (CC_REGNO_P (regno))
9067 return GET_MODE_CLASS (mode) == MODE_CC;
9068 if (GET_MODE_CLASS (mode) == MODE_CC
9069 || GET_MODE_CLASS (mode) == MODE_RANDOM
9070 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
9071 return 0;
9072 if (FP_REGNO_P (regno))
9073 return VALID_FP_MODE_P (mode);
9074 if (SSE_REGNO_P (regno))
9075 return VALID_SSE_REG_MODE (mode);
9076 if (MMX_REGNO_P (regno))
9077 return VALID_MMX_REG_MODE (mode);
9078 /* We handle both integer and floats in the general purpose registers.
9079 In future we should be able to handle vector modes as well. */
9080 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
9081 return 0;
9082 /* Take care for QImode values - they can be in non-QI regs, but then
9083 they do cause partial register stalls. */
62415523 9084 if (regno < 4 || mode != QImode)
a946dd00
JH
9085 return 1;
9086 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
9087}
fa79946e
JH
9088
9089/* Return the cost of moving data of mode M between a
9090 register and memory. A value of 2 is the default; this cost is
9091 relative to those in `REGISTER_MOVE_COST'.
9092
9093 If moving between registers and memory is more expensive than
9094 between two registers, you should define this macro to express the
9095 relative cost.
9096
9097 Model also increased moving costs of QImode registers in non
9098 Q_REGS classes.
9099 */
9100int
9101ix86_memory_move_cost (mode, class, in)
9102 enum machine_mode mode;
9103 enum reg_class class;
9104 int in;
9105{
9106 if (FLOAT_CLASS_P (class))
9107 {
9108 int index;
9109 switch (mode)
9110 {
9111 case SFmode:
9112 index = 0;
9113 break;
9114 case DFmode:
9115 index = 1;
9116 break;
9117 case XFmode:
9118 case TFmode:
9119 index = 2;
9120 break;
9121 default:
9122 return 100;
9123 }
9124 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
9125 }
9126 if (SSE_CLASS_P (class))
9127 {
9128 int index;
9129 switch (GET_MODE_SIZE (mode))
9130 {
9131 case 4:
9132 index = 0;
9133 break;
9134 case 8:
9135 index = 1;
9136 break;
9137 case 16:
9138 index = 2;
9139 break;
9140 default:
9141 return 100;
9142 }
9143 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
9144 }
9145 if (MMX_CLASS_P (class))
9146 {
9147 int index;
9148 switch (GET_MODE_SIZE (mode))
9149 {
9150 case 4:
9151 index = 0;
9152 break;
9153 case 8:
9154 index = 1;
9155 break;
9156 default:
9157 return 100;
9158 }
9159 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
9160 }
9161 switch (GET_MODE_SIZE (mode))
9162 {
9163 case 1:
9164 if (in)
9165 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
9166 : ix86_cost->movzbl_load);
9167 else
9168 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
9169 : ix86_cost->int_store[0] + 4);
9170 break;
9171 case 2:
9172 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
9173 default:
9174 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
9175 if (mode == TFmode)
9176 mode = XFmode;
3bb7e126 9177 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
fa79946e
JH
9178 * (int) GET_MODE_SIZE (mode) / 4);
9179 }
9180}
This page took 2.327955 seconds and 5 git commands to generate.