/* gcc/config/i386/i386.c -- recovered from a gitweb blame export;
   blame annotations have been stripped from the code below.  */
e075ae69 1/* Subroutines used for code generation on IA-32.
8752c357 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
4592bdcb 3 Free Software Foundation, Inc.
2a2ab3f9
JVA
4
5This file is part of GNU CC.
6
7GNU CC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU CC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU CC; see the file COPYING. If not, write to
97aadbb9 19the Free Software Foundation, 59 Temple Place - Suite 330,
0f290768 20Boston, MA 02111-1307, USA. */
2a2ab3f9 21
2a2ab3f9 22#include "config.h"
1fba7553 23#include <setjmp.h>
bb5177ac 24#include "system.h"
2a2ab3f9 25#include "rtl.h"
6baf1cc8
BS
26#include "tree.h"
27#include "tm_p.h"
2a2ab3f9
JVA
28#include "regs.h"
29#include "hard-reg-set.h"
30#include "real.h"
31#include "insn-config.h"
32#include "conditions.h"
33#include "insn-flags.h"
34#include "output.h"
35#include "insn-attr.h"
2a2ab3f9 36#include "flags.h"
a8ffcc81 37#include "except.h"
ecbc4695 38#include "function.h"
00c79232 39#include "recog.h"
ced8dd8c 40#include "expr.h"
f103890b 41#include "toplev.h"
e075ae69 42#include "basic-block.h"
1526a060 43#include "ggc.h"
2a2ab3f9 44
/* Interval for stack probes when -fstack-limit checking is in use;
   -1 means the target headers did not request a limit.  Parenthesized
   so the macro expands safely inside larger expressions.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
32b5b1aa
SC
49/* Processor costs (relative to an add) */
50struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 51 1, /* cost of an add instruction */
32b5b1aa
SC
52 1, /* cost of a lea instruction */
53 3, /* variable shift costs */
54 2, /* constant shift costs */
55 6, /* cost of starting a multiply */
56 1, /* cost of multiply per each bit set */
e075ae69 57 23, /* cost of a divide/mod */
96e7ae40 58 15, /* "large" insn */
e2e52e1b 59 3, /* MOVE_RATIO */
7c6b971d 60 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
61 {2, 4, 2}, /* cost of loading integer registers
62 in QImode, HImode and SImode.
0f290768 63 Relative to reg-reg move (2). */
96e7ae40
JH
64 {2, 4, 2}, /* cost of storing integer registers */
65 2, /* cost of reg,reg fld/fst */
66 {8, 8, 8}, /* cost of loading fp registers
67 in SFmode, DFmode and XFmode */
fa79946e
JH
68 {8, 8, 8}, /* cost of loading integer registers */
69 2, /* cost of moving MMX register */
70 {4, 8}, /* cost of loading MMX registers
71 in SImode and DImode */
72 {4, 8}, /* cost of storing MMX registers
73 in SImode and DImode */
74 2, /* cost of moving SSE register */
75 {4, 8, 16}, /* cost of loading SSE registers
76 in SImode, DImode and TImode */
77 {4, 8, 16}, /* cost of storing SSE registers
78 in SImode, DImode and TImode */
79 3, /* MMX or SSE register to integer */
32b5b1aa
SC
80};
81
82struct processor_costs i486_cost = { /* 486 specific costs */
83 1, /* cost of an add instruction */
84 1, /* cost of a lea instruction */
85 3, /* variable shift costs */
86 2, /* constant shift costs */
87 12, /* cost of starting a multiply */
88 1, /* cost of multiply per each bit set */
e075ae69 89 40, /* cost of a divide/mod */
96e7ae40 90 15, /* "large" insn */
e2e52e1b 91 3, /* MOVE_RATIO */
7c6b971d 92 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
93 {2, 4, 2}, /* cost of loading integer registers
94 in QImode, HImode and SImode.
0f290768 95 Relative to reg-reg move (2). */
96e7ae40
JH
96 {2, 4, 2}, /* cost of storing integer registers */
97 2, /* cost of reg,reg fld/fst */
98 {8, 8, 8}, /* cost of loading fp registers
99 in SFmode, DFmode and XFmode */
fa79946e
JH
100 {8, 8, 8}, /* cost of loading integer registers */
101 2, /* cost of moving MMX register */
102 {4, 8}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {4, 8}, /* cost of storing MMX registers
105 in SImode and DImode */
106 2, /* cost of moving SSE register */
107 {4, 8, 16}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {4, 8, 16}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3 /* MMX or SSE register to integer */
32b5b1aa
SC
112};
113
e5cb57e8 114struct processor_costs pentium_cost = {
32b5b1aa
SC
115 1, /* cost of an add instruction */
116 1, /* cost of a lea instruction */
856b07a1 117 4, /* variable shift costs */
e5cb57e8 118 1, /* constant shift costs */
856b07a1
SC
119 11, /* cost of starting a multiply */
120 0, /* cost of multiply per each bit set */
e075ae69 121 25, /* cost of a divide/mod */
96e7ae40 122 8, /* "large" insn */
e2e52e1b 123 6, /* MOVE_RATIO */
7c6b971d 124 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
125 {2, 4, 2}, /* cost of loading integer registers
126 in QImode, HImode and SImode.
0f290768 127 Relative to reg-reg move (2). */
96e7ae40
JH
128 {2, 4, 2}, /* cost of storing integer registers */
129 2, /* cost of reg,reg fld/fst */
130 {2, 2, 6}, /* cost of loading fp registers
131 in SFmode, DFmode and XFmode */
fa79946e
JH
132 {4, 4, 6}, /* cost of loading integer registers */
133 8, /* cost of moving MMX register */
134 {8, 8}, /* cost of loading MMX registers
135 in SImode and DImode */
136 {8, 8}, /* cost of storing MMX registers
137 in SImode and DImode */
138 2, /* cost of moving SSE register */
139 {4, 8, 16}, /* cost of loading SSE registers
140 in SImode, DImode and TImode */
141 {4, 8, 16}, /* cost of storing SSE registers
142 in SImode, DImode and TImode */
143 3 /* MMX or SSE register to integer */
32b5b1aa
SC
144};
145
856b07a1
SC
146struct processor_costs pentiumpro_cost = {
147 1, /* cost of an add instruction */
148 1, /* cost of a lea instruction */
e075ae69 149 1, /* variable shift costs */
856b07a1 150 1, /* constant shift costs */
369e59b1 151 4, /* cost of starting a multiply */
856b07a1 152 0, /* cost of multiply per each bit set */
e075ae69 153 17, /* cost of a divide/mod */
96e7ae40 154 8, /* "large" insn */
e2e52e1b 155 6, /* MOVE_RATIO */
7c6b971d 156 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
157 {4, 4, 4}, /* cost of loading integer registers
158 in QImode, HImode and SImode.
0f290768 159 Relative to reg-reg move (2). */
96e7ae40
JH
160 {2, 2, 2}, /* cost of storing integer registers */
161 2, /* cost of reg,reg fld/fst */
162 {2, 2, 6}, /* cost of loading fp registers
163 in SFmode, DFmode and XFmode */
fa79946e
JH
164 {4, 4, 6}, /* cost of loading integer registers */
165 2, /* cost of moving MMX register */
166 {2, 2}, /* cost of loading MMX registers
167 in SImode and DImode */
168 {2, 2}, /* cost of storing MMX registers
169 in SImode and DImode */
170 2, /* cost of moving SSE register */
171 {2, 2, 8}, /* cost of loading SSE registers
172 in SImode, DImode and TImode */
173 {2, 2, 8}, /* cost of storing SSE registers
174 in SImode, DImode and TImode */
175 3 /* MMX or SSE register to integer */
856b07a1
SC
176};
177
a269a03c
JC
178struct processor_costs k6_cost = {
179 1, /* cost of an add instruction */
e075ae69 180 2, /* cost of a lea instruction */
a269a03c
JC
181 1, /* variable shift costs */
182 1, /* constant shift costs */
73fe76e4 183 3, /* cost of starting a multiply */
a269a03c 184 0, /* cost of multiply per each bit set */
e075ae69 185 18, /* cost of a divide/mod */
96e7ae40 186 8, /* "large" insn */
e2e52e1b 187 4, /* MOVE_RATIO */
7c6b971d 188 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
189 {4, 5, 4}, /* cost of loading integer registers
190 in QImode, HImode and SImode.
0f290768 191 Relative to reg-reg move (2). */
96e7ae40
JH
192 {2, 3, 2}, /* cost of storing integer registers */
193 4, /* cost of reg,reg fld/fst */
194 {6, 6, 6}, /* cost of loading fp registers
195 in SFmode, DFmode and XFmode */
fa79946e
JH
196 {4, 4, 4}, /* cost of loading integer registers */
197 2, /* cost of moving MMX register */
198 {2, 2}, /* cost of loading MMX registers
199 in SImode and DImode */
200 {2, 2}, /* cost of storing MMX registers
201 in SImode and DImode */
202 2, /* cost of moving SSE register */
203 {2, 2, 8}, /* cost of loading SSE registers
204 in SImode, DImode and TImode */
205 {2, 2, 8}, /* cost of storing SSE registers
206 in SImode, DImode and TImode */
207 6 /* MMX or SSE register to integer */
a269a03c
JC
208};
209
309ada50
JH
210struct processor_costs athlon_cost = {
211 1, /* cost of an add instruction */
0b5107cf 212 2, /* cost of a lea instruction */
309ada50
JH
213 1, /* variable shift costs */
214 1, /* constant shift costs */
215 5, /* cost of starting a multiply */
216 0, /* cost of multiply per each bit set */
0b5107cf 217 42, /* cost of a divide/mod */
309ada50 218 8, /* "large" insn */
e2e52e1b 219 9, /* MOVE_RATIO */
309ada50
JH
220 4, /* cost for loading QImode using movzbl */
221 {4, 5, 4}, /* cost of loading integer registers
222 in QImode, HImode and SImode.
0f290768 223 Relative to reg-reg move (2). */
309ada50
JH
224 {2, 3, 2}, /* cost of storing integer registers */
225 4, /* cost of reg,reg fld/fst */
0b5107cf 226 {6, 6, 20}, /* cost of loading fp registers
309ada50 227 in SFmode, DFmode and XFmode */
fa79946e
JH
228 {4, 4, 16}, /* cost of loading integer registers */
229 2, /* cost of moving MMX register */
230 {2, 2}, /* cost of loading MMX registers
231 in SImode and DImode */
232 {2, 2}, /* cost of storing MMX registers
233 in SImode and DImode */
234 2, /* cost of moving SSE register */
235 {2, 2, 8}, /* cost of loading SSE registers
236 in SImode, DImode and TImode */
237 {2, 2, 8}, /* cost of storing SSE registers
238 in SImode, DImode and TImode */
239 6 /* MMX or SSE register to integer */
309ada50
JH
240};
241
b4e89e2d
JH
242struct processor_costs pentium4_cost = {
243 1, /* cost of an add instruction */
244 1, /* cost of a lea instruction */
245 8, /* variable shift costs */
246 8, /* constant shift costs */
247 30, /* cost of starting a multiply */
248 0, /* cost of multiply per each bit set */
249 112, /* cost of a divide/mod */
250 16, /* "large" insn */
251 6, /* MOVE_RATIO */
252 2, /* cost for loading QImode using movzbl */
253 {4, 5, 4}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 3, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of loading integer registers */
261 2, /* cost of moving MMX register */
262 {2, 2}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {2, 2}, /* cost of storing MMX registers
265 in SImode and DImode */
266 12, /* cost of moving SSE register */
267 {12, 12, 12}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {2, 2, 8}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 10, /* MMX or SSE register to integer */
272};
273
32b5b1aa
SC
274struct processor_costs *ix86_cost = &pentium_cost;
275
a269a03c
JC
276/* Processor feature/optimization bitmasks. */
277#define m_386 (1<<PROCESSOR_I386)
278#define m_486 (1<<PROCESSOR_I486)
279#define m_PENT (1<<PROCESSOR_PENTIUM)
280#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
281#define m_K6 (1<<PROCESSOR_K6)
309ada50 282#define m_ATHLON (1<<PROCESSOR_ATHLON)
b4e89e2d 283#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
a269a03c 284
309ada50 285const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
b4e89e2d 286const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
a269a03c 287const int x86_zero_extend_with_and = m_486 | m_PENT;
b4e89e2d 288const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
e075ae69 289const int x86_double_with_add = ~m_386;
a269a03c 290const int x86_use_bit_test = m_386;
e2e52e1b 291const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
b4e89e2d
JH
292const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
293const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
294const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
e075ae69
RH
295const int x86_partial_reg_stall = m_PPRO;
296const int x86_use_loop = m_K6;
309ada50 297const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
e075ae69
RH
298const int x86_use_mov0 = m_K6;
299const int x86_use_cltd = ~(m_PENT | m_K6);
300const int x86_read_modify_write = ~m_PENT;
301const int x86_read_modify = ~(m_PENT | m_PPRO);
302const int x86_split_long_moves = m_PPRO;
e9e80858 303const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
b4e89e2d 304const int x86_single_stringop = m_386 | m_PENT4;
d9f32422
JH
305const int x86_qimode_math = ~(0);
306const int x86_promote_qi_regs = 0;
307const int x86_himode_math = ~(m_PPRO);
308const int x86_promote_hi_regs = m_PPRO;
b4e89e2d
JH
309const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
310const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
311const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
312const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
313const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
314const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
315const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
a269a03c 316
564d80f4 317#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
2a2ab3f9 318
e075ae69
RH
319const char * const hi_reg_name[] = HI_REGISTER_NAMES;
320const char * const qi_reg_name[] = QI_REGISTER_NAMES;
321const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
4c0d89b5
RS
322
323/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 324 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 325
e075ae69 326enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
327{
328 /* ax, dx, cx, bx */
ab408a86 329 AREG, DREG, CREG, BREG,
4c0d89b5 330 /* si, di, bp, sp */
e075ae69 331 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
332 /* FP registers */
333 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 334 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 335 /* arg pointer */
83774849 336 NON_Q_REGS,
564d80f4 337 /* flags, fpsr, dirflag, frame */
a7180f70
BS
338 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
339 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
340 SSE_REGS, SSE_REGS,
341 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
342 MMX_REGS, MMX_REGS
4c0d89b5 343};
c572e5ba 344
83774849
RH
345/* The "default" register map. */
346
0f290768 347int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
348{
349 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
350 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
351 -1, -1, -1, -1, /* arg, flags, fpsr, dir */
a7180f70
BS
352 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
353 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
83774849
RH
354};
355
0f7fa3d0
JH
356/* The "default" register map used in 64bit mode. */
357int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
358{
359 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
360 33, 34, 35, 36, 37, 38, 39, 40 /* fp regs */
361 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
362 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
363 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
364 8,9,10,11,12,13,14,15, /* extended integer registers */
365 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
366};
367
83774849
RH
368/* Define the register numbers to be used in Dwarf debugging information.
369 The SVR4 reference port C compiler uses the following register numbers
370 in its Dwarf output code:
371 0 for %eax (gcc regno = 0)
372 1 for %ecx (gcc regno = 2)
373 2 for %edx (gcc regno = 1)
374 3 for %ebx (gcc regno = 3)
375 4 for %esp (gcc regno = 7)
376 5 for %ebp (gcc regno = 6)
377 6 for %esi (gcc regno = 4)
378 7 for %edi (gcc regno = 5)
379 The following three DWARF register numbers are never generated by
380 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
381 believes these numbers have these meanings.
382 8 for %eip (no gcc equivalent)
383 9 for %eflags (gcc regno = 17)
384 10 for %trapno (no gcc equivalent)
385 It is not at all clear how we should number the FP stack registers
386 for the x86 architecture. If the version of SDB on x86/svr4 were
387 a bit less brain dead with respect to floating-point then we would
388 have a precedent to follow with respect to DWARF register numbers
389 for x86 FP registers, but the SDB on x86/svr4 is so completely
390 broken with respect to FP registers that it is hardly worth thinking
391 of it as something to strive for compatibility with.
392 The version of x86/svr4 SDB I have at the moment does (partially)
393 seem to believe that DWARF register number 11 is associated with
394 the x86 register %st(0), but that's about all. Higher DWARF
395 register numbers don't seem to be associated with anything in
396 particular, and even for DWARF regno 11, SDB only seems to under-
397 stand that it should say that a variable lives in %st(0) (when
398 asked via an `=' command) if we said it was in DWARF regno 11,
399 but SDB still prints garbage when asked for the value of the
400 variable in question (via a `/' command).
401 (Also note that the labels SDB prints for various FP stack regs
402 when doing an `x' command are all wrong.)
403 Note that these problems generally don't affect the native SVR4
404 C compiler because it doesn't allow the use of -O with -g and
405 because when it is *not* optimizing, it allocates a memory
406 location for each floating-point variable, and the memory
407 location is what gets described in the DWARF AT_location
408 attribute for the variable in question.
409 Regardless of the severe mental illness of the x86/svr4 SDB, we
410 do something sensible here and we use the following DWARF
411 register numbers. Note that these are all stack-top-relative
412 numbers.
413 11 for %st(0) (gcc regno = 8)
414 12 for %st(1) (gcc regno = 9)
415 13 for %st(2) (gcc regno = 10)
416 14 for %st(3) (gcc regno = 11)
417 15 for %st(4) (gcc regno = 12)
418 16 for %st(5) (gcc regno = 13)
419 17 for %st(6) (gcc regno = 14)
420 18 for %st(7) (gcc regno = 15)
421*/
0f290768 422int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
423{
424 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
425 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
3f3f2124 426 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
427 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
428 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
3f3f2124
JH
429 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
430 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
83774849
RH
431};
432
c572e5ba
JVA
433/* Test and compare insns in i386.md store the information needed to
434 generate branch and scc insns here. */
435
e075ae69
RH
436struct rtx_def *ix86_compare_op0 = NULL_RTX;
437struct rtx_def *ix86_compare_op1 = NULL_RTX;
f5316dfe 438
36edd3cc
BS
439#define MAX_386_STACK_LOCALS 2
440
441/* Define the structure for the machine field in struct function. */
442struct machine_function
443{
444 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
6fca22eb 445 int accesses_prev_frame;
36edd3cc
BS
446};
447
01d939e8 448#define ix86_stack_locals (cfun->machine->stack_locals)
36edd3cc 449
4dd2ac2c
JH
450/* Structure describing stack frame layout.
451 Stack grows downward:
452
453 [arguments]
454 <- ARG_POINTER
455 saved pc
456
457 saved frame pointer if frame_pointer_needed
458 <- HARD_FRAME_POINTER
459 [saved regs]
460
461 [padding1] \
462 )
463 [va_arg registers] (
464 > to_allocate <- FRAME_POINTER
465 [frame] (
466 )
467 [padding2] /
468 */
469struct ix86_frame
470{
471 int nregs;
472 int padding1;
473 HOST_WIDE_INT frame;
474 int padding2;
475 int outgoing_arguments_size;
476
477 HOST_WIDE_INT to_allocate;
478 /* The offsets relative to ARG_POINTER. */
479 HOST_WIDE_INT frame_pointer_offset;
480 HOST_WIDE_INT hard_frame_pointer_offset;
481 HOST_WIDE_INT stack_pointer_offset;
482};
483
c8c5cb99 484/* which cpu are we scheduling for */
e42ea7f9 485enum processor_type ix86_cpu;
c8c5cb99
SC
486
487/* which instruction set architecture to use. */
c942177e 488int ix86_arch;
c8c5cb99
SC
489
490/* Strings to hold which cpu and instruction set architecture to use. */
9c23aa47
ZW
491const char *ix86_cpu_string; /* for -mcpu=<xxx> */
492const char *ix86_arch_string; /* for -march=<xxx> */
c8c5cb99 493
0f290768 494/* # of registers to use to pass arguments. */
e075ae69 495const char *ix86_regparm_string;
e9a25f70 496
e075ae69
RH
497/* ix86_regparm_string as a number */
498int ix86_regparm;
e9a25f70
JL
499
500/* Alignment to use for loops and jumps: */
501
0f290768 502/* Power of two alignment for loops. */
e075ae69 503const char *ix86_align_loops_string;
e9a25f70 504
0f290768 505/* Power of two alignment for non-loop jumps. */
e075ae69 506const char *ix86_align_jumps_string;
e9a25f70 507
3af4bd89 508/* Power of two alignment for stack boundary in bytes. */
e075ae69 509const char *ix86_preferred_stack_boundary_string;
3af4bd89
JH
510
511/* Preferred alignment for stack boundary in bits. */
e075ae69 512int ix86_preferred_stack_boundary;
3af4bd89 513
e9a25f70 514/* Values 1-5: see jump.c */
e075ae69
RH
515int ix86_branch_cost;
516const char *ix86_branch_cost_string;
e9a25f70 517
0f290768 518/* Power of two alignment for functions. */
e075ae69
RH
519int ix86_align_funcs;
520const char *ix86_align_funcs_string;
b08de47e 521
0f290768 522/* Power of two alignment for loops. */
e075ae69 523int ix86_align_loops;
b08de47e 524
0f290768 525/* Power of two alignment for non-loop jumps. */
e075ae69
RH
526int ix86_align_jumps;
527\f
f6da8bc3
KG
528static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
529static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
e075ae69 530 int, int, FILE *));
f6da8bc3 531static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
3a3677ff
RH
532static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
533 rtx *, rtx *));
f6da8bc3
KG
534static rtx gen_push PARAMS ((rtx));
535static int memory_address_length PARAMS ((rtx addr));
536static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
537static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
538static int ix86_safe_length PARAMS ((rtx));
539static enum attr_memory ix86_safe_memory PARAMS ((rtx));
540static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
541static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
542static void ix86_dump_ppro_packet PARAMS ((FILE *));
543static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
544static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
e075ae69 545 rtx));
f6da8bc3
KG
546static void ix86_init_machine_status PARAMS ((struct function *));
547static void ix86_mark_machine_status PARAMS ((struct function *));
37b15744 548static void ix86_free_machine_status PARAMS ((struct function *));
2b589241 549static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
f6da8bc3 550static int ix86_safe_length_prefix PARAMS ((rtx));
0903fcab
JH
551static int ix86_nsaved_regs PARAMS((void));
552static void ix86_emit_save_regs PARAMS((void));
da2d1d3a 553static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
0903fcab 554static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
0e4970d7 555static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
c6991660
KG
556static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
557static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
55efb413 558static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
e075ae69
RH
559
560struct ix86_address
561{
562 rtx base, index, disp;
563 HOST_WIDE_INT scale;
564};
b08de47e 565
e075ae69 566static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
bd793c65
BS
567
568struct builtin_description;
569static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
570 rtx));
571static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
572 rtx));
573static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
574static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
575static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
576static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
577static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
c0c102a9
JH
578static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
579static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
580 enum rtx_code *,
581 enum rtx_code *,
582 enum rtx_code *));
9e7adcb3
JH
583static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
584 rtx *, rtx *));
585static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
586static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
587static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
588static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
4dd2ac2c
JH
589static int ix86_save_reg PARAMS ((int));
590static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
e075ae69 591\f
f5316dfe
MM
592/* Sometimes certain combinations of command options do not make
593 sense on a particular target machine. You can define a macro
594 `OVERRIDE_OPTIONS' to take account of this. This macro, if
595 defined, is executed once just after all the command options have
596 been parsed.
597
598 Don't use this macro to turn on various extra optimizations for
599 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
600
601void
602override_options ()
603{
400500c4 604 int i;
e075ae69
RH
605 /* Comes from final.c -- no real reason to change it. */
606#define MAX_CODE_ALIGN 16
f5316dfe 607
c8c5cb99
SC
608 static struct ptt
609 {
e075ae69
RH
610 struct processor_costs *cost; /* Processor costs */
611 int target_enable; /* Target flags to enable. */
612 int target_disable; /* Target flags to disable. */
613 int align_loop; /* Default alignments. */
614 int align_jump;
615 int align_func;
616 int branch_cost;
617 }
0f290768 618 const processor_target_table[PROCESSOR_max] =
e075ae69
RH
619 {
620 {&i386_cost, 0, 0, 2, 2, 2, 1},
621 {&i486_cost, 0, 0, 4, 4, 4, 1},
622 {&pentium_cost, 0, 0, -4, -4, -4, 1},
623 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
309ada50 624 {&k6_cost, 0, 0, -5, -5, 4, 1},
b4e89e2d
JH
625 {&athlon_cost, 0, 0, 4, -4, 4, 1},
626 {&pentium4_cost, 0, 0, 2, 2, 2, 1}
e075ae69
RH
627 };
628
629 static struct pta
630 {
0f290768 631 const char *name; /* processor name or nickname. */
e075ae69
RH
632 enum processor_type processor;
633 }
0f290768 634 const processor_alias_table[] =
e075ae69
RH
635 {
636 {"i386", PROCESSOR_I386},
637 {"i486", PROCESSOR_I486},
638 {"i586", PROCESSOR_PENTIUM},
639 {"pentium", PROCESSOR_PENTIUM},
640 {"i686", PROCESSOR_PENTIUMPRO},
641 {"pentiumpro", PROCESSOR_PENTIUMPRO},
e075ae69 642 {"k6", PROCESSOR_K6},
309ada50 643 {"athlon", PROCESSOR_ATHLON},
b4e89e2d 644 {"pentium4", PROCESSOR_PENTIUM4},
3af4bd89 645 };
c8c5cb99 646
0f290768 647 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
c8c5cb99 648
f5316dfe
MM
649#ifdef SUBTARGET_OVERRIDE_OPTIONS
650 SUBTARGET_OVERRIDE_OPTIONS;
651#endif
652
5a6ee819 653 ix86_arch = PROCESSOR_I386;
e075ae69
RH
654 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
655
656 if (ix86_arch_string != 0)
657 {
e075ae69
RH
658 for (i = 0; i < pta_size; i++)
659 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
660 {
661 ix86_arch = processor_alias_table[i].processor;
662 /* Default cpu tuning to the architecture. */
663 ix86_cpu = ix86_arch;
664 break;
665 }
400500c4 666
e075ae69
RH
667 if (i == pta_size)
668 error ("bad value (%s) for -march= switch", ix86_arch_string);
669 }
670
671 if (ix86_cpu_string != 0)
672 {
e075ae69
RH
673 for (i = 0; i < pta_size; i++)
674 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
675 {
676 ix86_cpu = processor_alias_table[i].processor;
677 break;
678 }
679 if (i == pta_size)
680 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
681 }
682
683 ix86_cost = processor_target_table[ix86_cpu].cost;
684 target_flags |= processor_target_table[ix86_cpu].target_enable;
685 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
686
36edd3cc
BS
687 /* Arrange to set up i386_stack_locals for all functions. */
688 init_machine_status = ix86_init_machine_status;
1526a060 689 mark_machine_status = ix86_mark_machine_status;
37b15744 690 free_machine_status = ix86_free_machine_status;
36edd3cc 691
0f290768 692 /* Validate -mregparm= value. */
e075ae69 693 if (ix86_regparm_string)
b08de47e 694 {
400500c4
RK
695 i = atoi (ix86_regparm_string);
696 if (i < 0 || i > REGPARM_MAX)
697 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
698 else
699 ix86_regparm = i;
b08de47e
MM
700 }
701
e9a25f70 702 /* Validate -malign-loops= value, or provide default. */
e075ae69
RH
703 ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
704 if (ix86_align_loops_string)
b08de47e 705 {
400500c4
RK
706 i = atoi (ix86_align_loops_string);
707 if (i < 0 || i > MAX_CODE_ALIGN)
708 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
709 else
710 ix86_align_loops = i;
b08de47e 711 }
3af4bd89
JH
712
713 /* Validate -malign-jumps= value, or provide default. */
e075ae69
RH
714 ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
715 if (ix86_align_jumps_string)
b08de47e 716 {
400500c4
RK
717 i = atoi (ix86_align_jumps_string);
718 if (i < 0 || i > MAX_CODE_ALIGN)
719 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
720 else
721 ix86_align_jumps = i;
b08de47e 722 }
b08de47e 723
0f290768 724 /* Validate -malign-functions= value, or provide default. */
e075ae69
RH
725 ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
726 if (ix86_align_funcs_string)
b08de47e 727 {
400500c4
RK
728 i = atoi (ix86_align_funcs_string);
729 if (i < 0 || i > MAX_CODE_ALIGN)
730 error ("-malign-functions=%d is not between 0 and %d",
731 i, MAX_CODE_ALIGN);
732 else
733 ix86_align_funcs = i;
b08de47e 734 }
3af4bd89 735
e4c0478d 736 /* Validate -mpreferred-stack-boundary= value, or provide default.
3af4bd89 737 The default of 128 bits is for Pentium III's SSE __m128. */
e075ae69
RH
738 ix86_preferred_stack_boundary = 128;
739 if (ix86_preferred_stack_boundary_string)
3af4bd89 740 {
400500c4 741 i = atoi (ix86_preferred_stack_boundary_string);
3af4bd89 742 if (i < 2 || i > 31)
400500c4
RK
743 error ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
744 else
745 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 746 }
77a989d1 747
0f290768 748 /* Validate -mbranch-cost= value, or provide default. */
e075ae69
RH
749 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
750 if (ix86_branch_cost_string)
804a8ee0 751 {
400500c4
RK
752 i = atoi (ix86_branch_cost_string);
753 if (i < 0 || i > 5)
754 error ("-mbranch-cost=%d is not between 0 and 5", i);
755 else
756 ix86_branch_cost = i;
804a8ee0 757 }
804a8ee0 758
e9a25f70
JL
759 /* Keep nonleaf frame pointers. */
760 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 761 flag_omit_frame_pointer = 1;
e075ae69
RH
762
763 /* If we're doing fast math, we don't care about comparison order
764 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 765 if (flag_unsafe_math_optimizations)
e075ae69
RH
766 target_flags &= ~MASK_IEEE_FP;
767
a7180f70
BS
768 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
769 on by -msse. */
770 if (TARGET_SSE)
771 target_flags |= MASK_MMX;
f5316dfe
MM
772}
773\f
32b5b1aa 774void
c6aded7c 775optimization_options (level, size)
32b5b1aa 776 int level;
bb5177ac 777 int size ATTRIBUTE_UNUSED;
32b5b1aa 778{
e9a25f70
JL
779 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
780 make the problem with not enough registers even worse. */
32b5b1aa
SC
781#ifdef INSN_SCHEDULING
782 if (level > 1)
783 flag_schedule_insns = 0;
784#endif
785}
b08de47e
MM
786\f
787/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
788 attribute for DECL. The attributes in ATTRIBUTES have previously been
789 assigned to DECL. */
790
791int
e075ae69 792ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
bb5177ac
RL
793 tree decl ATTRIBUTE_UNUSED;
794 tree attributes ATTRIBUTE_UNUSED;
795 tree identifier ATTRIBUTE_UNUSED;
796 tree args ATTRIBUTE_UNUSED;
b08de47e
MM
797{
798 return 0;
799}
800
801/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
802 attribute for TYPE. The attributes in ATTRIBUTES have previously been
803 assigned to TYPE. */
804
805int
e075ae69 806ix86_valid_type_attribute_p (type, attributes, identifier, args)
b08de47e 807 tree type;
bb5177ac 808 tree attributes ATTRIBUTE_UNUSED;
b08de47e
MM
809 tree identifier;
810 tree args;
811{
812 if (TREE_CODE (type) != FUNCTION_TYPE
ac478ac0 813 && TREE_CODE (type) != METHOD_TYPE
b08de47e
MM
814 && TREE_CODE (type) != FIELD_DECL
815 && TREE_CODE (type) != TYPE_DECL)
816 return 0;
817
818 /* Stdcall attribute says callee is responsible for popping arguments
819 if they are not variable. */
820 if (is_attribute_p ("stdcall", identifier))
821 return (args == NULL_TREE);
822
0f290768 823 /* Cdecl attribute says the callee is a normal C declaration. */
b08de47e
MM
824 if (is_attribute_p ("cdecl", identifier))
825 return (args == NULL_TREE);
826
827 /* Regparm attribute specifies how many integer arguments are to be
0f290768 828 passed in registers. */
b08de47e
MM
829 if (is_attribute_p ("regparm", identifier))
830 {
831 tree cst;
832
e9a25f70 833 if (! args || TREE_CODE (args) != TREE_LIST
b08de47e
MM
834 || TREE_CHAIN (args) != NULL_TREE
835 || TREE_VALUE (args) == NULL_TREE)
836 return 0;
837
838 cst = TREE_VALUE (args);
839 if (TREE_CODE (cst) != INTEGER_CST)
840 return 0;
841
cce097f1 842 if (compare_tree_int (cst, REGPARM_MAX) > 0)
b08de47e
MM
843 return 0;
844
845 return 1;
846 }
847
848 return 0;
849}
850
851/* Return 0 if the attributes for two types are incompatible, 1 if they
852 are compatible, and 2 if they are nearly compatible (which causes a
853 warning to be generated). */
854
855int
e075ae69 856ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
857 tree type1;
858 tree type2;
b08de47e 859{
0f290768 860 /* Check for mismatch of non-default calling convention. */
69ddee61 861 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
862
863 if (TREE_CODE (type1) != FUNCTION_TYPE)
864 return 1;
865
866 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
867 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
868 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 869 return 0;
b08de47e
MM
870 return 1;
871}
b08de47e
MM
872\f
873/* Value is the number of bytes of arguments automatically
874 popped when returning from a subroutine call.
875 FUNDECL is the declaration node of the function (as a tree),
876 FUNTYPE is the data type of the function (as a tree),
877 or for a library call it is an identifier node for the subroutine name.
878 SIZE is the number of bytes of arguments passed on the stack.
879
880 On the 80386, the RTD insn may be used to pop them if the number
881 of args is fixed, but if the number is variable then the caller
882 must pop them all. RTD can't be used for library calls now
883 because the library is compiled with the Unix compiler.
884 Use of RTD is a selectable option, since it is incompatible with
885 standard Unix calling sequences. If the option is not selected,
886 the caller must always pop the args.
887
888 The attribute stdcall is equivalent to RTD on a per module basis. */
889
890int
e075ae69 891ix86_return_pops_args (fundecl, funtype, size)
b08de47e
MM
892 tree fundecl;
893 tree funtype;
894 int size;
79325812 895{
3345ee7d 896 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 897
0f290768 898 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 899 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 900
0f290768 901 /* Stdcall functions will pop the stack if not variable args. */
698cdd84
SC
902 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
903 rtd = 1;
79325812 904
698cdd84
SC
905 if (rtd
906 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
907 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
908 == void_type_node)))
698cdd84
SC
909 return size;
910 }
79325812 911
e9a25f70 912 /* Lose any fake structure return argument. */
698cdd84
SC
913 if (aggregate_value_p (TREE_TYPE (funtype)))
914 return GET_MODE_SIZE (Pmode);
79325812 915
2614aac6 916 return 0;
b08de47e 917}
b08de47e
MM
918\f
919/* Argument support functions. */
920
921/* Initialize a variable CUM of type CUMULATIVE_ARGS
922 for a call to a function whose data type is FNTYPE.
923 For a library call, FNTYPE is 0. */
924
925void
926init_cumulative_args (cum, fntype, libname)
e9a25f70 927 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
b08de47e
MM
928 tree fntype; /* tree ptr for function decl */
929 rtx libname; /* SYMBOL_REF of library name or 0 */
930{
931 static CUMULATIVE_ARGS zero_cum;
932 tree param, next_param;
933
934 if (TARGET_DEBUG_ARG)
935 {
936 fprintf (stderr, "\ninit_cumulative_args (");
937 if (fntype)
e9a25f70
JL
938 fprintf (stderr, "fntype code = %s, ret code = %s",
939 tree_code_name[(int) TREE_CODE (fntype)],
940 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
941 else
942 fprintf (stderr, "no fntype");
943
944 if (libname)
945 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
946 }
947
948 *cum = zero_cum;
949
950 /* Set up the number of registers to use for passing arguments. */
e075ae69 951 cum->nregs = ix86_regparm;
b08de47e
MM
952 if (fntype)
953 {
954 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 955
b08de47e
MM
956 if (attr)
957 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
958 }
959
960 /* Determine if this function has variable arguments. This is
961 indicated by the last argument being 'void_type_mode' if there
962 are no variable arguments. If there are variable arguments, then
963 we won't pass anything in registers */
964
965 if (cum->nregs)
966 {
967 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 968 param != 0; param = next_param)
b08de47e
MM
969 {
970 next_param = TREE_CHAIN (param);
e9a25f70 971 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
b08de47e
MM
972 cum->nregs = 0;
973 }
974 }
975
976 if (TARGET_DEBUG_ARG)
977 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
978
979 return;
980}
981
982/* Update the data in CUM to advance over an argument
983 of mode MODE and data type TYPE.
984 (TYPE is null for libcalls where that information may not be available.) */
985
986void
987function_arg_advance (cum, mode, type, named)
988 CUMULATIVE_ARGS *cum; /* current arg information */
989 enum machine_mode mode; /* current arg mode */
990 tree type; /* type of the argument or 0 if lib support */
991 int named; /* whether or not the argument was named */
992{
5ac9118e
KG
993 int bytes =
994 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
995 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
996
997 if (TARGET_DEBUG_ARG)
998 fprintf (stderr,
e9a25f70 999 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 1000 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
82a127a9 1001 if (TARGET_SSE && mode == TImode)
b08de47e 1002 {
82a127a9
CM
1003 cum->sse_words += words;
1004 cum->sse_nregs -= 1;
1005 cum->sse_regno += 1;
1006 if (cum->sse_nregs <= 0)
1007 {
1008 cum->sse_nregs = 0;
1009 cum->sse_regno = 0;
1010 }
b08de47e 1011 }
82a127a9
CM
1012 else
1013 {
1014 cum->words += words;
1015 cum->nregs -= words;
1016 cum->regno += words;
b08de47e 1017
82a127a9
CM
1018 if (cum->nregs <= 0)
1019 {
1020 cum->nregs = 0;
1021 cum->regno = 0;
1022 }
1023 }
b08de47e
MM
1024 return;
1025}
1026
1027/* Define where to put the arguments to a function.
1028 Value is zero to push the argument on the stack,
1029 or a hard register in which to store the argument.
1030
1031 MODE is the argument's machine mode.
1032 TYPE is the data type of the argument (as a tree).
1033 This is null for libcalls where that information may
1034 not be available.
1035 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1036 the preceding args and about the function being called.
1037 NAMED is nonzero if this argument is a named parameter
1038 (otherwise it is an extra parameter matching an ellipsis). */
1039
1040struct rtx_def *
1041function_arg (cum, mode, type, named)
1042 CUMULATIVE_ARGS *cum; /* current arg information */
1043 enum machine_mode mode; /* current arg mode */
1044 tree type; /* type of the argument or 0 if lib support */
1045 int named; /* != 0 for normal args, == 0 for ... args */
1046{
1047 rtx ret = NULL_RTX;
5ac9118e
KG
1048 int bytes =
1049 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
1050 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1051
1052 switch (mode)
1053 {
0f290768 1054 /* For now, pass fp/complex values on the stack. */
e9a25f70 1055 default:
b08de47e
MM
1056 break;
1057
1058 case BLKmode:
1059 case DImode:
1060 case SImode:
1061 case HImode:
1062 case QImode:
1063 if (words <= cum->nregs)
f64cecad 1064 ret = gen_rtx_REG (mode, cum->regno);
b08de47e 1065 break;
82a127a9
CM
1066 case TImode:
1067 if (cum->sse_nregs)
1068 ret = gen_rtx_REG (mode, cum->sse_regno);
1069 break;
b08de47e
MM
1070 }
1071
1072 if (TARGET_DEBUG_ARG)
1073 {
1074 fprintf (stderr,
e9a25f70 1075 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
b08de47e
MM
1076 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1077
1078 if (ret)
1079 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
1080 else
1081 fprintf (stderr, ", stack");
1082
1083 fprintf (stderr, " )\n");
1084 }
1085
1086 return ret;
1087}
e075ae69 1088\f
8bad7136
JL
1089
1090/* Return nonzero if OP is (const_int 1), else return zero. */
1091
1092int
1093const_int_1_operand (op, mode)
1094 rtx op;
1095 enum machine_mode mode ATTRIBUTE_UNUSED;
1096{
1097 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
1098}
1099
e075ae69
RH
1100/* Returns 1 if OP is either a symbol reference or a sum of a symbol
1101 reference and a constant. */
b08de47e
MM
1102
1103int
e075ae69
RH
1104symbolic_operand (op, mode)
1105 register rtx op;
1106 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1107{
e075ae69 1108 switch (GET_CODE (op))
2a2ab3f9 1109 {
e075ae69
RH
1110 case SYMBOL_REF:
1111 case LABEL_REF:
1112 return 1;
1113
1114 case CONST:
1115 op = XEXP (op, 0);
1116 if (GET_CODE (op) == SYMBOL_REF
1117 || GET_CODE (op) == LABEL_REF
1118 || (GET_CODE (op) == UNSPEC
1119 && XINT (op, 1) >= 6
1120 && XINT (op, 1) <= 7))
1121 return 1;
1122 if (GET_CODE (op) != PLUS
1123 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1124 return 0;
1125
1126 op = XEXP (op, 0);
1127 if (GET_CODE (op) == SYMBOL_REF
1128 || GET_CODE (op) == LABEL_REF)
1129 return 1;
1130 /* Only @GOTOFF gets offsets. */
1131 if (GET_CODE (op) != UNSPEC
1132 || XINT (op, 1) != 7)
1133 return 0;
1134
1135 op = XVECEXP (op, 0, 0);
1136 if (GET_CODE (op) == SYMBOL_REF
1137 || GET_CODE (op) == LABEL_REF)
1138 return 1;
1139 return 0;
1140
1141 default:
1142 return 0;
2a2ab3f9
JVA
1143 }
1144}
2a2ab3f9 1145
e075ae69 1146/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 1147
e075ae69
RH
1148int
1149pic_symbolic_operand (op, mode)
1150 register rtx op;
1151 enum machine_mode mode ATTRIBUTE_UNUSED;
1152{
1153 if (GET_CODE (op) == CONST)
2a2ab3f9 1154 {
e075ae69
RH
1155 op = XEXP (op, 0);
1156 if (GET_CODE (op) == UNSPEC)
1157 return 1;
1158 if (GET_CODE (op) != PLUS
1159 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1160 return 0;
1161 op = XEXP (op, 0);
1162 if (GET_CODE (op) == UNSPEC)
1163 return 1;
2a2ab3f9 1164 }
e075ae69 1165 return 0;
2a2ab3f9 1166}
2a2ab3f9 1167
28d52ffb
RH
1168/* Test for a valid operand for a call instruction. Don't allow the
1169 arg pointer register or virtual regs since they may decay into
1170 reg + const, which the patterns can't handle. */
2a2ab3f9 1171
e075ae69
RH
1172int
1173call_insn_operand (op, mode)
1174 rtx op;
1175 enum machine_mode mode ATTRIBUTE_UNUSED;
1176{
e075ae69
RH
1177 /* Disallow indirect through a virtual register. This leads to
1178 compiler aborts when trying to eliminate them. */
1179 if (GET_CODE (op) == REG
1180 && (op == arg_pointer_rtx
564d80f4 1181 || op == frame_pointer_rtx
e075ae69
RH
1182 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
1183 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
1184 return 0;
2a2ab3f9 1185
28d52ffb
RH
1186 /* Disallow `call 1234'. Due to varying assembler lameness this
1187 gets either rejected or translated to `call .+1234'. */
1188 if (GET_CODE (op) == CONST_INT)
1189 return 0;
1190
cbbf65e0
RH
1191 /* Explicitly allow SYMBOL_REF even if pic. */
1192 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 1193 return 1;
2a2ab3f9 1194
cbbf65e0
RH
1195 /* Half-pic doesn't allow anything but registers and constants.
1196 We've just taken care of the later. */
1197 if (HALF_PIC_P ())
1198 return register_operand (op, Pmode);
1199
1200 /* Otherwise we can allow any general_operand in the address. */
1201 return general_operand (op, Pmode);
e075ae69 1202}
79325812 1203
e075ae69
RH
1204int
1205constant_call_address_operand (op, mode)
1206 rtx op;
1207 enum machine_mode mode ATTRIBUTE_UNUSED;
1208{
eaf19aba
JJ
1209 if (GET_CODE (op) == CONST
1210 && GET_CODE (XEXP (op, 0)) == PLUS
1211 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1212 op = XEXP (XEXP (op, 0), 0);
e1ff012c 1213 return GET_CODE (op) == SYMBOL_REF;
e075ae69 1214}
2a2ab3f9 1215
e075ae69 1216/* Match exactly zero and one. */
e9a25f70 1217
0f290768 1218int
e075ae69
RH
1219const0_operand (op, mode)
1220 register rtx op;
1221 enum machine_mode mode;
1222{
1223 return op == CONST0_RTX (mode);
1224}
e9a25f70 1225
0f290768 1226int
e075ae69
RH
1227const1_operand (op, mode)
1228 register rtx op;
1229 enum machine_mode mode ATTRIBUTE_UNUSED;
1230{
1231 return op == const1_rtx;
1232}
2a2ab3f9 1233
e075ae69 1234/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 1235
e075ae69
RH
1236int
1237const248_operand (op, mode)
1238 register rtx op;
1239 enum machine_mode mode ATTRIBUTE_UNUSED;
1240{
1241 return (GET_CODE (op) == CONST_INT
1242 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1243}
e9a25f70 1244
e075ae69 1245/* True if this is a constant appropriate for an increment or decremenmt. */
81fd0956 1246
e075ae69
RH
1247int
1248incdec_operand (op, mode)
1249 register rtx op;
1250 enum machine_mode mode;
1251{
b4e89e2d
JH
1252 /* On Pentium4, the inc and dec operations causes extra dependancy on flag
1253 registers, since carry flag is not set. */
1254 if (TARGET_PENTIUM4 && !optimize_size)
1255 return 0;
e075ae69
RH
1256 if (op == const1_rtx || op == constm1_rtx)
1257 return 1;
1258 if (GET_CODE (op) != CONST_INT)
1259 return 0;
1260 if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
1261 return 1;
1262 if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
1263 return 1;
1264 if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
1265 return 1;
1266 return 0;
1267}
2a2ab3f9 1268
0f290768 1269/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
1270 register eliminable to the stack pointer. Otherwise, this is
1271 a register operand.
2a2ab3f9 1272
e075ae69
RH
1273 This is used to prevent esp from being used as an index reg.
1274 Which would only happen in pathological cases. */
5f1ec3e6 1275
e075ae69
RH
1276int
1277reg_no_sp_operand (op, mode)
1278 register rtx op;
1279 enum machine_mode mode;
1280{
1281 rtx t = op;
1282 if (GET_CODE (t) == SUBREG)
1283 t = SUBREG_REG (t);
564d80f4 1284 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 1285 return 0;
2a2ab3f9 1286
e075ae69 1287 return register_operand (op, mode);
2a2ab3f9 1288}
b840bfb0 1289
915119a5
BS
1290int
1291mmx_reg_operand (op, mode)
1292 register rtx op;
bd793c65 1293 enum machine_mode mode ATTRIBUTE_UNUSED;
915119a5
BS
1294{
1295 return MMX_REG_P (op);
1296}
1297
2c5a510c
RH
1298/* Return false if this is any eliminable register. Otherwise
1299 general_operand. */
1300
1301int
1302general_no_elim_operand (op, mode)
1303 register rtx op;
1304 enum machine_mode mode;
1305{
1306 rtx t = op;
1307 if (GET_CODE (t) == SUBREG)
1308 t = SUBREG_REG (t);
1309 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1310 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1311 || t == virtual_stack_dynamic_rtx)
1312 return 0;
1313
1314 return general_operand (op, mode);
1315}
1316
1317/* Return false if this is any eliminable register. Otherwise
1318 register_operand or const_int. */
1319
1320int
1321nonmemory_no_elim_operand (op, mode)
1322 register rtx op;
1323 enum machine_mode mode;
1324{
1325 rtx t = op;
1326 if (GET_CODE (t) == SUBREG)
1327 t = SUBREG_REG (t);
1328 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1329 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1330 || t == virtual_stack_dynamic_rtx)
1331 return 0;
1332
1333 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1334}
1335
e075ae69 1336/* Return true if op is a Q_REGS class register. */
b840bfb0 1337
e075ae69
RH
1338int
1339q_regs_operand (op, mode)
1340 register rtx op;
1341 enum machine_mode mode;
b840bfb0 1342{
e075ae69
RH
1343 if (mode != VOIDmode && GET_MODE (op) != mode)
1344 return 0;
1345 if (GET_CODE (op) == SUBREG)
1346 op = SUBREG_REG (op);
1347 return QI_REG_P (op);
0f290768 1348}
b840bfb0 1349
e075ae69 1350/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 1351
e075ae69
RH
1352int
1353non_q_regs_operand (op, mode)
1354 register rtx op;
1355 enum machine_mode mode;
1356{
1357 if (mode != VOIDmode && GET_MODE (op) != mode)
1358 return 0;
1359 if (GET_CODE (op) == SUBREG)
1360 op = SUBREG_REG (op);
1361 return NON_QI_REG_P (op);
0f290768 1362}
b840bfb0 1363
915119a5
BS
1364/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
1365 insns. */
1366int
1367sse_comparison_operator (op, mode)
1368 rtx op;
1369 enum machine_mode mode ATTRIBUTE_UNUSED;
1370{
1371 enum rtx_code code = GET_CODE (op);
a46d1d38
JH
1372 switch (code)
1373 {
1374 /* Operations supported directly. */
1375 case EQ:
1376 case LT:
1377 case LE:
1378 case UNORDERED:
1379 case NE:
1380 case UNGE:
1381 case UNGT:
1382 case ORDERED:
1383 return 1;
1384 /* These are equivalent to ones above in non-IEEE comparisons. */
1385 case UNEQ:
1386 case UNLT:
1387 case UNLE:
1388 case LTGT:
1389 case GE:
1390 case GT:
1391 return !TARGET_IEEE_FP;
1392 default:
1393 return 0;
1394 }
915119a5 1395}
9076b9c1 1396/* Return 1 if OP is a valid comparison operator in valid mode. */
e075ae69 1397int
9076b9c1
JH
1398ix86_comparison_operator (op, mode)
1399 register rtx op;
1400 enum machine_mode mode;
e075ae69 1401{
9076b9c1 1402 enum machine_mode inmode;
9a915772 1403 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
1404 if (mode != VOIDmode && GET_MODE (op) != mode)
1405 return 0;
9a915772
JH
1406 if (GET_RTX_CLASS (code) != '<')
1407 return 0;
1408 inmode = GET_MODE (XEXP (op, 0));
1409
1410 if (inmode == CCFPmode || inmode == CCFPUmode)
1411 {
1412 enum rtx_code second_code, bypass_code;
1413 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1414 return (bypass_code == NIL && second_code == NIL);
1415 }
1416 switch (code)
3a3677ff
RH
1417 {
1418 case EQ: case NE:
3a3677ff 1419 return 1;
9076b9c1 1420 case LT: case GE:
7e08e190 1421 if (inmode == CCmode || inmode == CCGCmode
9076b9c1
JH
1422 || inmode == CCGOCmode || inmode == CCNOmode)
1423 return 1;
1424 return 0;
7e08e190 1425 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
7e08e190 1426 if (inmode == CCmode)
9076b9c1
JH
1427 return 1;
1428 return 0;
1429 case GT: case LE:
7e08e190 1430 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
9076b9c1
JH
1431 return 1;
1432 return 0;
3a3677ff
RH
1433 default:
1434 return 0;
1435 }
1436}
1437
9076b9c1 1438/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3a3677ff 1439
9076b9c1
JH
1440int
1441fcmov_comparison_operator (op, mode)
3a3677ff
RH
1442 register rtx op;
1443 enum machine_mode mode;
1444{
b62d22a2 1445 enum machine_mode inmode;
9a915772 1446 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
1447 if (mode != VOIDmode && GET_MODE (op) != mode)
1448 return 0;
9a915772
JH
1449 if (GET_RTX_CLASS (code) != '<')
1450 return 0;
1451 inmode = GET_MODE (XEXP (op, 0));
1452 if (inmode == CCFPmode || inmode == CCFPUmode)
3a3677ff 1453 {
9a915772
JH
1454 enum rtx_code second_code, bypass_code;
1455 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1456 if (bypass_code != NIL || second_code != NIL)
1457 return 0;
1458 code = ix86_fp_compare_code_to_integer (code);
1459 }
1460 /* i387 supports just limited amount of conditional codes. */
1461 switch (code)
1462 {
1463 case LTU: case GTU: case LEU: case GEU:
1464 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
9076b9c1
JH
1465 return 1;
1466 return 0;
9a915772
JH
1467 case ORDERED: case UNORDERED:
1468 case EQ: case NE:
1469 return 1;
3a3677ff
RH
1470 default:
1471 return 0;
1472 }
e075ae69 1473}
b840bfb0 1474
e9e80858
JH
1475/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1476
1477int
1478promotable_binary_operator (op, mode)
1479 register rtx op;
1480 enum machine_mode mode ATTRIBUTE_UNUSED;
1481{
1482 switch (GET_CODE (op))
1483 {
1484 case MULT:
1485 /* Modern CPUs have same latency for HImode and SImode multiply,
1486 but 386 and 486 do HImode multiply faster. */
1487 return ix86_cpu > PROCESSOR_I486;
1488 case PLUS:
1489 case AND:
1490 case IOR:
1491 case XOR:
1492 case ASHIFT:
1493 return 1;
1494 default:
1495 return 0;
1496 }
1497}
1498
e075ae69
RH
1499/* Nearly general operand, but accept any const_double, since we wish
1500 to be able to drop them into memory rather than have them get pulled
1501 into registers. */
b840bfb0 1502
2a2ab3f9 1503int
e075ae69
RH
1504cmp_fp_expander_operand (op, mode)
1505 register rtx op;
1506 enum machine_mode mode;
2a2ab3f9 1507{
e075ae69 1508 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 1509 return 0;
e075ae69 1510 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 1511 return 1;
e075ae69 1512 return general_operand (op, mode);
2a2ab3f9
JVA
1513}
1514
e075ae69 1515/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
1516
1517int
e075ae69 1518ext_register_operand (op, mode)
2a2ab3f9 1519 register rtx op;
bb5177ac 1520 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1521{
e075ae69
RH
1522 if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
1523 return 0;
1524 return register_operand (op, VOIDmode);
1525}
1526
1527/* Return 1 if this is a valid binary floating-point operation.
0f290768 1528 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
1529
1530int
1531binary_fp_operator (op, mode)
1532 register rtx op;
1533 enum machine_mode mode;
1534{
1535 if (mode != VOIDmode && mode != GET_MODE (op))
1536 return 0;
1537
2a2ab3f9
JVA
1538 switch (GET_CODE (op))
1539 {
e075ae69
RH
1540 case PLUS:
1541 case MINUS:
1542 case MULT:
1543 case DIV:
1544 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 1545
2a2ab3f9
JVA
1546 default:
1547 return 0;
1548 }
1549}
fee2770d 1550
e075ae69
RH
1551int
1552mult_operator(op, mode)
1553 register rtx op;
1554 enum machine_mode mode ATTRIBUTE_UNUSED;
1555{
1556 return GET_CODE (op) == MULT;
1557}
1558
1559int
1560div_operator(op, mode)
1561 register rtx op;
1562 enum machine_mode mode ATTRIBUTE_UNUSED;
1563{
1564 return GET_CODE (op) == DIV;
1565}
0a726ef1
JL
1566
1567int
e075ae69
RH
1568arith_or_logical_operator (op, mode)
1569 rtx op;
1570 enum machine_mode mode;
0a726ef1 1571{
e075ae69
RH
1572 return ((mode == VOIDmode || GET_MODE (op) == mode)
1573 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1574 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
1575}
1576
e075ae69 1577/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
1578
1579int
e075ae69
RH
1580memory_displacement_operand (op, mode)
1581 register rtx op;
1582 enum machine_mode mode;
4f2c8ebb 1583{
e075ae69 1584 struct ix86_address parts;
e9a25f70 1585
e075ae69
RH
1586 if (! memory_operand (op, mode))
1587 return 0;
1588
1589 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1590 abort ();
1591
1592 return parts.disp != NULL_RTX;
4f2c8ebb
RS
1593}
1594
16189740 1595/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
e075ae69
RH
1596 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1597
1598 ??? It seems likely that this will only work because cmpsi is an
1599 expander, and no actual insns use this. */
4f2c8ebb
RS
1600
1601int
e075ae69
RH
1602cmpsi_operand (op, mode)
1603 rtx op;
1604 enum machine_mode mode;
fee2770d 1605{
e075ae69
RH
1606 if (general_operand (op, mode))
1607 return 1;
1608
1609 if (GET_CODE (op) == AND
1610 && GET_MODE (op) == SImode
1611 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
1612 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
1613 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
1614 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
1615 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
1616 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 1617 return 1;
e9a25f70 1618
fee2770d
RS
1619 return 0;
1620}
d784886d 1621
e075ae69
RH
1622/* Returns 1 if OP is memory operand that can not be represented by the
1623 modRM array. */
d784886d
RK
1624
1625int
e075ae69 1626long_memory_operand (op, mode)
d784886d
RK
1627 register rtx op;
1628 enum machine_mode mode;
1629{
e075ae69 1630 if (! memory_operand (op, mode))
d784886d
RK
1631 return 0;
1632
e075ae69 1633 return memory_address_length (op) != 0;
d784886d 1634}
2247f6ed
JH
1635
1636/* Return nonzero if the rtx is known aligned. */
1637
1638int
1639aligned_operand (op, mode)
1640 rtx op;
1641 enum machine_mode mode;
1642{
1643 struct ix86_address parts;
1644
1645 if (!general_operand (op, mode))
1646 return 0;
1647
0f290768 1648 /* Registers and immediate operands are always "aligned". */
2247f6ed
JH
1649 if (GET_CODE (op) != MEM)
1650 return 1;
1651
0f290768 1652 /* Don't even try to do any aligned optimizations with volatiles. */
2247f6ed
JH
1653 if (MEM_VOLATILE_P (op))
1654 return 0;
1655
1656 op = XEXP (op, 0);
1657
1658 /* Pushes and pops are only valid on the stack pointer. */
1659 if (GET_CODE (op) == PRE_DEC
1660 || GET_CODE (op) == POST_INC)
1661 return 1;
1662
1663 /* Decode the address. */
1664 if (! ix86_decompose_address (op, &parts))
1665 abort ();
1666
1667 /* Look for some component that isn't known to be aligned. */
1668 if (parts.index)
1669 {
1670 if (parts.scale < 4
bdb429a5 1671 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
2247f6ed
JH
1672 return 0;
1673 }
1674 if (parts.base)
1675 {
bdb429a5 1676 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
2247f6ed
JH
1677 return 0;
1678 }
1679 if (parts.disp)
1680 {
1681 if (GET_CODE (parts.disp) != CONST_INT
1682 || (INTVAL (parts.disp) & 3) != 0)
1683 return 0;
1684 }
1685
1686 /* Didn't find one -- this must be an aligned address. */
1687 return 1;
1688}
e075ae69
RH
1689\f
1690/* Return true if the constant is something that can be loaded with
1691 a special instruction. Only handle 0.0 and 1.0; others are less
1692 worthwhile. */
57dbca5e
BS
1693
1694int
e075ae69
RH
1695standard_80387_constant_p (x)
1696 rtx x;
57dbca5e 1697{
2b04e52b 1698 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 1699 return -1;
2b04e52b
JH
1700 /* Note that on the 80387, other constants, such as pi, that we should support
1701 too. On some machines, these are much slower to load as standard constant,
1702 than to load from doubles in memory. */
1703 if (x == CONST0_RTX (GET_MODE (x)))
1704 return 1;
1705 if (x == CONST1_RTX (GET_MODE (x)))
1706 return 2;
e075ae69 1707 return 0;
57dbca5e
BS
1708}
1709
2b04e52b
JH
1710/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
1711 */
1712int
1713standard_sse_constant_p (x)
1714 rtx x;
1715{
1716 if (GET_CODE (x) != CONST_DOUBLE)
1717 return -1;
1718 return (x == CONST0_RTX (GET_MODE (x)));
1719}
1720
2a2ab3f9
JVA
1721/* Returns 1 if OP contains a symbol reference */
1722
1723int
1724symbolic_reference_mentioned_p (op)
1725 rtx op;
1726{
6f7d635c 1727 register const char *fmt;
2a2ab3f9
JVA
1728 register int i;
1729
1730 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
1731 return 1;
1732
1733 fmt = GET_RTX_FORMAT (GET_CODE (op));
1734 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
1735 {
1736 if (fmt[i] == 'E')
1737 {
1738 register int j;
1739
1740 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
1741 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
1742 return 1;
1743 }
e9a25f70 1744
2a2ab3f9
JVA
1745 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
1746 return 1;
1747 }
1748
1749 return 0;
1750}
e075ae69
RH
1751
1752/* Return 1 if it is appropriate to emit `ret' instructions in the
1753 body of a function. Do this only if the epilogue is simple, needing a
1754 couple of insns. Prior to reloading, we can't tell how many registers
1755 must be saved, so return 0 then. Return 0 if there is no frame
1756 marker to de-allocate.
1757
1758 If NON_SAVING_SETJMP is defined and true, then it is not possible
1759 for the epilogue to be simple, so return 0. This is a special case
1760 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1761 until final, but jump_optimize may need to know sooner if a
1762 `return' is OK. */
32b5b1aa
SC
1763
1764int
e075ae69 1765ix86_can_use_return_insn_p ()
32b5b1aa 1766{
4dd2ac2c 1767 struct ix86_frame frame;
9a7372d6 1768
e075ae69
RH
1769#ifdef NON_SAVING_SETJMP
1770 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
1771 return 0;
1772#endif
9a7372d6
RH
1773#ifdef FUNCTION_BLOCK_PROFILER_EXIT
1774 if (profile_block_flag == 2)
1775 return 0;
1776#endif
1777
1778 if (! reload_completed || frame_pointer_needed)
1779 return 0;
32b5b1aa 1780
9a7372d6
RH
1781 /* Don't allow more than 32 pop, since that's all we can do
1782 with one instruction. */
1783 if (current_function_pops_args
1784 && current_function_args_size >= 32768)
e075ae69 1785 return 0;
32b5b1aa 1786
4dd2ac2c
JH
1787 ix86_compute_frame_layout (&frame);
1788 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 1789}
6fca22eb
RH
1790
1791/* Value should be nonzero if functions must have frame pointers.
1792 Zero means the frame pointer need not be set up (and parms may
1793 be accessed via the stack pointer) in functions that seem suitable. */
1794
1795int
1796ix86_frame_pointer_required ()
1797{
1798 /* If we accessed previous frames, then the generated code expects
1799 to be able to access the saved ebp value in our frame. */
1800 if (cfun->machine->accesses_prev_frame)
1801 return 1;
1802
1803 /* Several x86 os'es need a frame pointer for other reasons,
1804 usually pertaining to setjmp. */
1805 if (SUBTARGET_FRAME_POINTER_REQUIRED)
1806 return 1;
1807
1808 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
1809 the frame pointer by default. Turn it back on now if we've not
1810 got a leaf function. */
1811 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
1812 return 1;
1813
1814 return 0;
1815}
1816
1817/* Record that the current function accesses previous call frames. */
1818
void
ix86_setup_frame_addresses ()
{
  /* Flag consumed by ix86_frame_pointer_required: once set, the function
     must keep a frame pointer so saved ebp values can be walked.  */
  cfun->machine->accesses_prev_frame = 1;
}
e075ae69 1824\f
4cf12e7e 1825static char pic_label_name[32];
e9a25f70 1826
e075ae69
RH
1827/* This function generates code for -fpic that loads %ebx with
1828 the return address of the caller and then returns. */
1829
void
ix86_asm_file_end (file)
     FILE *file;
{
  rtx xops[2];

  /* Nothing to do unless deep-branch-prediction PIC is in use and the
     thunk label was actually referenced (load_pic_register creates it).  */
  if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
    return;

  /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
     to updating relocations to a section being discarded such that this
     doesn't work.  Ought to detect this at configure time.  */
#if 0 && defined (ASM_OUTPUT_SECTION_NAME)
  /* The trick here is to create a linkonce section containing the
     pic label thunk, but to refer to it with an internal label.
     Because the label is internal, we don't have inter-dso name
     binding issues on hosts that don't support ".hidden".

     In order to use these macros, however, we must create a fake
     function decl.  */
  {
    tree decl = build_decl (FUNCTION_DECL,
			    get_identifier ("i686.get_pc_thunk"),
			    error_mark_node);
    DECL_ONE_ONLY (decl) = 1;
    UNIQUE_SECTION (decl, 0);
    named_section (decl, NULL, 0);
  }
#else
  text_section ();
#endif

  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
     internal (non-global) label that's being emitted, it didn't make
     sense to have .type information for local labels.  This caused
     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
     me debug info for a label that you're declaring non-global?) this
     was changed to call ASM_OUTPUT_LABEL() instead.  */

  ASM_OUTPUT_LABEL (file, pic_label_name);

  /* Emit the thunk body: copy the return address (at the top of the
     stack) into the PIC register, then return.  */
  xops[0] = pic_offset_table_rtx;
  xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
  output_asm_insn ("ret", xops);
}
32b5b1aa 1876
e075ae69
RH
1877void
1878load_pic_register ()
32b5b1aa 1879{
e075ae69 1880 rtx gotsym, pclab;
32b5b1aa 1881
a8a05998 1882 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
32b5b1aa 1883
e075ae69 1884 if (TARGET_DEEP_BRANCH_PREDICTION)
32b5b1aa 1885 {
4cf12e7e
RH
1886 if (! pic_label_name[0])
1887 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
e075ae69 1888 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
32b5b1aa 1889 }
e075ae69 1890 else
e5cb57e8 1891 {
e075ae69 1892 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
e5cb57e8 1893 }
e5cb57e8 1894
e075ae69 1895 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
2a2ab3f9 1896
e075ae69
RH
1897 if (! TARGET_DEEP_BRANCH_PREDICTION)
1898 emit_insn (gen_popsi1 (pic_offset_table_rtx));
79325812 1899
e075ae69 1900 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
e9a25f70 1901}
8dfe5673 1902
e075ae69 1903/* Generate an SImode "push" pattern for input ARG. */
e9a25f70 1904
e075ae69
RH
1905static rtx
1906gen_push (arg)
1907 rtx arg;
e9a25f70 1908{
c5c76735
JL
1909 return gen_rtx_SET (VOIDmode,
1910 gen_rtx_MEM (SImode,
1911 gen_rtx_PRE_DEC (SImode,
1912 stack_pointer_rtx)),
1913 arg);
e9a25f70
JL
1914}
1915
4dd2ac2c
JH
1916/* Return 1 if we need to save REGNO. */
1917static int
1918ix86_save_reg (regno)
1919 int regno;
1920{
1921 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1922 || current_function_uses_const_pool);
1923 return ((regs_ever_live[regno] && !call_used_regs[regno]
1924 && !fixed_regs[regno]
1925 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed))
1926 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used));
1927
1928}
1929
0903fcab
JH
1930/* Return number of registers to be saved on the stack. */
1931
1932static int
1933ix86_nsaved_regs ()
1934{
1935 int nregs = 0;
0903fcab
JH
1936 int regno;
1937
4dd2ac2c
JH
1938 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1939 if (ix86_save_reg (regno))
1940 nregs++;
0903fcab
JH
1941 return nregs;
1942}
1943
1944/* Return the offset between two registers, one to be eliminated, and the other
1945 its replacement, at the start of a routine. */
1946
1947HOST_WIDE_INT
1948ix86_initial_elimination_offset (from, to)
1949 int from;
1950 int to;
1951{
4dd2ac2c
JH
1952 struct ix86_frame frame;
1953 ix86_compute_frame_layout (&frame);
564d80f4
JH
1954
1955 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 1956 return frame.hard_frame_pointer_offset;
564d80f4
JH
1957 else if (from == FRAME_POINTER_REGNUM
1958 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 1959 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
1960 else
1961 {
564d80f4
JH
1962 if (to != STACK_POINTER_REGNUM)
1963 abort ();
1964 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 1965 return frame.stack_pointer_offset;
564d80f4
JH
1966 else if (from != FRAME_POINTER_REGNUM)
1967 abort ();
0903fcab 1968 else
4dd2ac2c 1969 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
1970 }
1971}
1972
4dd2ac2c 1973/* Fill structure ix86_frame about frame of currently computed function. */
0f290768 1974
4dd2ac2c
JH
/* Fill structure ix86_frame describing the stack layout of the function
   being compiled: number of saved registers, alignment paddings, and the
   offsets of the frame/hard-frame/stack pointers from the incoming args.  */

static void
ix86_compute_frame_layout (frame)
     struct ix86_frame *frame;
{
  HOST_WIDE_INT total_size;
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  int offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  total_size = size;

  /* Skip return value and save base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since i386 port is the only one using those
     features that may break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area.  */
  offset += frame->nregs * UNITS_PER_WORD;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
		     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  */
  if (ACCUMULATE_OUTGOING_ARGS)
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  */
  frame->padding2 = ((offset + preferred_alignment - 1)
		     & -preferred_alignment) - offset;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size);

#if 0
  /* Debug dump of the computed layout; flip to #if 1 when hacking on
     the frame layout code.  */
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
	   frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
#endif
}
2059
0903fcab
JH
2060/* Emit code to save registers in the prologue. */
2061
2062static void
2063ix86_emit_save_regs ()
2064{
2065 register int regno;
0903fcab 2066 rtx insn;
0903fcab 2067
4dd2ac2c
JH
2068 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
2069 if (ix86_save_reg (regno))
0903fcab
JH
2070 {
2071 insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
2072 RTX_FRAME_RELATED_P (insn) = 1;
2073 }
2074}
2075
0f290768 2076/* Expand the prologue into a bunch of separate insns. */
e075ae69
RH
2077
/* Expand the prologue into a bunch of separate insns: save ebp, save
   call-saved registers, allocate the frame, and set up the PIC register
   when needed.  */

void
ix86_expand_prologue ()
{
  rtx insn;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  struct ix86_frame frame;

  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  ix86_emit_save_regs ();

  if (frame.to_allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || frame.to_allocate < CHECK_STACK_LIMIT)
    {
      /* Small (or unprobed) allocation: just drop esp.  When a frame
	 pointer exists, tie the adjustment to it so the scheduler
	 cannot move it across frame references.  */
      if (frame_pointer_needed)
	insn = emit_insn (gen_pro_epilogue_adjust_stack
			  (stack_pointer_rtx, stack_pointer_rtx,
			   GEN_INT (-frame.to_allocate),
			   hard_frame_pointer_rtx));
      else
	insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				      GEN_INT (-frame.to_allocate)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* Large allocation with stack probing: call _alloca with the size
	 in eax.  ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (frame.to_allocate));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx));

      /* Record that the call uses eax so it is not deleted as dead.  */
      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    load_pic_register ();

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  However, if -fpic, the above call will have
     done that.  */
  if ((profile_flag || profile_block_flag) && ! pic_reg_used)
    emit_insn (gen_blockage ());
}
2146
0903fcab
JH
2147/* Emit code to add TSIZE to esp value. Use POP instruction when
2148 profitable. */
2149
2150static void
2151ix86_emit_epilogue_esp_adjustment (tsize)
2152 int tsize;
2153{
bdeb029c
JH
2154 /* If a frame pointer is present, we must be sure to tie the sp
2155 to the fp so that we don't mis-schedule. */
2156 if (frame_pointer_needed)
2157 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2158 stack_pointer_rtx,
2159 GEN_INT (tsize),
2160 hard_frame_pointer_rtx));
0903fcab 2161 else
bdeb029c
JH
2162 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2163 GEN_INT (tsize)));
0903fcab
JH
2164}
2165
da2d1d3a
JH
2166/* Emit code to restore saved registers using MOV insns. First register
2167 is restored from POINTER + OFFSET. */
2168static void
2169ix86_emit_restore_regs_using_mov (pointer, offset)
2170 rtx pointer;
2171 int offset;
2172{
2173 int regno;
da2d1d3a 2174
4dd2ac2c
JH
2175 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2176 if (ix86_save_reg (regno))
da2d1d3a 2177 {
4dd2ac2c
JH
2178 emit_move_insn (gen_rtx_REG (Pmode, regno),
2179 adj_offsettable_operand (gen_rtx_MEM (Pmode,
da2d1d3a
JH
2180 pointer),
2181 offset));
4dd2ac2c 2182 offset += UNITS_PER_WORD;
da2d1d3a
JH
2183 }
2184}
2185
0f290768 2186/* Restore function stack, frame, and registers. */
e9a25f70 2187
/* Restore function stack, frame, and registers.  EMIT_RETURN is false
   for sibcall epilogues, which must not end in a return insn.  */

void
ix86_expand_epilogue (emit_return)
     int emit_return;
{
  int regno;
  /* Nonzero when esp still points where the prologue left it.  */
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  */
  offset = -frame.nregs * UNITS_PER_WORD;

#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    {
      FUNCTION_BLOCK_PROFILER_EXIT;
    }
#endif

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.

     The default code result in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
	  && frame.nregs == 1))
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to
	 the end of block of saved registers, where we may simplify
	 addressing mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);

      if (!frame_pointer_needed)
	ix86_emit_epilogue_esp_adjustment (frame.to_allocate
					   + frame.nregs * UNITS_PER_WORD);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size)
	emit_insn (gen_leave ());
      else
	{
	  /* Discrete equivalent of leave: esp = ebp, then pop ebp.  */
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx,
						    hard_frame_pointer_rtx));
	  emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset),
						    hard_frame_pointer_rtx));
	}
      else if (frame.to_allocate)
	ix86_emit_epilogue_esp_adjustment (frame.to_allocate);

      /* Pop saved registers in ascending regno order (mirror of the
	 prologue's descending pushes).  */
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno))
	  emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
      if (frame_pointer_needed)
	emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (! emit_return)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
2299\f
2300/* Extract the parts of an RTL expression that is a valid memory address
2301 for an instruction. Return false if the structure of the address is
2302 grossly off. */
2303
/* Extract the parts (base, index, displacement, scale) of an RTL
   expression that is a valid memory address for an instruction.
   Return FALSE if the structure of the address is grossly off.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;			/* index*scale + base */
	  else
	    disp = op1;			/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);  /* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return FALSE;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return FALSE;
      scale = INTVAL (tmp);
      /* Only shifts by 0..3 map onto the 1/2/4/8 SIB scales.  */
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return FALSE;
      scale = 1 << scale;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return FALSE;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is no scaling;
     they cannot be encoded as index registers, so swap them into base.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
	  || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a
     displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or
     displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return TRUE;
}
01329426
JH
2425\f
2426/* Return cost of the memory address x.
2427 For i386, it is better to use a complex address than let gcc copy
2428 the address into a reg and make a new pseudo. But not if the address
2429 requires to two regs - that would mean more pseudos with longer
2430 lifetimes. */
2431int
2432ix86_address_cost (x)
2433 rtx x;
2434{
2435 struct ix86_address parts;
2436 int cost = 1;
3b3c6a3f 2437
01329426
JH
2438 if (!ix86_decompose_address (x, &parts))
2439 abort ();
2440
2441 /* More complex memory references are better. */
2442 if (parts.disp && parts.disp != const0_rtx)
2443 cost--;
2444
2445 /* Attempt to minimize number of registers in the address. */
2446 if ((parts.base
2447 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2448 || (parts.index
2449 && (!REG_P (parts.index)
2450 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2451 cost++;
2452
2453 if (parts.base
2454 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2455 && parts.index
2456 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2457 && parts.base != parts.index)
2458 cost++;
2459
2460 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
2461 since it's predecode logic can't detect the length of instructions
2462 and it degenerates to vector decoded. Increase cost of such
2463 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 2464 to split such addresses or even refuse such addresses at all.
01329426
JH
2465
2466 Following addressing modes are affected:
2467 [base+scale*index]
2468 [scale*index+disp]
2469 [base+index]
0f290768 2470
01329426
JH
2471 The first and last case may be avoidable by explicitly coding the zero in
2472 memory address, but I don't have AMD-K6 machine handy to check this
2473 theory. */
2474
2475 if (TARGET_K6
2476 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2477 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2478 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2479 cost += 10;
0f290768 2480
01329426
JH
2481 return cost;
2482}
2483\f
b949ea8b
JW
2484/* If X is a machine specific address (i.e. a symbol or label being
2485 referenced as a displacement from the GOT implemented using an
2486 UNSPEC), then return the base term. Otherwise return X. */
2487
2488rtx
2489ix86_find_base_term (x)
2490 rtx x;
2491{
2492 rtx term;
2493
2494 if (GET_CODE (x) != PLUS
2495 || XEXP (x, 0) != pic_offset_table_rtx
2496 || GET_CODE (XEXP (x, 1)) != CONST)
2497 return x;
2498
2499 term = XEXP (XEXP (x, 1), 0);
2500
2501 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
2502 term = XEXP (term, 0);
2503
2504 if (GET_CODE (term) != UNSPEC
2505 || XVECLEN (term, 0) != 1
2506 || XINT (term, 1) != 7)
2507 return x;
2508
2509 term = XVECEXP (term, 0, 0);
2510
2511 if (GET_CODE (term) != SYMBOL_REF
2512 && GET_CODE (term) != LABEL_REF)
2513 return x;
2514
2515 return term;
2516}
2517\f
e075ae69
RH
2518/* Determine if a given CONST RTX is a valid memory displacement
2519 in PIC mode. */
0f290768 2520
59be65f6 2521int
91bb873f
RH
2522legitimate_pic_address_disp_p (disp)
2523 register rtx disp;
2524{
2525 if (GET_CODE (disp) != CONST)
2526 return 0;
2527 disp = XEXP (disp, 0);
2528
2529 if (GET_CODE (disp) == PLUS)
2530 {
2531 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2532 return 0;
2533 disp = XEXP (disp, 0);
2534 }
2535
2536 if (GET_CODE (disp) != UNSPEC
2537 || XVECLEN (disp, 0) != 1)
2538 return 0;
2539
2540 /* Must be @GOT or @GOTOFF. */
2541 if (XINT (disp, 1) != 6
2542 && XINT (disp, 1) != 7)
2543 return 0;
2544
2545 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2546 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2547 return 0;
2548
2549 return 1;
2550}
2551
e075ae69
RH
2552/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2553 memory address for an instruction. The MODE argument is the machine mode
2554 for the MEM expression that wants to use this address.
2555
2556 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
2557 convert common non-canonical forms to canonical form so that they will
2558 be recognized. */
2559
3b3c6a3f
MM
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine
   mode for the MEM expression that wants to use this address.  STRICT
   selects between the strict (hard regs only) and non-strict register
   checks.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS
   should convert common non-canonical forms to canonical form so that
   they will be recognized.  */

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (! ix86_decompose_address (addr, &parts))
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor: only 1, 2, 4, 8 are encodable, and a scale
     requires an index register.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (!CONSTANT_ADDRESS_P (disp))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}

      if (GET_CODE (disp) == CONST_DOUBLE)
	{
	  reason = "displacement is a const_double";
	  goto report_error;
	}

      if (flag_pic && SYMBOLIC_CONST (disp))
	{
	  if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (HALF_PIC_P ())
	{
	  if (! HALF_PIC_ADDRESS_P (disp)
	      || (base != NULL_RTX || index != NULL_RTX))
	    {
	      reason = "displacement is an invalid half-pic reference";
	      goto report_error;
	    }
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
3b3c6a3f 2739\f
55efb413
JW
2740/* Return an unique alias set for the GOT. */
2741
0f290768 2742static HOST_WIDE_INT
55efb413
JW
2743ix86_GOT_alias_set ()
2744{
2745 static HOST_WIDE_INT set = -1;
2746 if (set == -1)
2747 set = new_alias_set ();
2748 return set;
0f290768 2749}
55efb413 2750
3b3c6a3f
MM
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

  if (GET_CODE (addr) == LABEL_REF
      || (GET_CODE (addr) == SYMBOL_REF
	  && (CONSTANT_POOL_ADDRESS_P (addr)
	      || SYMBOL_REF_FLAG (addr))))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  UNSPEC number 7 is rendered as
	 "@GOTOFF" by output_pic_addr_const below.  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      /* This symbol must be referenced via a load from the
	 Global Offset Table (@GOT).  UNSPEC number 6 is rendered as
	 "@GOT" by output_pic_addr_const below.  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
      new = gen_rtx_MEM (Pmode, new);
      /* GOT entries are constant after loading, so mark the load
	 unchanging and give it the GOT's private alias set.  */
      RTX_UNCHANGING_P (new) = 1;
      MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();

      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      emit_move_insn (reg, new);
      new = reg;
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == UNSPEC)
	    {
	      /* Check that the unspec is one of the ones we generate?  */
	    }
	  else if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if ((GET_CODE (op0) == LABEL_REF
	       || (GET_CODE (op0) == SYMBOL_REF
		   && (CONSTANT_POOL_ADDRESS_P (op0)
		       || SYMBOL_REF_FLAG (op0))))
	      && GET_CODE (op1) == CONST_INT)
	    {
	      current_function_uses_pic_offset_table = 1;
	      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
	      new = gen_rtx_PLUS (Pmode, new, op1);
	      new = gen_rtx_CONST (Pmode, new);
	      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

	      if (reg != 0)
		{
		  emit_move_insn (reg, new);
		  new = reg;
		}
	    }
	  else
	    {
	      /* General PLUS: legitimize both halves recursively.  Pass
		 NULL_RTX for the second half if REG was consumed by the
		 first, so the two results don't clobber each other.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new  = legitimize_pic_address (XEXP (addr, 1),
					     base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  /* Re-associate so any constant term ends up outermost.  */
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}
2873\f
3b3c6a3f
MM
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (x, oldx, mode)
     register rtx x;
     register rtx oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply, so the address
     matches the scaled-index forms the 386 actually supports.  */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* A MULT term cannot appear raw in an address; materialize it
	 into a register.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Last resort: force the non-register half into a fresh pseudo
	 so the address becomes reg+reg.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
2a2ab3f9
JVA
3058\f
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      /* 'P' requests a PLT reference for non-static symbols
	 (SYMBOL_REF_FLAG marks static data).  */
      if (code == 'P' && ! SYMBOL_REF_FLAG (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      /* Bracket the difference; the bracket style depends on the
	 assembler dialect (AT&T vs Intel).  */
      putc (ASSEMBLER_DIALECT ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      /* UNSPEC numbers 6/7/8 correspond to the relocations generated
	 by legitimize_pic_address and friends.  */
      switch (XINT (x, 1))
	{
	case 6:
	  fputs ("@GOT", file);
	  break;
	case 7:
	  fputs ("@GOTOFF", file);
	  break;
	case 8:
	  fputs ("@PLT", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
1865dbb5 3173
/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
   We need to handle our special PIC relocations.  */

void
i386_dwarf_output_addr_const (file, x)
     FILE *file;
     rtx x;
{
  fprintf (file, "%s", INT_ASM_OP);
  /* When compiling PIC, X may contain the @GOT/@GOTOFF/@PLT UNSPECs
     that only output_pic_addr_const knows how to print.  */
  if (flag_pic)
    output_pic_addr_const (file, x, '\0');
  else
    output_addr_const (file, x);
  fputc ('\n', file);
}
3189
/* In the name of slightly smaller debug output, and to cater to
   general assembler losage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   Returns ORIG_X unchanged unless it has the shape
   (plus (reg) (const ...)) where the const wraps a @GOT (6) or
   @GOTOFF (7) UNSPEC, optionally plus an integer offset.  */

rtx
i386_simplify_dwarf_addr (orig_x)
     rtx orig_x;
{
  rtx x = orig_x;

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 0)) != REG
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  x = XEXP (XEXP (x, 1), 0);
  /* (const (unspec [sym] 6-or-7)) -> sym  */
  if (GET_CODE (x) == UNSPEC
      && (XINT (x, 1) == 6
	  || XINT (x, 1) == 7))
    return XVECEXP (x, 0, 0);

  /* (const (plus (unspec [sym] 6-or-7) offset)) -> (plus sym offset)  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (XINT (XEXP (x, 0), 1) == 6
	  || XINT (XEXP (x, 0), 1) == 7))
    return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));

  return orig_x;
}
2a2ab3f9 3220\f
/* Write to FILE the instruction suffix ("e", "ne", "g", ...) for the
   comparison CODE performed under flags mode MODE.  If REVERSE is
   nonzero the suffix for the reversed condition is emitted.  FP is
   nonzero when emitting for fcmov, whose unsigned/unordered suffixes
   are spelled differently on some assemblers ("nbe"/"nb"/"u" rather
   than "a"/"ae"/"p").  Aborts on code/mode combinations the flags
   mode cannot represent.  */

static void
put_condition_code (code, mode, reverse, fp, file)
     enum rtx_code code;
     enum machine_mode mode;
     int reverse, fp;
     FILE *file;
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      /* FP comparisons must be representable as a single integer
	 condition here; split comparisons are handled elsewhere.  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	abort();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
	 Those same assemblers have the same but opposite losage on cmov.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case LTU:
      if (mode != CCmode)
	abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
	abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
3310
/* Print to FILE the assembler name of register X, formatted according
   to print-code CODE ('w' word, 'b' byte, 'k' SImode, 'q' DImode,
   'y' "st(0)" form, 'h' high byte, 'm' MMX; otherwise the name is
   chosen from the register's mode size).  Aborts on registers that
   never appear in assembler output (arg/frame pointer, flags, fpsr).  */

void
print_reg (x, code, file)
     rtx x;
     int code;
     FILE *file;
{
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  /* Translate the print code into an internal size selector used by
     the switches below (5 = MMX, 3 = "st(0)" form, 0 = high byte).  */
  if (code == 'w')
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else if (code == 'm' || MMX_REG_P (x))
    code = 5;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      switch (code)
	{
	case 5:
	  error ("Extended registers have no high halves\n");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("Unsupported operand size for extended register.\n");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 5:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      /* Integer registers in these sizes get an 'r' (64-bit) or 'e'
	 (32-bit) prefix; FP registers do not.  */
      if (! ANY_FP_REG_P (x))
	putc (code == 8 ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
3402
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   m -- print "st(n)" as an mmx register.
   D -- print condition for SSE cmp instruction.
 */

/* Print operand X (an rtx) to FILE under print code CODE (see table
   above; 0 means no special formatting).  This is the target's
   PRINT_OPERAND implementation.  */

void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  if (code)
    {
      switch (code)
	{
	case '*':
	  /* AT&T dialect marks indirect jumps/calls with '*'.  */
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('*', file);
	  return;

	case 'A':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('*', file);
	  else if (ASSEMBLER_DIALECT == 1)
	    {
	      /* Intel syntax. For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (GET_CODE (x) != REG)
		{
		  putc ('[', file);
		  PRINT_OPERAND (file, x, 0);
		  putc (']', file);
		  return;
		}
	    }

	  PRINT_OPERAND (file, x, 0);
	  return;


	/* Explicit size suffixes are only needed in AT&T syntax.  */
	case 'L':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('t', file);
	  return;

	case 'z':
	  /* 387 opcodes don't get size suffixes if the operands are
	     registers.  */

	  if (STACK_REG_P (x))
	    return;

	  /* this is the size of op from size of operand */
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 2:
#ifdef HAVE_GAS_FILDS_FISTS
	      putc ('s', file);
#endif
	      return;

	    case 4:
	      if (GET_MODE (x) == SFmode)
		{
		  putc ('s', file);
		  return;
		}
	      else
		putc ('l', file);
	      return;

	    case 12:
	    case 16:
	      putc ('t', file);
	      return;

	    case 8:
	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
		{
#ifdef GAS_MNEMONICS
		  putc ('q', file);
#else
		  /* Older assemblers spell the 64-bit integer suffix "ll".  */
		  putc ('l', file);
		  putc ('l', file);
#endif
		}
	      else
		putc ('l', file);
	      return;

	    default:
	      abort ();
	    }

	case 'b':
	case 'w':
	case 'k':
	case 'q':
	case 'h':
	case 'y':
	case 'm':
	case 'X':
	case 'P':
	  /* These are handled below, after the operand kind is known.  */
	  break;

	case 's':
	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      PRINT_OPERAND (file, x, 0);
	      putc (',', file);
	    }
	  return;

	case 'D':
	  /* Little bit of braindamage here.  The SSE compare instructions
	     does use completely different names for the comparisons that the
	     fp conditional moves.  */
	  switch (GET_CODE (x))
	    {
	    case EQ:
	    case UNEQ:
	      fputs ("eq", file);
	      break;
	    case LT:
	    case UNLT:
	      fputs ("lt", file);
	      break;
	    case LE:
	    case UNLE:
	      fputs ("le", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case NE:
	    case LTGT:
	      fputs ("neq", file);
	      break;
	    case UNGE:
	    case GE:
	      fputs ("nlt", file);
	      break;
	    case UNGT:
	    case GT:
	      fputs ("nle", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    default:
	      abort ();
	      break;
	    }
	  return;
	case 'C':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
	  return;
	case 'F':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
	  return;

	  /* Like above, but reverse condition */
	case 'c':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
	  return;
	case 'f':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
	  return;

	default:
	  {
	    char str[50];
	    sprintf (str, "invalid operand code `%c'", code);
	    output_operand_lossage (str);
	  }
	}
    }

  if (GET_CODE (x) == REG)
    {
      PRINT_REG (x, code, file);
    }

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
	{
	  const char * size;
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "XWORD"; break;
	    case 16: size = "XMMWORD"; break;
	    default:
	      abort ();
	    }

	  /* Check for explicit size override (codes 'b', 'w' and 'k') */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";

	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      if (flag_pic && CONSTANT_ADDRESS_P (x))
	output_pic_addr_const (file, x, code);
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      /* Emit single-precision immediates as their raw bit pattern.  */
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == 0)
	putc ('$', file);
      fprintf (file, "0x%lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
	   && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }
  else
    {
      /* Anything else is an immediate or symbolic constant; 'P' (PLT
	 reference) suppresses the immediate marker.  */
      if (code != 'P')
	{
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
3725\f
/* Print a memory operand whose address is ADDR.  The address is first
   decomposed into base + index*scale + displacement, then printed in
   AT&T form "disp(base,index,scale)" or Intel form
   "[base+disp+index*scale]" depending on ASSEMBLER_DIALECT.  */

void
print_operand_address (file, addr)
     FILE *file;
     register rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
	{
	  /* Intel syntax needs an explicit segment for a bare
	     constant address.  */
	  if (ASSEMBLER_DIALECT != 0)
	    {
	      if (USER_LABEL_PREFIX[0] == 0)
		putc ('%', file);
	      fputs ("ds:", file);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
	}
      else if (flag_pic)
	output_pic_addr_const (file, addr, 0);
      else
	output_addr_const (file, addr);
    }
  else
    {
      if (ASSEMBLER_DIALECT == 0)
	{
	  /* AT&T syntax: disp(base,index,scale).  */
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    PRINT_REG (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  /* Intel syntax: [base+disp+index*scale].  */
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      PRINT_REG (base, 0, file);
	      if (offset)
		{
		  /* Negative offsets carry their own '-' sign.  */
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
3843\f
/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_di (operands, num, lo_half, hi_half)
     rtx operands[];
     int num;
     rtx lo_half[], hi_half[];
{
  while (num--)
    {
      rtx op = operands[num];
      if (CONSTANT_P (op))
	split_double (op, &lo_half[num], &hi_half[num]);
      else if (! reload_completed)
	/* Before reload, subregs via lowpart/highpart are still legal.  */
	{
	  lo_half[num] = gen_lowpart (SImode, op);
	  hi_half[num] = gen_highpart (SImode, op);
	}
      else if (GET_CODE (op) == REG)
	/* After reload, a DImode value occupies two consecutive
	   hard registers.  */
	{
	  lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
	  hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
	}
      else if (offsettable_memref_p (op))
	{
	  rtx lo_addr = XEXP (op, 0);
	  rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
	  lo_half[num] = change_address (op, SImode, lo_addr);
	  hi_half[num] = change_address (op, SImode, hi_addr);
	}
      else
	abort ();
    }
}
3882\f
2a2ab3f9
JVA
3883/* Output code to perform a 387 binary operation in INSN, one of PLUS,
3884 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3885 is the expression of the binary operation. The output may either be
3886 emitted here, or returned to the caller, like all output_* functions.
3887
3888 There is no guarantee that the operands are the same mode, as they
0f290768 3889 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 3890
e3c2afab
AM
3891#ifndef SYSV386_COMPAT
3892/* Set to 1 for compatibility with brain-damaged assemblers. No-one
3893 wants to fix the assemblers because that causes incompatibility
3894 with gcc. No-one wants to fix gcc because that causes
3895 incompatibility with assemblers... You can use the option of
3896 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
3897#define SYSV386_COMPAT 1
3898#endif
3899
69ddee61 3900const char *
2a2ab3f9
JVA
3901output_387_binary_op (insn, operands)
3902 rtx insn;
3903 rtx *operands;
3904{
e3c2afab 3905 static char buf[30];
69ddee61 3906 const char *p;
1deaa899
JH
3907 const char *ssep;
3908 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
2a2ab3f9 3909
e3c2afab
AM
3910#ifdef ENABLE_CHECKING
3911 /* Even if we do not want to check the inputs, this documents input
3912 constraints. Which helps in understanding the following code. */
3913 if (STACK_REG_P (operands[0])
3914 && ((REG_P (operands[1])
3915 && REGNO (operands[0]) == REGNO (operands[1])
3916 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
3917 || (REG_P (operands[2])
3918 && REGNO (operands[0]) == REGNO (operands[2])
3919 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
3920 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
3921 ; /* ok */
1deaa899 3922 else if (!is_sse)
e3c2afab
AM
3923 abort ();
3924#endif
3925
2a2ab3f9
JVA
3926 switch (GET_CODE (operands[3]))
3927 {
3928 case PLUS:
e075ae69
RH
3929 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3930 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3931 p = "fiadd";
3932 else
3933 p = "fadd";
1deaa899 3934 ssep = "add";
2a2ab3f9
JVA
3935 break;
3936
3937 case MINUS:
e075ae69
RH
3938 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3939 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3940 p = "fisub";
3941 else
3942 p = "fsub";
1deaa899 3943 ssep = "sub";
2a2ab3f9
JVA
3944 break;
3945
3946 case MULT:
e075ae69
RH
3947 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3948 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3949 p = "fimul";
3950 else
3951 p = "fmul";
1deaa899 3952 ssep = "mul";
2a2ab3f9
JVA
3953 break;
3954
3955 case DIV:
e075ae69
RH
3956 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3957 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3958 p = "fidiv";
3959 else
3960 p = "fdiv";
1deaa899 3961 ssep = "div";
2a2ab3f9
JVA
3962 break;
3963
3964 default:
3965 abort ();
3966 }
3967
1deaa899
JH
3968 if (is_sse)
3969 {
3970 strcpy (buf, ssep);
3971 if (GET_MODE (operands[0]) == SFmode)
3972 strcat (buf, "ss\t{%2, %0|%0, %2}");
3973 else
3974 strcat (buf, "sd\t{%2, %0|%0, %2}");
3975 return buf;
3976 }
e075ae69 3977 strcpy (buf, p);
2a2ab3f9
JVA
3978
3979 switch (GET_CODE (operands[3]))
3980 {
3981 case MULT:
3982 case PLUS:
3983 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
3984 {
e3c2afab 3985 rtx temp = operands[2];
2a2ab3f9
JVA
3986 operands[2] = operands[1];
3987 operands[1] = temp;
3988 }
3989
e3c2afab
AM
3990 /* know operands[0] == operands[1]. */
3991
2a2ab3f9 3992 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
3993 {
3994 p = "%z2\t%2";
3995 break;
3996 }
2a2ab3f9
JVA
3997
3998 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
3999 {
4000 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
4001 /* How is it that we are storing to a dead operand[2]?
4002 Well, presumably operands[1] is dead too. We can't
4003 store the result to st(0) as st(0) gets popped on this
4004 instruction. Instead store to operands[2] (which I
4005 think has to be st(1)). st(1) will be popped later.
4006 gcc <= 2.8.1 didn't have this check and generated
4007 assembly code that the Unixware assembler rejected. */
4008 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 4009 else
e3c2afab 4010 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 4011 break;
6b28fd63 4012 }
2a2ab3f9
JVA
4013
4014 if (STACK_TOP_P (operands[0]))
e3c2afab 4015 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 4016 else
e3c2afab 4017 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 4018 break;
2a2ab3f9
JVA
4019
4020 case MINUS:
4021 case DIV:
4022 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
4023 {
4024 p = "r%z1\t%1";
4025 break;
4026 }
2a2ab3f9
JVA
4027
4028 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
4029 {
4030 p = "%z2\t%2";
4031 break;
4032 }
2a2ab3f9 4033
2a2ab3f9 4034 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 4035 {
e3c2afab
AM
4036#if SYSV386_COMPAT
4037 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
4038 derived assemblers, confusingly reverse the direction of
4039 the operation for fsub{r} and fdiv{r} when the
4040 destination register is not st(0). The Intel assembler
4041 doesn't have this brain damage. Read !SYSV386_COMPAT to
4042 figure out what the hardware really does. */
4043 if (STACK_TOP_P (operands[0]))
4044 p = "{p\t%0, %2|rp\t%2, %0}";
4045 else
4046 p = "{rp\t%2, %0|p\t%0, %2}";
4047#else
6b28fd63 4048 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
4049 /* As above for fmul/fadd, we can't store to st(0). */
4050 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 4051 else
e3c2afab
AM
4052 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
4053#endif
e075ae69 4054 break;
6b28fd63 4055 }
2a2ab3f9
JVA
4056
4057 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 4058 {
e3c2afab 4059#if SYSV386_COMPAT
6b28fd63 4060 if (STACK_TOP_P (operands[0]))
e3c2afab 4061 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 4062 else
e3c2afab
AM
4063 p = "{p\t%1, %0|rp\t%0, %1}";
4064#else
4065 if (STACK_TOP_P (operands[0]))
4066 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
4067 else
4068 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
4069#endif
e075ae69 4070 break;
6b28fd63 4071 }
2a2ab3f9
JVA
4072
4073 if (STACK_TOP_P (operands[0]))
4074 {
4075 if (STACK_TOP_P (operands[1]))
e3c2afab 4076 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 4077 else
e3c2afab 4078 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 4079 break;
2a2ab3f9
JVA
4080 }
4081 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
4082 {
4083#if SYSV386_COMPAT
4084 p = "{\t%1, %0|r\t%0, %1}";
4085#else
4086 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
4087#endif
4088 }
2a2ab3f9 4089 else
e3c2afab
AM
4090 {
4091#if SYSV386_COMPAT
4092 p = "{r\t%2, %0|\t%0, %2}";
4093#else
4094 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
4095#endif
4096 }
e075ae69 4097 break;
2a2ab3f9
JVA
4098
4099 default:
4100 abort ();
4101 }
e075ae69
RH
4102
4103 strcat (buf, p);
4104 return buf;
2a2ab3f9 4105}
e075ae69 4106
2a2ab3f9 4107/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 4108 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 4109 operand may be [SDX]Fmode. */
2a2ab3f9 4110
69ddee61 4111const char *
2a2ab3f9
JVA
4112output_fix_trunc (insn, operands)
4113 rtx insn;
4114 rtx *operands;
4115{
4116 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69
RH
4117 int dimode_p = GET_MODE (operands[0]) == DImode;
4118 rtx xops[4];
2a2ab3f9 4119
e075ae69
RH
4120 /* Jump through a hoop or two for DImode, since the hardware has no
4121 non-popping instruction. We used to do this a different way, but
4122 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
4123 if (dimode_p && !stack_top_dies)
4124 output_asm_insn ("fld\t%y1", operands);
e075ae69
RH
4125
4126 if (! STACK_TOP_P (operands[1]))
10195bd8
JW
4127 abort ();
4128
e075ae69
RH
4129 xops[0] = GEN_INT (12);
4130 xops[1] = adj_offsettable_operand (operands[2], 1);
4131 xops[1] = change_address (xops[1], QImode, NULL_RTX);
305f097e 4132
e075ae69
RH
4133 xops[2] = operands[0];
4134 if (GET_CODE (operands[0]) != MEM)
4135 xops[2] = operands[3];
2a2ab3f9 4136
e075ae69
RH
4137 output_asm_insn ("fnstcw\t%2", operands);
4138 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
4139 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
4140 output_asm_insn ("fldcw\t%2", operands);
4141 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
e9a25f70 4142
e075ae69
RH
4143 if (stack_top_dies || dimode_p)
4144 output_asm_insn ("fistp%z2\t%2", xops);
10195bd8 4145 else
e075ae69
RH
4146 output_asm_insn ("fist%z2\t%2", xops);
4147
4148 output_asm_insn ("fldcw\t%2", operands);
10195bd8 4149
e075ae69 4150 if (GET_CODE (operands[0]) != MEM)
2a2ab3f9 4151 {
e075ae69 4152 if (dimode_p)
2e14a41b 4153 {
e075ae69
RH
4154 split_di (operands+0, 1, xops+0, xops+1);
4155 split_di (operands+3, 1, xops+2, xops+3);
4156 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4157 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
2e14a41b 4158 }
46d21d2c 4159 else if (GET_MODE (operands[0]) == SImode)
e3c2afab 4160 output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
46d21d2c
JW
4161 else
4162 output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
2a2ab3f9 4163 }
2a2ab3f9 4164
e075ae69 4165 return "";
2a2ab3f9 4166}
cda749b1 4167
e075ae69
RH
4168/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
4169 should be used and 2 when fnstsw should be used. UNORDERED_P is true
4170 when fucom should be used. */
4171
69ddee61 4172const char *
e075ae69 4173output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
4174 rtx insn;
4175 rtx *operands;
e075ae69 4176 int eflags_p, unordered_p;
cda749b1 4177{
e075ae69
RH
4178 int stack_top_dies;
4179 rtx cmp_op0 = operands[0];
4180 rtx cmp_op1 = operands[1];
0644b628 4181 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
e075ae69
RH
4182
4183 if (eflags_p == 2)
4184 {
4185 cmp_op0 = cmp_op1;
4186 cmp_op1 = operands[2];
4187 }
0644b628
JH
4188 if (is_sse)
4189 {
4190 if (GET_MODE (operands[0]) == SFmode)
4191 if (unordered_p)
4192 return "ucomiss\t{%1, %0|%0, %1}";
4193 else
4194 return "comiss\t{%1, %0|%0, %y}";
4195 else
4196 if (unordered_p)
4197 return "ucomisd\t{%1, %0|%0, %1}";
4198 else
4199 return "comisd\t{%1, %0|%0, %y}";
4200 }
cda749b1 4201
e075ae69 4202 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
4203 abort ();
4204
e075ae69 4205 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 4206
e075ae69
RH
4207 if (STACK_REG_P (cmp_op1)
4208 && stack_top_dies
4209 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
4210 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 4211 {
e075ae69
RH
4212 /* If both the top of the 387 stack dies, and the other operand
4213 is also a stack register that dies, then this must be a
4214 `fcompp' float compare */
4215
4216 if (eflags_p == 1)
4217 {
4218 /* There is no double popping fcomi variant. Fortunately,
4219 eflags is immune from the fstp's cc clobbering. */
4220 if (unordered_p)
4221 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
4222 else
4223 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
4224 return "fstp\t%y0";
4225 }
4226 else
cda749b1 4227 {
e075ae69
RH
4228 if (eflags_p == 2)
4229 {
4230 if (unordered_p)
4231 return "fucompp\n\tfnstsw\t%0";
4232 else
4233 return "fcompp\n\tfnstsw\t%0";
4234 }
cda749b1
JW
4235 else
4236 {
e075ae69
RH
4237 if (unordered_p)
4238 return "fucompp";
4239 else
4240 return "fcompp";
cda749b1
JW
4241 }
4242 }
cda749b1
JW
4243 }
4244 else
4245 {
e075ae69 4246 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 4247
0f290768 4248 static const char * const alt[24] =
e075ae69
RH
4249 {
4250 "fcom%z1\t%y1",
4251 "fcomp%z1\t%y1",
4252 "fucom%z1\t%y1",
4253 "fucomp%z1\t%y1",
0f290768 4254
e075ae69
RH
4255 "ficom%z1\t%y1",
4256 "ficomp%z1\t%y1",
4257 NULL,
4258 NULL,
4259
4260 "fcomi\t{%y1, %0|%0, %y1}",
4261 "fcomip\t{%y1, %0|%0, %y1}",
4262 "fucomi\t{%y1, %0|%0, %y1}",
4263 "fucomip\t{%y1, %0|%0, %y1}",
4264
4265 NULL,
4266 NULL,
4267 NULL,
4268 NULL,
4269
4270 "fcom%z2\t%y2\n\tfnstsw\t%0",
4271 "fcomp%z2\t%y2\n\tfnstsw\t%0",
4272 "fucom%z2\t%y2\n\tfnstsw\t%0",
4273 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 4274
e075ae69
RH
4275 "ficom%z2\t%y2\n\tfnstsw\t%0",
4276 "ficomp%z2\t%y2\n\tfnstsw\t%0",
4277 NULL,
4278 NULL
4279 };
4280
4281 int mask;
69ddee61 4282 const char *ret;
e075ae69
RH
4283
4284 mask = eflags_p << 3;
4285 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4286 mask |= unordered_p << 1;
4287 mask |= stack_top_dies;
4288
4289 if (mask >= 24)
4290 abort ();
4291 ret = alt[mask];
4292 if (ret == NULL)
4293 abort ();
cda749b1 4294
e075ae69 4295 return ret;
cda749b1
JW
4296 }
4297}
2a2ab3f9 4298
e075ae69 4299/* Output assembler code to FILE to initialize basic-block profiling.
2a2ab3f9 4300
e075ae69 4301 If profile_block_flag == 2
2a2ab3f9 4302
e075ae69
RH
4303 Output code to call the subroutine `__bb_init_trace_func'
4304 and pass two parameters to it. The first parameter is
4305 the address of a block allocated in the object module.
4306 The second parameter is the number of the first basic block
4307 of the function.
2a2ab3f9 4308
e075ae69 4309 The name of the block is a local symbol made with this statement:
0f290768 4310
e075ae69 4311 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
2a2ab3f9 4312
e075ae69
RH
4313 Of course, since you are writing the definition of
4314 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4315 can take a short cut in the definition of this macro and use the
4316 name that you know will result.
2a2ab3f9 4317
e075ae69
RH
4318 The number of the first basic block of the function is
4319 passed to the macro in BLOCK_OR_LABEL.
2a2ab3f9 4320
e075ae69
RH
4321 If described in a virtual assembler language the code to be
4322 output looks like:
2a2ab3f9 4323
e075ae69
RH
4324 parameter1 <- LPBX0
4325 parameter2 <- BLOCK_OR_LABEL
4326 call __bb_init_trace_func
2a2ab3f9 4327
e075ae69 4328 else if profile_block_flag != 0
e74389ff 4329
e075ae69
RH
4330 Output code to call the subroutine `__bb_init_func'
4331 and pass one single parameter to it, which is the same
4332 as the first parameter to `__bb_init_trace_func'.
e74389ff 4333
e075ae69
RH
4334 The first word of this parameter is a flag which will be nonzero if
4335 the object module has already been initialized. So test this word
4336 first, and do not call `__bb_init_func' if the flag is nonzero.
4337 Note: When profile_block_flag == 2 the test need not be done
4338 but `__bb_init_trace_func' *must* be called.
e74389ff 4339
e075ae69
RH
4340 BLOCK_OR_LABEL may be used to generate a label number as a
4341 branch destination in case `__bb_init_func' will not be called.
e74389ff 4342
e075ae69
RH
4343 If described in a virtual assembler language the code to be
4344 output looks like:
2a2ab3f9 4345
e075ae69
RH
4346 cmp (LPBX0),0
4347 jne local_label
4348 parameter1 <- LPBX0
4349 call __bb_init_func
4350 local_label:
4351*/
c572e5ba 4352
e075ae69
RH
4353void
4354ix86_output_function_block_profiler (file, block_or_label)
4355 FILE *file;
4356 int block_or_label;
c572e5ba 4357{
e075ae69
RH
4358 static int num_func = 0;
4359 rtx xops[8];
4360 char block_table[80], false_label[80];
c572e5ba 4361
e075ae69 4362 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
e9a25f70 4363
e075ae69
RH
4364 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4365 xops[5] = stack_pointer_rtx;
4366 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
2a2ab3f9 4367
e075ae69 4368 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
c572e5ba 4369
e075ae69 4370 switch (profile_block_flag)
c572e5ba 4371 {
e075ae69
RH
4372 case 2:
4373 xops[2] = GEN_INT (block_or_label);
4374 xops[3] = gen_rtx_MEM (Pmode,
4375 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4376 xops[6] = GEN_INT (8);
e9a25f70 4377
e075ae69
RH
4378 output_asm_insn ("push{l}\t%2", xops);
4379 if (!flag_pic)
4380 output_asm_insn ("push{l}\t%1", xops);
e9a25f70 4381 else
870a0c2c 4382 {
e075ae69
RH
4383 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4384 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 4385 }
e075ae69
RH
4386 output_asm_insn ("call\t%P3", xops);
4387 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4388 break;
c572e5ba 4389
e075ae69
RH
4390 default:
4391 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
c572e5ba 4392
e075ae69
RH
4393 xops[0] = const0_rtx;
4394 xops[2] = gen_rtx_MEM (Pmode,
4395 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4396 xops[3] = gen_rtx_MEM (Pmode,
4397 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4398 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4399 xops[6] = GEN_INT (4);
a14003ee 4400
e075ae69 4401 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
446ba526 4402
e075ae69
RH
4403 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4404 output_asm_insn ("jne\t%2", xops);
870a0c2c 4405
e075ae69
RH
4406 if (!flag_pic)
4407 output_asm_insn ("push{l}\t%1", xops);
4408 else
4409 {
4410 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
4411 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 4412 }
e075ae69
RH
4413 output_asm_insn ("call\t%P3", xops);
4414 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4415 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4416 num_func++;
4417 break;
c572e5ba 4418 }
2a2ab3f9 4419}
305f097e 4420
e075ae69
RH
4421/* Output assembler code to FILE to increment a counter associated
4422 with basic block number BLOCKNO.
305f097e 4423
e075ae69 4424 If profile_block_flag == 2
ecbc4695 4425
e075ae69
RH
4426 Output code to initialize the global structure `__bb' and
4427 call the function `__bb_trace_func' which will increment the
4428 counter.
ecbc4695 4429
e075ae69
RH
4430 `__bb' consists of two words. In the first word the number
4431 of the basic block has to be stored. In the second word
0f290768 4432 the address of a block allocated in the object module
e075ae69 4433 has to be stored.
ecbc4695 4434
e075ae69 4435 The basic block number is given by BLOCKNO.
ecbc4695 4436
0f290768 4437 The address of the block is given by the label created with
305f097e 4438
e075ae69 4439 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
305f097e 4440
e075ae69 4441 by FUNCTION_BLOCK_PROFILER.
ecbc4695 4442
e075ae69
RH
4443 Of course, since you are writing the definition of
4444 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4445 can take a short cut in the definition of this macro and use the
4446 name that you know will result.
305f097e 4447
e075ae69
RH
4448 If described in a virtual assembler language the code to be
4449 output looks like:
305f097e 4450
e075ae69
RH
4451 move BLOCKNO -> (__bb)
4452 move LPBX0 -> (__bb+4)
4453 call __bb_trace_func
305f097e 4454
e075ae69
RH
4455 Note that function `__bb_trace_func' must not change the
4456 machine state, especially the flag register. To grant
4457 this, you must output code to save and restore registers
4458 either in this macro or in the macros MACHINE_STATE_SAVE
4459 and MACHINE_STATE_RESTORE. The last two macros will be
4460 used in the function `__bb_trace_func', so you must make
0f290768 4461 sure that the function prologue does not change any
e075ae69 4462 register prior to saving it with MACHINE_STATE_SAVE.
305f097e 4463
e075ae69 4464 else if profile_block_flag != 0
305f097e 4465
e075ae69
RH
4466 Output code to increment the counter directly.
4467 Basic blocks are numbered separately from zero within each
4468 compiled object module. The count associated with block number
0f290768 4469 BLOCKNO is at index BLOCKNO in an array of words; the name of
e075ae69 4470 this array is a local symbol made with this statement:
32b5b1aa 4471
e075ae69 4472 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
32b5b1aa 4473
e075ae69
RH
4474 Of course, since you are writing the definition of
4475 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4476 can take a short cut in the definition of this macro and use the
0f290768 4477 name that you know will result.
32b5b1aa 4478
e075ae69
RH
4479 If described in a virtual assembler language the code to be
4480 output looks like:
32b5b1aa 4481
e075ae69
RH
4482 inc (LPBX2+4*BLOCKNO)
4483*/
32b5b1aa 4484
/* Output assembler code to FILE to increment the counter for basic
   block BLOCKNO.  With profile_block_flag == 2 this fills in the
   global `__bb' descriptor and calls __bb_trace_func (preserving
   EFLAGS around the call); otherwise it increments the counter word
   at LPBX2 + 4*BLOCKNO directly.  See the block comment above.  */

void
ix86_output_block_profiler (file, blockno)
     FILE *file ATTRIBUTE_UNUSED;
     int blockno;
{
  rtx xops[8], cnt_rtx;
  char counts[80];
  char *block_table = counts;	/* Alias: the same buffer holds either label.  */

  switch (profile_block_flag)
    {
    case 2:
      ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);

      xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
      xops[2] = GEN_INT (blockno);
      xops[3] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
      xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
      xops[5] = plus_constant (xops[4], 4);
      xops[0] = gen_rtx_MEM (SImode, xops[4]);	/* first word of __bb */
      xops[6] = gen_rtx_MEM (SImode, xops[5]);	/* second word of __bb */

      CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;

      /* Save EFLAGS; __bb_trace_func must not observably clobber state.  */
      output_asm_insn ("pushf", xops);
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      if (flag_pic)
	{
	  /* Need a scratch register to materialize the PIC address.  */
	  xops[7] = gen_rtx_REG (Pmode, 0);	/* eax */
	  output_asm_insn ("push{l}\t%7", xops);
	  output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
	  output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
	  output_asm_insn ("pop{l}\t%7", xops);
	}
      else
	output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
      output_asm_insn ("call\t%P3", xops);
      output_asm_insn ("popf", xops);

      break;

    default:
      /* Direct increment of the counter array entry.  */
      ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
      cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
      SYMBOL_REF_FLAG (cnt_rtx) = TRUE;

      if (blockno)
	cnt_rtx = plus_constant (cnt_rtx, blockno*4);

      if (flag_pic)
	cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);

      xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
      output_asm_insn ("inc{l}\t%0", xops);

      break;
    }
}
32b5b1aa 4544\f
/* Expand a move of MODE from operands[1] to operands[0], legitimizing
   the operands as required by the i386 backend (PIC symbolic
   addresses, mem-to-mem moves, push operands, FP constants) and
   emitting the resulting SET insn.  */

void
ix86_expand_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  int strict = (reload_in_progress || reload_completed);
  rtx insn;

  if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
    {
      /* Emit insns to move operands[1] into operands[0].  */

      if (GET_CODE (operands[0]) == MEM)
	operands[1] = force_reg (Pmode, operands[1]);
      else
	{
	  /* Legitimize the symbolic address through the PIC register;
	     if that already produced the destination, we are done.  */
	  rtx temp = operands[0];
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (operands[1], temp);
	  if (temp == operands[0])
	    return;
	  operands[1] = temp;
	}
    }
  else
    {
      /* mem-to-mem is not directly encodable (except via push);
	 go through a register.  */
      if (GET_CODE (operands[0]) == MEM
	  && (GET_MODE (operands[0]) == QImode
	      || !push_operand (operands[0], mode))
	  && GET_CODE (operands[1]) == MEM)
	operands[1] = force_reg (mode, operands[1]);

      if (push_operand (operands[0], mode)
	  && ! general_no_elim_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;	/* During/after reload no new memory may be created.  */
	  else if (GET_CODE (operands[1]) == CONST_DOUBLE
		   && register_operand (operands[0], mode))
	    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
	}
    }

  insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);

  emit_insn (insn);
}
e9a25f70 4600
e075ae69
RH
4601/* Attempt to expand a binary operator. Make the expansion closer to the
4602 actual machine, then just general_operand, which will allow 3 separate
9d81fc27 4603 memory references (one output, two input) in a single insn. */
e9a25f70 4604
e075ae69
RH
/* Attempt to expand the binary operator CODE in MODE over OPERANDS
   ([0] = dest, [1] and [2] = sources), legitimizing the operands so
   at most one memory reference survives, emitting the operation
   (with a FLAGS_REG clobber outside reload), and copying the result
   back to operands[0] if a temporary was substituted.  */

void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators.  */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  Keep the one that
     matches the destination (if any) in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE.  */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
4690
4691/* Return TRUE or FALSE depending on whether the binary operator meets the
4692 appropriate constraints. */
4693
4694int
4695ix86_binary_operator_ok (code, mode, operands)
4696 enum rtx_code code;
4697 enum machine_mode mode ATTRIBUTE_UNUSED;
4698 rtx operands[3];
4699{
4700 /* Both source operands cannot be in memory. */
4701 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4702 return 0;
4703 /* If the operation is not commutable, source 1 cannot be a constant. */
4704 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4705 return 0;
4706 /* If the destination is memory, we must have a matching source operand. */
4707 if (GET_CODE (operands[0]) == MEM
4708 && ! (rtx_equal_p (operands[0], operands[1])
4709 || (GET_RTX_CLASS (code) == 'c'
4710 && rtx_equal_p (operands[0], operands[2]))))
4711 return 0;
06a964de
JH
4712 /* If the operation is not commutable and the source 1 is memory, we must
4713 have a matching destionation. */
4714 if (GET_CODE (operands[1]) == MEM
4715 && GET_RTX_CLASS (code) != 'c'
4716 && ! rtx_equal_p (operands[0], operands[1]))
4717 return 0;
e075ae69
RH
4718 return 1;
4719}
4720
4721/* Attempt to expand a unary operator. Make the expansion closer to the
4722 actual machine, then just general_operand, which will allow 2 separate
9d81fc27 4723 memory references (one output, one input) in a single insn. */
e075ae69 4724
/* Attempt to expand the unary operator CODE in MODE over OPERANDS
   ([0] = dest, [1] = source), legitimizing the operands so at most
   one (matching) memory reference survives, emitting the operation
   (with a FLAGS_REG clobber except for NOT), and copying the result
   back to operands[0] if a temporary was substituted.  */

void
ix86_expand_unary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE.  */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  NOT does not touch the flags,
	 so no clobber is needed in either case.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
4782
4783/* Return TRUE or FALSE depending on whether the unary operator meets the
4784 appropriate constraints. */
4785
4786int
4787ix86_unary_operator_ok (code, mode, operands)
4788 enum rtx_code code ATTRIBUTE_UNUSED;
4789 enum machine_mode mode ATTRIBUTE_UNUSED;
4790 rtx operands[2] ATTRIBUTE_UNUSED;
4791{
06a964de
JH
4792 /* If one of operands is memory, source and destination must match. */
4793 if ((GET_CODE (operands[0]) == MEM
4794 || GET_CODE (operands[1]) == MEM)
4795 && ! rtx_equal_p (operands[0], operands[1]))
4796 return FALSE;
e075ae69
RH
4797 return TRUE;
4798}
4799
16189740
RH
4800/* Return TRUE or FALSE depending on whether the first SET in INSN
4801 has source and destination with matching CC modes, and that the
4802 CC mode is at least as constrained as REQ_MODE. */
4803
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  The switch walks
   the CC-mode lattice, falling through from more constrained modes
   to less constrained ones.  */

int
ix86_match_ccmode (insn, req_mode)
     rtx insn;
     enum machine_mode req_mode;
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      /* CCNOmode satisfies itself, or plain CCmode when comparing
	 against zero.  */
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  /* Source and destination of the compare must agree on the mode.  */
  return (GET_MODE (SET_SRC (set)) == set_mode);
}
4850
e075ae69
RH
4851/* Generate insn patterns to do an integer compare of OPERANDS. */
4852
4853static rtx
4854ix86_expand_int_compare (code, op0, op1)
4855 enum rtx_code code;
4856 rtx op0, op1;
4857{
4858 enum machine_mode cmpmode;
4859 rtx tmp, flags;
4860
4861 cmpmode = SELECT_CC_MODE (code, op0, op1);
4862 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
4863
4864 /* This is very simple, but making the interface the same as in the
4865 FP case makes the rest of the code easier. */
4866 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
4867 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
4868
4869 /* Return the test that should be put into the flags user, i.e.
4870 the bcc, scc, or cmov instruction. */
4871 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
4872}
4873
3a3677ff
RH
4874/* Figure out whether to use ordered or unordered fp comparisons.
4875 Return the appropriate mode to use. */
e075ae69 4876
b1cdafbb 4877enum machine_mode
3a3677ff 4878ix86_fp_compare_mode (code)
8752c357 4879 enum rtx_code code ATTRIBUTE_UNUSED;
e075ae69 4880{
9e7adcb3
JH
4881 /* ??? In order to make all comparisons reversible, we do all comparisons
4882 non-trapping when compiling for IEEE. Once gcc is able to distinguish
4883 all forms trapping and nontrapping comparisons, we can make inequality
4884 comparisons trapping again, since it results in better code when using
4885 FCOM based compares. */
4886 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
4887}
4888
9076b9c1
JH
4889enum machine_mode
4890ix86_cc_mode (code, op0, op1)
4891 enum rtx_code code;
4892 rtx op0, op1;
4893{
4894 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4895 return ix86_fp_compare_mode (code);
4896 switch (code)
4897 {
4898 /* Only zero flag is needed. */
4899 case EQ: /* ZF=0 */
4900 case NE: /* ZF!=0 */
4901 return CCZmode;
4902 /* Codes needing carry flag. */
265dab10
JH
4903 case GEU: /* CF=0 */
4904 case GTU: /* CF=0 & ZF=0 */
7e08e190
JH
4905 case LTU: /* CF=1 */
4906 case LEU: /* CF=1 | ZF=1 */
265dab10 4907 return CCmode;
9076b9c1
JH
4908 /* Codes possibly doable only with sign flag when
4909 comparing against zero. */
4910 case GE: /* SF=OF or SF=0 */
7e08e190 4911 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
4912 if (op1 == const0_rtx)
4913 return CCGOCmode;
4914 else
4915 /* For other cases Carry flag is not required. */
4916 return CCGCmode;
4917 /* Codes doable only with sign flag when comparing
4918 against zero, but we miss jump instruction for it
4919 so we need to use relational tests agains overflow
4920 that thus needs to be zero. */
4921 case GT: /* ZF=0 & SF=OF */
4922 case LE: /* ZF=1 | SF<>OF */
4923 if (op1 == const0_rtx)
4924 return CCNOmode;
4925 else
4926 return CCGCmode;
4927 default:
0f290768 4928 abort ();
9076b9c1
JH
4929 }
4930}
4931
3a3677ff
RH
4932/* Return true if we should use an FCOMI instruction for this fp comparison. */
4933
a940d8bd 4934int
3a3677ff 4935ix86_use_fcomi_compare (code)
9e7adcb3 4936 enum rtx_code code ATTRIBUTE_UNUSED;
3a3677ff 4937{
9e7adcb3
JH
4938 enum rtx_code swapped_code = swap_condition (code);
4939 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
4940 || (ix86_fp_comparison_cost (swapped_code)
4941 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
4942}
4943
0f290768 4944/* Swap, force into registers, or otherwise massage the two operands
3a3677ff
RH
4945 to a fp comparison. The operands are updated in place; the new
4946 comparsion code is returned. */
4947
4948static enum rtx_code
4949ix86_prepare_fp_compare_args (code, pop0, pop1)
4950 enum rtx_code code;
4951 rtx *pop0, *pop1;
4952{
4953 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
4954 rtx op0 = *pop0, op1 = *pop1;
4955 enum machine_mode op_mode = GET_MODE (op0);
0644b628 4956 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
3a3677ff 4957
e075ae69 4958 /* All of the unordered compare instructions only work on registers.
3a3677ff
RH
4959 The same is true of the XFmode compare instructions. The same is
4960 true of the fcomi compare instructions. */
4961
0644b628
JH
4962 if (!is_sse
4963 && (fpcmp_mode == CCFPUmode
4964 || op_mode == XFmode
4965 || op_mode == TFmode
4966 || ix86_use_fcomi_compare (code)))
e075ae69 4967 {
3a3677ff
RH
4968 op0 = force_reg (op_mode, op0);
4969 op1 = force_reg (op_mode, op1);
e075ae69
RH
4970 }
4971 else
4972 {
4973 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
4974 things around if they appear profitable, otherwise force op0
4975 into a register. */
4976
4977 if (standard_80387_constant_p (op0) == 0
4978 || (GET_CODE (op0) == MEM
4979 && ! (standard_80387_constant_p (op1) == 0
4980 || GET_CODE (op1) == MEM)))
32b5b1aa 4981 {
e075ae69
RH
4982 rtx tmp;
4983 tmp = op0, op0 = op1, op1 = tmp;
4984 code = swap_condition (code);
4985 }
4986
4987 if (GET_CODE (op0) != REG)
3a3677ff 4988 op0 = force_reg (op_mode, op0);
e075ae69
RH
4989
4990 if (CONSTANT_P (op1))
4991 {
4992 if (standard_80387_constant_p (op1))
3a3677ff 4993 op1 = force_reg (op_mode, op1);
e075ae69 4994 else
3a3677ff 4995 op1 = validize_mem (force_const_mem (op_mode, op1));
32b5b1aa
SC
4996 }
4997 }
e9a25f70 4998
9e7adcb3
JH
4999 /* Try to rearrange the comparison to make it cheaper. */
5000 if (ix86_fp_comparison_cost (code)
5001 > ix86_fp_comparison_cost (swap_condition (code))
5002 && (GET_CODE (op0) == REG || !reload_completed))
5003 {
5004 rtx tmp;
5005 tmp = op0, op0 = op1, op1 = tmp;
5006 code = swap_condition (code);
5007 if (GET_CODE (op0) != REG)
5008 op0 = force_reg (op_mode, op0);
5009 }
5010
3a3677ff
RH
5011 *pop0 = op0;
5012 *pop1 = op1;
5013 return code;
5014}
5015
c0c102a9
JH
5016/* Convert comparison codes we use to represent FP comparison to integer
5017 code that will result in proper branch. Return UNKNOWN if no such code
5018 is available. */
5019static enum rtx_code
5020ix86_fp_compare_code_to_integer (code)
5021 enum rtx_code code;
5022{
5023 switch (code)
5024 {
5025 case GT:
5026 return GTU;
5027 case GE:
5028 return GEU;
5029 case ORDERED:
5030 case UNORDERED:
5031 return code;
5032 break;
5033 case UNEQ:
5034 return EQ;
5035 break;
5036 case UNLT:
5037 return LTU;
5038 break;
5039 case UNLE:
5040 return LEU;
5041 break;
5042 case LTGT:
5043 return NE;
5044 break;
5045 default:
5046 return UNKNOWN;
5047 }
5048}
5049
5050/* Split comparison code CODE into comparisons we can do using branch
5051 instructions. BYPASS_CODE is comparison code for branch that will
5052 branch around FIRST_CODE and SECOND_CODE. If some of branches
5053 is not required, set value to NIL.
5054 We never require more than two branches. */
5055static void
5056ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
5057 enum rtx_code code, *bypass_code, *first_code, *second_code;
5058{
5059 *first_code = code;
5060 *bypass_code = NIL;
5061 *second_code = NIL;
5062
5063 /* The fcomi comparison sets flags as follows:
5064
5065 cmp ZF PF CF
5066 > 0 0 0
5067 < 0 0 1
5068 = 1 0 0
5069 un 1 1 1 */
5070
5071 switch (code)
5072 {
5073 case GT: /* GTU - CF=0 & ZF=0 */
5074 case GE: /* GEU - CF=0 */
5075 case ORDERED: /* PF=0 */
5076 case UNORDERED: /* PF=1 */
5077 case UNEQ: /* EQ - ZF=1 */
5078 case UNLT: /* LTU - CF=1 */
5079 case UNLE: /* LEU - CF=1 | ZF=1 */
5080 case LTGT: /* EQ - ZF=0 */
5081 break;
5082 case LT: /* LTU - CF=1 - fails on unordered */
5083 *first_code = UNLT;
5084 *bypass_code = UNORDERED;
5085 break;
5086 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
5087 *first_code = UNLE;
5088 *bypass_code = UNORDERED;
5089 break;
5090 case EQ: /* EQ - ZF=1 - fails on unordered */
5091 *first_code = UNEQ;
5092 *bypass_code = UNORDERED;
5093 break;
5094 case NE: /* NE - ZF=0 - fails on unordered */
5095 *first_code = LTGT;
5096 *second_code = UNORDERED;
5097 break;
5098 case UNGE: /* GEU - CF=0 - fails on unordered */
5099 *first_code = GE;
5100 *second_code = UNORDERED;
5101 break;
5102 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
5103 *first_code = GT;
5104 *second_code = UNORDERED;
5105 break;
5106 default:
5107 abort ();
5108 }
5109 if (!TARGET_IEEE_FP)
5110 {
5111 *second_code = NIL;
5112 *bypass_code = NIL;
5113 }
5114}
5115
9e7adcb3
JH
5116/* Return cost of comparison done fcom + arithmetics operations on AX.
5117 All following functions do use number of instructions as an cost metrics.
5118 In future this should be tweaked to compute bytes for optimize_size and
5119 take into account performance of various instructions on various CPUs. */
5120static int
5121ix86_fp_comparison_arithmetics_cost (code)
5122 enum rtx_code code;
5123{
5124 if (!TARGET_IEEE_FP)
5125 return 4;
5126 /* The cost of code output by ix86_expand_fp_compare. */
5127 switch (code)
5128 {
5129 case UNLE:
5130 case UNLT:
5131 case LTGT:
5132 case GT:
5133 case GE:
5134 case UNORDERED:
5135 case ORDERED:
5136 case UNEQ:
5137 return 4;
5138 break;
5139 case LT:
5140 case NE:
5141 case EQ:
5142 case UNGE:
5143 return 5;
5144 break;
5145 case LE:
5146 case UNGT:
5147 return 6;
5148 break;
5149 default:
5150 abort ();
5151 }
5152}
5153
5154/* Return cost of comparison done using fcomi operation.
5155 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5156static int
5157ix86_fp_comparison_fcomi_cost (code)
5158 enum rtx_code code;
5159{
5160 enum rtx_code bypass_code, first_code, second_code;
5161 /* Return arbitarily high cost when instruction is not supported - this
5162 prevents gcc from using it. */
5163 if (!TARGET_CMOVE)
5164 return 1024;
5165 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5166 return (bypass_code != NIL || second_code != NIL) + 2;
5167}
5168
5169/* Return cost of comparison done using sahf operation.
5170 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5171static int
5172ix86_fp_comparison_sahf_cost (code)
5173 enum rtx_code code;
5174{
5175 enum rtx_code bypass_code, first_code, second_code;
5176 /* Return arbitarily high cost when instruction is not preferred - this
5177 avoids gcc from using it. */
5178 if (!TARGET_USE_SAHF && !optimize_size)
5179 return 1024;
5180 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5181 return (bypass_code != NIL || second_code != NIL) + 3;
5182}
5183
5184/* Compute cost of the comparison done using any method.
5185 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5186static int
5187ix86_fp_comparison_cost (code)
5188 enum rtx_code code;
5189{
5190 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
5191 int min;
5192
5193 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
5194 sahf_cost = ix86_fp_comparison_sahf_cost (code);
5195
5196 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
5197 if (min > sahf_cost)
5198 min = sahf_cost;
5199 if (min > fcomi_cost)
5200 min = fcomi_cost;
5201 return min;
5202}
c0c102a9 5203
3a3677ff
RH
5204/* Generate insn patterns to do a floating point compare of OPERANDS. */
5205
9e7adcb3
JH
5206static rtx
5207ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
3a3677ff
RH
5208 enum rtx_code code;
5209 rtx op0, op1, scratch;
9e7adcb3
JH
5210 rtx *second_test;
5211 rtx *bypass_test;
3a3677ff
RH
5212{
5213 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 5214 rtx tmp, tmp2;
9e7adcb3 5215 int cost = ix86_fp_comparison_cost (code);
c0c102a9 5216 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
5217
5218 fpcmp_mode = ix86_fp_compare_mode (code);
5219 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
5220
9e7adcb3
JH
5221 if (second_test)
5222 *second_test = NULL_RTX;
5223 if (bypass_test)
5224 *bypass_test = NULL_RTX;
5225
c0c102a9
JH
5226 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5227
9e7adcb3
JH
5228 /* Do fcomi/sahf based test when profitable. */
5229 if ((bypass_code == NIL || bypass_test)
5230 && (second_code == NIL || second_test)
5231 && ix86_fp_comparison_arithmetics_cost (code) > cost)
32b5b1aa 5232 {
c0c102a9
JH
5233 if (TARGET_CMOVE)
5234 {
5235 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5236 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
5237 tmp);
5238 emit_insn (tmp);
5239 }
5240 else
5241 {
5242 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5243 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
bf71a4f8
JH
5244 if (!scratch)
5245 scratch = gen_reg_rtx (HImode);
c0c102a9
JH
5246 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5247 emit_insn (gen_x86_sahf_1 (scratch));
5248 }
e075ae69
RH
5249
5250 /* The FP codes work out to act like unsigned. */
9a915772 5251 intcmp_mode = fpcmp_mode;
9e7adcb3
JH
5252 code = first_code;
5253 if (bypass_code != NIL)
5254 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
5255 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5256 const0_rtx);
5257 if (second_code != NIL)
5258 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
5259 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5260 const0_rtx);
e075ae69
RH
5261 }
5262 else
5263 {
5264 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69
RH
5265 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5266 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
bf71a4f8
JH
5267 if (!scratch)
5268 scratch = gen_reg_rtx (HImode);
3a3677ff 5269 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 5270
9a915772
JH
5271 /* In the unordered case, we have to check C2 for NaN's, which
5272 doesn't happen to work out to anything nice combination-wise.
5273 So do some bit twiddling on the value we've got in AH to come
5274 up with an appropriate set of condition codes. */
e075ae69 5275
9a915772
JH
5276 intcmp_mode = CCNOmode;
5277 switch (code)
32b5b1aa 5278 {
9a915772
JH
5279 case GT:
5280 case UNGT:
5281 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 5282 {
3a3677ff 5283 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 5284 code = EQ;
9a915772
JH
5285 }
5286 else
5287 {
5288 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5289 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5290 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
5291 intcmp_mode = CCmode;
5292 code = GEU;
5293 }
5294 break;
5295 case LT:
5296 case UNLT:
5297 if (code == LT && TARGET_IEEE_FP)
5298 {
3a3677ff
RH
5299 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5300 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
5301 intcmp_mode = CCmode;
5302 code = EQ;
9a915772
JH
5303 }
5304 else
5305 {
5306 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
5307 code = NE;
5308 }
5309 break;
5310 case GE:
5311 case UNGE:
5312 if (code == GE || !TARGET_IEEE_FP)
5313 {
3a3677ff 5314 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 5315 code = EQ;
9a915772
JH
5316 }
5317 else
5318 {
5319 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5320 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5321 GEN_INT (0x01)));
5322 code = NE;
5323 }
5324 break;
5325 case LE:
5326 case UNLE:
5327 if (code == LE && TARGET_IEEE_FP)
5328 {
3a3677ff
RH
5329 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5330 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5331 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
5332 intcmp_mode = CCmode;
5333 code = LTU;
9a915772
JH
5334 }
5335 else
5336 {
5337 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5338 code = NE;
5339 }
5340 break;
5341 case EQ:
5342 case UNEQ:
5343 if (code == EQ && TARGET_IEEE_FP)
5344 {
3a3677ff
RH
5345 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5346 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
5347 intcmp_mode = CCmode;
5348 code = EQ;
9a915772
JH
5349 }
5350 else
5351 {
3a3677ff
RH
5352 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5353 code = NE;
5354 break;
9a915772
JH
5355 }
5356 break;
5357 case NE:
5358 case LTGT:
5359 if (code == NE && TARGET_IEEE_FP)
5360 {
3a3677ff 5361 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9a915772
JH
5362 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5363 GEN_INT (0x40)));
3a3677ff 5364 code = NE;
9a915772
JH
5365 }
5366 else
5367 {
3a3677ff
RH
5368 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5369 code = EQ;
32b5b1aa 5370 }
9a915772
JH
5371 break;
5372
5373 case UNORDERED:
5374 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5375 code = NE;
5376 break;
5377 case ORDERED:
5378 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5379 code = EQ;
5380 break;
5381
5382 default:
5383 abort ();
32b5b1aa 5384 }
32b5b1aa 5385 }
e075ae69
RH
5386
5387 /* Return the test that should be put into the flags user, i.e.
5388 the bcc, scc, or cmov instruction. */
5389 return gen_rtx_fmt_ee (code, VOIDmode,
5390 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5391 const0_rtx);
5392}
5393
9e3e266c 5394rtx
a1b8572c 5395ix86_expand_compare (code, second_test, bypass_test)
e075ae69 5396 enum rtx_code code;
a1b8572c 5397 rtx *second_test, *bypass_test;
e075ae69
RH
5398{
5399 rtx op0, op1, ret;
5400 op0 = ix86_compare_op0;
5401 op1 = ix86_compare_op1;
5402
a1b8572c
JH
5403 if (second_test)
5404 *second_test = NULL_RTX;
5405 if (bypass_test)
5406 *bypass_test = NULL_RTX;
5407
e075ae69 5408 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 5409 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 5410 second_test, bypass_test);
32b5b1aa 5411 else
e075ae69
RH
5412 ret = ix86_expand_int_compare (code, op0, op1);
5413
5414 return ret;
5415}
5416
5417void
3a3677ff 5418ix86_expand_branch (code, label)
e075ae69 5419 enum rtx_code code;
e075ae69
RH
5420 rtx label;
5421{
3a3677ff 5422 rtx tmp;
e075ae69 5423
3a3677ff 5424 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 5425 {
3a3677ff
RH
5426 case QImode:
5427 case HImode:
5428 case SImode:
a1b8572c 5429 tmp = ix86_expand_compare (code, NULL, NULL);
e075ae69
RH
5430 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5431 gen_rtx_LABEL_REF (VOIDmode, label),
5432 pc_rtx);
5433 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 5434 return;
e075ae69 5435
3a3677ff
RH
5436 case SFmode:
5437 case DFmode:
0f290768 5438 case XFmode:
2b589241 5439 case TFmode:
3a3677ff
RH
5440 /* Don't expand the comparison early, so that we get better code
5441 when jump or whoever decides to reverse the comparison. */
5442 {
5443 rtvec vec;
5444 int use_fcomi;
5445
5446 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
5447 &ix86_compare_op1);
5448
0b9aaeee 5449 tmp = gen_rtx_fmt_ee (code, VOIDmode,
3a3677ff
RH
5450 ix86_compare_op0, ix86_compare_op1);
5451 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5452 gen_rtx_LABEL_REF (VOIDmode, label),
5453 pc_rtx);
5454 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
5455
5456 use_fcomi = ix86_use_fcomi_compare (code);
5457 vec = rtvec_alloc (3 + !use_fcomi);
5458 RTVEC_ELT (vec, 0) = tmp;
5459 RTVEC_ELT (vec, 1)
5460 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
5461 RTVEC_ELT (vec, 2)
5462 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
5463 if (! use_fcomi)
5464 RTVEC_ELT (vec, 3)
5465 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
5466
5467 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
5468 return;
5469 }
32b5b1aa 5470
3a3677ff
RH
5471 case DImode:
5472 /* Expand DImode branch into multiple compare+branch. */
5473 {
5474 rtx lo[2], hi[2], label2;
5475 enum rtx_code code1, code2, code3;
32b5b1aa 5476
3a3677ff
RH
5477 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
5478 {
5479 tmp = ix86_compare_op0;
5480 ix86_compare_op0 = ix86_compare_op1;
5481 ix86_compare_op1 = tmp;
5482 code = swap_condition (code);
5483 }
5484 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
5485 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 5486
3a3677ff
RH
5487 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
5488 avoid two branches. This costs one extra insn, so disable when
5489 optimizing for size. */
32b5b1aa 5490
3a3677ff
RH
5491 if ((code == EQ || code == NE)
5492 && (!optimize_size
5493 || hi[1] == const0_rtx || lo[1] == const0_rtx))
5494 {
5495 rtx xor0, xor1;
32b5b1aa 5496
3a3677ff
RH
5497 xor1 = hi[0];
5498 if (hi[1] != const0_rtx)
5499 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
5500 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 5501
3a3677ff
RH
5502 xor0 = lo[0];
5503 if (lo[1] != const0_rtx)
5504 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
5505 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 5506
3a3677ff
RH
5507 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
5508 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 5509
3a3677ff
RH
5510 ix86_compare_op0 = tmp;
5511 ix86_compare_op1 = const0_rtx;
5512 ix86_expand_branch (code, label);
5513 return;
5514 }
e075ae69 5515
1f9124e4
JJ
5516 /* Otherwise, if we are doing less-than or greater-or-equal-than,
5517 op1 is a constant and the low word is zero, then we can just
5518 examine the high word. */
32b5b1aa 5519
1f9124e4
JJ
5520 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
5521 switch (code)
5522 {
5523 case LT: case LTU: case GE: case GEU:
5524 ix86_compare_op0 = hi[0];
5525 ix86_compare_op1 = hi[1];
5526 ix86_expand_branch (code, label);
5527 return;
5528 default:
5529 break;
5530 }
e075ae69 5531
3a3677ff 5532 /* Otherwise, we need two or three jumps. */
e075ae69 5533
3a3677ff 5534 label2 = gen_label_rtx ();
e075ae69 5535
3a3677ff
RH
5536 code1 = code;
5537 code2 = swap_condition (code);
5538 code3 = unsigned_condition (code);
e075ae69 5539
3a3677ff
RH
5540 switch (code)
5541 {
5542 case LT: case GT: case LTU: case GTU:
5543 break;
e075ae69 5544
3a3677ff
RH
5545 case LE: code1 = LT; code2 = GT; break;
5546 case GE: code1 = GT; code2 = LT; break;
5547 case LEU: code1 = LTU; code2 = GTU; break;
5548 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 5549
3a3677ff
RH
5550 case EQ: code1 = NIL; code2 = NE; break;
5551 case NE: code2 = NIL; break;
e075ae69 5552
3a3677ff
RH
5553 default:
5554 abort ();
5555 }
e075ae69 5556
3a3677ff
RH
5557 /*
5558 * a < b =>
5559 * if (hi(a) < hi(b)) goto true;
5560 * if (hi(a) > hi(b)) goto false;
5561 * if (lo(a) < lo(b)) goto true;
5562 * false:
5563 */
5564
5565 ix86_compare_op0 = hi[0];
5566 ix86_compare_op1 = hi[1];
5567
5568 if (code1 != NIL)
5569 ix86_expand_branch (code1, label);
5570 if (code2 != NIL)
5571 ix86_expand_branch (code2, label2);
5572
5573 ix86_compare_op0 = lo[0];
5574 ix86_compare_op1 = lo[1];
5575 ix86_expand_branch (code3, label);
5576
5577 if (code2 != NIL)
5578 emit_label (label2);
5579 return;
5580 }
e075ae69 5581
3a3677ff
RH
5582 default:
5583 abort ();
5584 }
32b5b1aa 5585}
e075ae69 5586
9e7adcb3
JH
5587/* Split branch based on floating point condition. */
5588void
5589ix86_split_fp_branch (condition, op1, op2, target1, target2, tmp)
5590 rtx condition, op1, op2, target1, target2, tmp;
5591{
5592 rtx second, bypass;
5593 rtx label = NULL_RTX;
5594 enum rtx_code code = GET_CODE (condition);
9e7adcb3
JH
5595
5596 if (target2 != pc_rtx)
5597 {
5598 rtx tmp = target2;
5599 code = reverse_condition_maybe_unordered (code);
5600 target2 = target1;
5601 target1 = tmp;
5602 }
5603
5604 condition = ix86_expand_fp_compare (code, op1, op2,
5605 tmp, &second, &bypass);
5606 if (bypass != NULL_RTX)
5607 {
5608 label = gen_label_rtx ();
5609 emit_jump_insn (gen_rtx_SET
5610 (VOIDmode, pc_rtx,
5611 gen_rtx_IF_THEN_ELSE (VOIDmode,
5612 bypass,
5613 gen_rtx_LABEL_REF (VOIDmode,
5614 label),
5615 pc_rtx)));
5616 }
5617 /* AMD Athlon and probably other CPUs too have fast bypass path between the
5618 comparison and first branch. The second branch takes longer to execute
5619 so place first branch the worse predicable one if possible. */
5620 if (second != NULL_RTX
5621 && (GET_CODE (second) == UNORDERED || GET_CODE (second) == ORDERED))
5622 {
5623 rtx tmp = condition;
5624 condition = second;
5625 second = tmp;
5626 }
5627 emit_jump_insn (gen_rtx_SET
5628 (VOIDmode, pc_rtx,
5629 gen_rtx_IF_THEN_ELSE (VOIDmode,
5630 condition, target1, target2)));
5631 if (second != NULL_RTX)
5632 emit_jump_insn (gen_rtx_SET
5633 (VOIDmode, pc_rtx,
5634 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, target2)));
5635 if (label != NULL_RTX)
5636 emit_label (label);
5637}
5638
32b5b1aa 5639int
3a3677ff 5640ix86_expand_setcc (code, dest)
e075ae69 5641 enum rtx_code code;
e075ae69 5642 rtx dest;
32b5b1aa 5643{
a1b8572c
JH
5644 rtx ret, tmp, tmpreg;
5645 rtx second_test, bypass_test;
e075ae69
RH
5646 int type;
5647
5648 if (GET_MODE (ix86_compare_op0) == DImode)
5649 return 0; /* FAIL */
5650
5651 /* Three modes of generation:
5652 0 -- destination does not overlap compare sources:
5653 clear dest first, emit strict_low_part setcc.
5654 1 -- destination does overlap compare sources:
5655 emit subreg setcc, zero extend.
5656 2 -- destination is in QImode:
5657 emit setcc only.
5658 */
5659
5660 type = 0;
e075ae69
RH
5661
5662 if (GET_MODE (dest) == QImode)
5663 type = 2;
5664 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
a500c31b 5665 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
e075ae69
RH
5666 type = 1;
5667
5668 if (type == 0)
5669 emit_move_insn (dest, const0_rtx);
5670
a1b8572c 5671 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
5672 PUT_MODE (ret, QImode);
5673
5674 tmp = dest;
a1b8572c 5675 tmpreg = dest;
e075ae69 5676 if (type == 0)
32b5b1aa 5677 {
e075ae69 5678 tmp = gen_lowpart (QImode, dest);
a1b8572c 5679 tmpreg = tmp;
e075ae69
RH
5680 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
5681 }
5682 else if (type == 1)
5683 {
5684 if (!cse_not_expected)
5685 tmp = gen_reg_rtx (QImode);
5686 else
5687 tmp = gen_lowpart (QImode, dest);
a1b8572c 5688 tmpreg = tmp;
e075ae69 5689 }
32b5b1aa 5690
e075ae69 5691 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
5692 if (bypass_test || second_test)
5693 {
5694 rtx test = second_test;
5695 int bypass = 0;
5696 rtx tmp2 = gen_reg_rtx (QImode);
5697 if (bypass_test)
5698 {
5699 if (second_test)
5700 abort();
5701 test = bypass_test;
5702 bypass = 1;
5703 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
5704 }
5705 PUT_MODE (test, QImode);
5706 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
5707
5708 if (bypass)
5709 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
5710 else
5711 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
5712 }
e075ae69
RH
5713
5714 if (type == 1)
5715 {
5716 rtx clob;
5717
5718 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
5719 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
5720 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5721 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5722 emit_insn (tmp);
32b5b1aa 5723 }
e075ae69
RH
5724
5725 return 1; /* DONE */
32b5b1aa 5726}
e075ae69 5727
32b5b1aa 5728int
e075ae69
RH
5729ix86_expand_int_movcc (operands)
5730 rtx operands[];
32b5b1aa 5731{
e075ae69
RH
5732 enum rtx_code code = GET_CODE (operands[1]), compare_code;
5733 rtx compare_seq, compare_op;
a1b8572c 5734 rtx second_test, bypass_test;
32b5b1aa 5735
36583fea
JH
5736 /* When the compare code is not LTU or GEU, we can not use sbbl case.
5737 In case comparsion is done with immediate, we can convert it to LTU or
5738 GEU by altering the integer. */
5739
5740 if ((code == LEU || code == GTU)
5741 && GET_CODE (ix86_compare_op1) == CONST_INT
5742 && GET_MODE (operands[0]) != HImode
5743 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
0f290768 5744 && GET_CODE (operands[2]) == CONST_INT
36583fea
JH
5745 && GET_CODE (operands[3]) == CONST_INT)
5746 {
5747 if (code == LEU)
5748 code = LTU;
5749 else
5750 code = GEU;
5751 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
5752 }
3a3677ff 5753
e075ae69 5754 start_sequence ();
a1b8572c 5755 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
5756 compare_seq = gen_sequence ();
5757 end_sequence ();
5758
5759 compare_code = GET_CODE (compare_op);
5760
5761 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
5762 HImode insns, we'd be swallowed in word prefix ops. */
5763
5764 if (GET_MODE (operands[0]) != HImode
0f290768 5765 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
5766 && GET_CODE (operands[3]) == CONST_INT)
5767 {
5768 rtx out = operands[0];
5769 HOST_WIDE_INT ct = INTVAL (operands[2]);
5770 HOST_WIDE_INT cf = INTVAL (operands[3]);
5771 HOST_WIDE_INT diff;
5772
a1b8572c
JH
5773 if ((compare_code == LTU || compare_code == GEU)
5774 && !second_test && !bypass_test)
e075ae69 5775 {
e075ae69
RH
5776
5777 /* Detect overlap between destination and compare sources. */
5778 rtx tmp = out;
5779
0f290768 5780 /* To simplify rest of code, restrict to the GEU case. */
36583fea
JH
5781 if (compare_code == LTU)
5782 {
5783 int tmp = ct;
5784 ct = cf;
5785 cf = tmp;
5786 compare_code = reverse_condition (compare_code);
5787 code = reverse_condition (code);
5788 }
5789 diff = ct - cf;
5790
e075ae69 5791 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
a500c31b 5792 || reg_overlap_mentioned_p (out, ix86_compare_op1))
e075ae69
RH
5793 tmp = gen_reg_rtx (SImode);
5794
5795 emit_insn (compare_seq);
5796 emit_insn (gen_x86_movsicc_0_m1 (tmp));
5797
36583fea
JH
5798 if (diff == 1)
5799 {
5800 /*
5801 * cmpl op0,op1
5802 * sbbl dest,dest
5803 * [addl dest, ct]
5804 *
5805 * Size 5 - 8.
5806 */
5807 if (ct)
5808 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5809 }
5810 else if (cf == -1)
5811 {
5812 /*
5813 * cmpl op0,op1
5814 * sbbl dest,dest
5815 * orl $ct, dest
5816 *
5817 * Size 8.
5818 */
5819 emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
5820 }
5821 else if (diff == -1 && ct)
5822 {
5823 /*
5824 * cmpl op0,op1
5825 * sbbl dest,dest
5826 * xorl $-1, dest
5827 * [addl dest, cf]
5828 *
5829 * Size 8 - 11.
5830 */
5831 emit_insn (gen_one_cmplsi2 (tmp, tmp));
5832 if (cf)
5833 emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
5834 }
5835 else
5836 {
5837 /*
5838 * cmpl op0,op1
5839 * sbbl dest,dest
5840 * andl cf - ct, dest
5841 * [addl dest, ct]
5842 *
5843 * Size 8 - 11.
5844 */
5845 emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
5846 if (ct)
5847 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5848 }
e075ae69
RH
5849
5850 if (tmp != out)
5851 emit_move_insn (out, tmp);
5852
5853 return 1; /* DONE */
5854 }
5855
5856 diff = ct - cf;
5857 if (diff < 0)
5858 {
5859 HOST_WIDE_INT tmp;
5860 tmp = ct, ct = cf, cf = tmp;
5861 diff = -diff;
734dba19
JH
5862 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
5863 {
5864 /* We may be reversing unordered compare to normal compare, that
5865 is not valid in general (we may convert non-trapping condition
5866 to trapping one), however on i386 we currently emit all
5867 comparisons unordered. */
5868 compare_code = reverse_condition_maybe_unordered (compare_code);
5869 code = reverse_condition_maybe_unordered (code);
5870 }
5871 else
5872 {
5873 compare_code = reverse_condition (compare_code);
5874 code = reverse_condition (code);
5875 }
e075ae69
RH
5876 }
5877 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
5878 || diff == 3 || diff == 5 || diff == 9)
5879 {
5880 /*
5881 * xorl dest,dest
5882 * cmpl op1,op2
5883 * setcc dest
5884 * lea cf(dest*(ct-cf)),dest
5885 *
5886 * Size 14.
5887 *
5888 * This also catches the degenerate setcc-only case.
5889 */
5890
5891 rtx tmp;
5892 int nops;
5893
5894 out = emit_store_flag (out, code, ix86_compare_op0,
5895 ix86_compare_op1, VOIDmode, 0, 1);
5896
5897 nops = 0;
5898 if (diff == 1)
5899 tmp = out;
5900 else
5901 {
5902 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
5903 nops++;
5904 if (diff & 1)
5905 {
5906 tmp = gen_rtx_PLUS (SImode, tmp, out);
5907 nops++;
5908 }
5909 }
5910 if (cf != 0)
5911 {
5912 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
5913 nops++;
5914 }
5915 if (tmp != out)
5916 {
5917 if (nops == 0)
5918 emit_move_insn (out, tmp);
5919 else if (nops == 1)
5920 {
5921 rtx clob;
5922
5923 clob = gen_rtx_REG (CCmode, FLAGS_REG);
5924 clob = gen_rtx_CLOBBER (VOIDmode, clob);
5925
5926 tmp = gen_rtx_SET (VOIDmode, out, tmp);
5927 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5928 emit_insn (tmp);
5929 }
5930 else
5931 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
5932 }
5933 if (out != operands[0])
5934 emit_move_insn (operands[0], out);
5935
5936 return 1; /* DONE */
5937 }
5938
5939 /*
5940 * General case: Jumpful:
5941 * xorl dest,dest cmpl op1, op2
5942 * cmpl op1, op2 movl ct, dest
5943 * setcc dest jcc 1f
5944 * decl dest movl cf, dest
5945 * andl (cf-ct),dest 1:
5946 * addl ct,dest
0f290768 5947 *
e075ae69
RH
5948 * Size 20. Size 14.
5949 *
5950 * This is reasonably steep, but branch mispredict costs are
5951 * high on modern cpus, so consider failing only if optimizing
5952 * for space.
5953 *
5954 * %%% Parameterize branch_cost on the tuning architecture, then
5955 * use that. The 80386 couldn't care less about mispredicts.
5956 */
5957
5958 if (!optimize_size && !TARGET_CMOVE)
5959 {
5960 if (ct == 0)
5961 {
5962 ct = cf;
5963 cf = 0;
734dba19
JH
5964 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
5965 {
5966 /* We may be reversing unordered compare to normal compare,
5967 that is not valid in general (we may convert non-trapping
5968 condition to trapping one), however on i386 we currently
5969 emit all comparisons unordered. */
5970 compare_code = reverse_condition_maybe_unordered (compare_code);
5971 code = reverse_condition_maybe_unordered (code);
5972 }
5973 else
5974 {
5975 compare_code = reverse_condition (compare_code);
5976 code = reverse_condition (code);
5977 }
e075ae69
RH
5978 }
5979
5980 out = emit_store_flag (out, code, ix86_compare_op0,
5981 ix86_compare_op1, VOIDmode, 0, 1);
5982
5983 emit_insn (gen_addsi3 (out, out, constm1_rtx));
5984 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
5985 if (ct != 0)
5986 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5987 if (out != operands[0])
5988 emit_move_insn (operands[0], out);
5989
5990 return 1; /* DONE */
5991 }
5992 }
5993
5994 if (!TARGET_CMOVE)
5995 {
5996 /* Try a few things more with specific constants and a variable. */
5997
78a0d70c 5998 optab op;
e075ae69
RH
5999 rtx var, orig_out, out, tmp;
6000
6001 if (optimize_size)
6002 return 0; /* FAIL */
6003
0f290768 6004 /* If one of the two operands is an interesting constant, load a
e075ae69 6005 constant with the above and mask it in with a logical operation. */
0f290768 6006
e075ae69
RH
6007 if (GET_CODE (operands[2]) == CONST_INT)
6008 {
6009 var = operands[3];
6010 if (INTVAL (operands[2]) == 0)
6011 operands[3] = constm1_rtx, op = and_optab;
6012 else if (INTVAL (operands[2]) == -1)
6013 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
6014 else
6015 return 0; /* FAIL */
e075ae69
RH
6016 }
6017 else if (GET_CODE (operands[3]) == CONST_INT)
6018 {
6019 var = operands[2];
6020 if (INTVAL (operands[3]) == 0)
6021 operands[2] = constm1_rtx, op = and_optab;
6022 else if (INTVAL (operands[3]) == -1)
6023 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
6024 else
6025 return 0; /* FAIL */
e075ae69 6026 }
78a0d70c 6027 else
e075ae69
RH
6028 return 0; /* FAIL */
6029
6030 orig_out = operands[0];
6031 tmp = gen_reg_rtx (GET_MODE (orig_out));
6032 operands[0] = tmp;
6033
6034 /* Recurse to get the constant loaded. */
6035 if (ix86_expand_int_movcc (operands) == 0)
6036 return 0; /* FAIL */
6037
6038 /* Mask in the interesting variable. */
6039 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
6040 OPTAB_WIDEN);
6041 if (out != orig_out)
6042 emit_move_insn (orig_out, out);
6043
6044 return 1; /* DONE */
6045 }
6046
6047 /*
6048 * For comparison with above,
6049 *
6050 * movl cf,dest
6051 * movl ct,tmp
6052 * cmpl op1,op2
6053 * cmovcc tmp,dest
6054 *
6055 * Size 15.
6056 */
6057
6058 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
6059 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
6060 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
6061 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
6062
a1b8572c
JH
6063 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6064 {
6065 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6066 emit_move_insn (tmp, operands[3]);
6067 operands[3] = tmp;
6068 }
6069 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6070 {
6071 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6072 emit_move_insn (tmp, operands[2]);
6073 operands[2] = tmp;
6074 }
6075
e075ae69
RH
6076 emit_insn (compare_seq);
6077 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6078 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6079 compare_op, operands[2],
6080 operands[3])));
a1b8572c
JH
6081 if (bypass_test)
6082 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6083 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6084 bypass_test,
6085 operands[3],
6086 operands[0])));
6087 if (second_test)
6088 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6089 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6090 second_test,
6091 operands[2],
6092 operands[0])));
e075ae69
RH
6093
6094 return 1; /* DONE */
e9a25f70 6095}
e075ae69 6096
32b5b1aa 6097int
e075ae69
RH
6098ix86_expand_fp_movcc (operands)
6099 rtx operands[];
32b5b1aa 6100{
e075ae69 6101 enum rtx_code code;
e075ae69 6102 rtx tmp;
a1b8572c 6103 rtx compare_op, second_test, bypass_test;
32b5b1aa 6104
0073023d
JH
6105 /* For SF/DFmode conditional moves based on comparisons
6106 in same mode, we may want to use SSE min/max instructions. */
6107 if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
6108 || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
6109 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
6110 /* We may be called from the post-reload splitter. */
6111 && (!REG_P (operands[0])
6112 || SSE_REG_P (operands[0])
6113 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
6114 {
6115 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
6116 code = GET_CODE (operands[1]);
6117
6118 /* See if we have (cross) match between comparison operands and
6119 conditional move operands. */
6120 if (rtx_equal_p (operands[2], op1))
6121 {
6122 rtx tmp = op0;
6123 op0 = op1;
6124 op1 = tmp;
6125 code = reverse_condition_maybe_unordered (code);
6126 }
6127 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
6128 {
6129 /* Check for min operation. */
6130 if (code == LT)
6131 {
6132 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6133 if (memory_operand (op0, VOIDmode))
6134 op0 = force_reg (GET_MODE (operands[0]), op0);
6135 if (GET_MODE (operands[0]) == SFmode)
6136 emit_insn (gen_minsf3 (operands[0], op0, op1));
6137 else
6138 emit_insn (gen_mindf3 (operands[0], op0, op1));
6139 return 1;
6140 }
6141 /* Check for max operation. */
6142 if (code == GT)
6143 {
6144 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6145 if (memory_operand (op0, VOIDmode))
6146 op0 = force_reg (GET_MODE (operands[0]), op0);
6147 if (GET_MODE (operands[0]) == SFmode)
6148 emit_insn (gen_maxsf3 (operands[0], op0, op1));
6149 else
6150 emit_insn (gen_maxdf3 (operands[0], op0, op1));
6151 return 1;
6152 }
6153 }
6154 /* Manage condition to be sse_comparison_operator. In case we are
6155 in non-ieee mode, try to canonicalize the destination operand
6156 to be first in the comparison - this helps reload to avoid extra
6157 moves. */
6158 if (!sse_comparison_operator (operands[1], VOIDmode)
6159 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
6160 {
6161 rtx tmp = ix86_compare_op0;
6162 ix86_compare_op0 = ix86_compare_op1;
6163 ix86_compare_op1 = tmp;
6164 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
6165 VOIDmode, ix86_compare_op0,
6166 ix86_compare_op1);
6167 }
6168 /* Similary try to manage result to be first operand of conditional
6169 move. */
6170 if (rtx_equal_p (operands[0], operands[3]))
6171 {
6172 rtx tmp = operands[2];
6173 operands[2] = operands[3];
6174 operands[2] = tmp;
6175 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
6176 (GET_CODE (operands[1])),
6177 VOIDmode, ix86_compare_op0,
6178 ix86_compare_op1);
6179 }
6180 if (GET_MODE (operands[0]) == SFmode)
6181 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
6182 operands[2], operands[3],
6183 ix86_compare_op0, ix86_compare_op1));
6184 else
6185 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
6186 operands[2], operands[3],
6187 ix86_compare_op0, ix86_compare_op1));
6188 return 1;
6189 }
6190
e075ae69 6191 /* The floating point conditional move instructions don't directly
0f290768 6192 support conditions resulting from a signed integer comparison. */
32b5b1aa 6193
e075ae69 6194 code = GET_CODE (operands[1]);
a1b8572c 6195 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9e7adcb3
JH
6196
6197 /* The floating point conditional move instructions don't directly
6198 support signed integer comparisons. */
6199
a1b8572c 6200 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 6201 {
a1b8572c
JH
6202 if (second_test != NULL || bypass_test != NULL)
6203 abort();
e075ae69 6204 tmp = gen_reg_rtx (QImode);
3a3677ff 6205 ix86_expand_setcc (code, tmp);
e075ae69
RH
6206 code = NE;
6207 ix86_compare_op0 = tmp;
6208 ix86_compare_op1 = const0_rtx;
a1b8572c
JH
6209 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
6210 }
6211 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6212 {
6213 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6214 emit_move_insn (tmp, operands[3]);
6215 operands[3] = tmp;
6216 }
6217 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6218 {
6219 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6220 emit_move_insn (tmp, operands[2]);
6221 operands[2] = tmp;
e075ae69 6222 }
e9a25f70 6223
e075ae69
RH
6224 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6225 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9e7adcb3 6226 compare_op,
e075ae69
RH
6227 operands[2],
6228 operands[3])));
a1b8572c
JH
6229 if (bypass_test)
6230 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6231 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6232 bypass_test,
6233 operands[3],
6234 operands[0])));
6235 if (second_test)
6236 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6237 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6238 second_test,
6239 operands[2],
6240 operands[0])));
32b5b1aa 6241
e075ae69 6242 return 1;
32b5b1aa
SC
6243}
6244
2450a057
JH
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.

   OPERAND is the value to split, PARTS receives the SImode pieces
   (low word first), MODE is the original mode of the value.  Returns
   the number of parts produced (2 or 3).  */

static int
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  /* TFmode occupies 16 bytes but only the first 12 carry the value,
     so it is split into 3 parts, not 4.  */
  int size = mode == TFmode ? 3 : GET_MODE_SIZE (mode) / 4;

  /* MMX registers cannot be split into SImode pieces here.  */
  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp moves,
     that force all constants to memory to allow combining.  */

  if (GET_CODE (operand) == MEM
      && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
    operand = get_pool_constant (XEXP (operand, 0));

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      /* For a push, all parts reference the same SImode stack slot;
	 the caller emits them in the correct order.  */
      PUT_MODE (operand, SImode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Consecutive-hard-register splitting is only meaningful
		 after reload has assigned hard registers.  */
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      /* Address successive 4-byte slices of the memory operand.  */
	      PUT_MODE (operand, SImode);
	      parts[0] = operand;
	      parts[1] = adj_offsettable_operand (operand, 4);
	      if (size == 3)
		parts[2] = adj_offsettable_operand (operand, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      /* Decompose an FP constant into its target word images.  */
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		case TFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = GEN_INT (l[2]);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = GEN_INT (l[1]);
	      parts[0] = GEN_INT (l[0]);
	    }
	  else
	    abort ();
	}
    }

  return size;
}
6332
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

int
ix86_split_long_move (operands1)
     rtx operands1[];
{
  rtx part[2][3];	/* part[0] = destination pieces, part[1] = source.  */
  rtx operands[2];
  int size;		/* Number of SImode parts (2 or 3).  */
  int push = 0;
  int collisions = 0;	/* Source-address registers clobbered by dest.  */

  /* Make our own copy to avoid clobbering the operands.  */
  operands[0] = copy_rtx (operands1[0]);
  operands[1] = copy_rtx (operands1[1]);

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  size = ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
  ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));

  /* When emitting push, take care for source operands on the stack.
     Each push moves the stack pointer, so shift the source parts so
     the sp-relative offsets stay correct.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (size == 3)
	part[1][1] = part[1][2];
      part[1][0] = part[1][1];
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (size == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && size == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  collisions = 1;
	  emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
				  XEXP (part[1][0], 0)));
	  part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
	  part[1][1] = adj_offsettable_operand (part[1][0], 4);
	  if (size == 3)
	    part[1][2] = adj_offsettable_operand (part[1][0], 8);
	}
    }

  if (push)
    {
      /* Pushes are emitted directly, high part first.  */
      if (size == 3)
	{
	  /* We use only first 12 bytes of TFmode value, but for pushing we
	     are required to adjust stack as if we were pushing real 16byte
	     value.  */
	  if (GET_MODE (operands1[0]) == TFmode)
	    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (-4)));
	  emit_insn (gen_push (part[1][2]));
	}
      emit_insn (gen_push (part[1][1]));
      emit_insn (gen_push (part[1][0]));
      return 1;
    }

  /* Choose correct order to not overwrite the source before it is copied.
     The ordered parts are returned to the caller through operands 2-7.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (size == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* Copy high part first.  */
      if (size == 3)
	{
	  operands1[2] = part[0][2];
	  operands1[3] = part[0][1];
	  operands1[4] = part[0][0];
	  operands1[5] = part[1][2];
	  operands1[6] = part[1][1];
	  operands1[7] = part[1][0];
	}
      else
	{
	  operands1[2] = part[0][1];
	  operands1[3] = part[0][0];
	  operands1[5] = part[1][1];
	  operands1[6] = part[1][0];
	}
    }
  else
    {
      /* Copy low part first.  */
      if (size == 3)
	{
	  operands1[2] = part[0][0];
	  operands1[3] = part[0][1];
	  operands1[4] = part[0][2];
	  operands1[5] = part[1][0];
	  operands1[6] = part[1][1];
	  operands1[7] = part[1][2];
	}
      else
	{
	  operands1[2] = part[0][0];
	  operands1[3] = part[0][1];
	  operands1[5] = part[1][0];
	  operands1[6] = part[1][1];
	}
    }

  return 0;
}
32b5b1aa 6471
e075ae69
RH
/* Split a DImode left shift (operands[0] = operands[1] << operands[2])
   into SImode operations.  SCRATCH is an optional scratch register used
   for the variable-count case when new pseudos cannot be created.  */

void
ix86_split_ashldi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      /* Constant shift count: emit the minimal fixed sequence.  */
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* Low word shifts entirely into the high word.  */
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
	}
      else
	{
	  /* Shift bits from the low word into the high word with shld,
	     then shift the low word.  */
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable shift count: shld/shl handle counts 0-31; the
	 adjustment pattern fixes up counts of 32 and above.  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  /* Branchless adjustment using cmov requires a zero register.  */
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
32b5b1aa 6524
e075ae69
RH
/* Split a DImode arithmetic right shift into SImode operations.
   SCRATCH is an optional scratch register for the variable-count case
   when new pseudos cannot be created.  */

void
ix86_split_ashrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      /* Constant shift count: emit the minimal fixed sequence.  */
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* High word shifts into the low word; the high word becomes
	     a sign-bit fill (arithmetic shift by 31).  */
	  emit_move_insn (low[0], high[1]);

	  if (! reload_completed)
	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
	  else
	    {
	      /* After reload the two-address form must be used.  */
	      emit_move_insn (high[0], low[0]);
	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
	    }

	  if (count > 32)
	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  /* shrd moves bits from the high word into the low word.  */
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable shift count: shrd/sar handle counts 0-31; the
	 adjustment pattern fixes up counts of 32 and above.  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  /* Branchless adjustment needs the sign-fill word in SCRATCH.  */
	  if (! no_new_pseudos)
	    scratch = gen_reg_rtx (SImode);
	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}
32b5b1aa 6583
e075ae69
RH
/* Split a DImode logical right shift into SImode operations.
   SCRATCH is an optional scratch register for the variable-count case
   when new pseudos cannot be created.  */

void
ix86_split_lshrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      /* Constant shift count: emit the minimal fixed sequence.  */
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* High word shifts into the low word; high word becomes 0.  */
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  /* shrd moves bits from the high word into the low word.  */
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  /* Branchless adjustment using cmov requires a zero register.  */
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}
3f803cd9 6637
e075ae69
RH
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body.  It needs the initialisations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

void
ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
     rtx out, align_rtx, scratch;
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  /* Dispatch on (address & 3): 0 -> already aligned, 2 -> two
	     bytes to check, 3 -> one byte, 1 -> three bytes (falls
	     through).  */
	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   SImode, 1, 0, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
				   SImode, 1, 0, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
				   SImode, 1, 0, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   SImode, 1, 0, align_4_label);
	}

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, 0, end_0_label);

      /* Increment the address.  */
      emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
				   QImode, 1, 0, end_0_label);

	  emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, 0, end_0_label);

      emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
			   SImode, 1, 0, align_4_label);

  /* A zero byte was found in the word; locate which of the four bytes
     it is.  OUT currently points 4 past the word that was loaded.  */
  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg,
			      gen_rtx_PLUS (SImode, out, GEN_INT (2))));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    out)));

    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  The add sets the carry flag from
     the relevant zero-marker bit; subtracting 3 with borrow then yields
     the correct final address adjustment.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));

  emit_label (end_0_label);
}
6804\f
e075ae69
RH
6805/* Clear stack slot assignments remembered from previous functions.
6806 This is called from INIT_EXPANDERS once before RTL is emitted for each
6807 function. */
6808
36edd3cc
BS
6809static void
6810ix86_init_machine_status (p)
1526a060 6811 struct function *p;
e075ae69 6812{
37b15744
RH
6813 p->machine = (struct machine_function *)
6814 xcalloc (1, sizeof (struct machine_function));
e075ae69
RH
6815}
6816
1526a060
BS
6817/* Mark machine specific bits of P for GC. */
6818static void
6819ix86_mark_machine_status (p)
6820 struct function *p;
6821{
37b15744 6822 struct machine_function *machine = p->machine;
1526a060
BS
6823 enum machine_mode mode;
6824 int n;
6825
37b15744
RH
6826 if (! machine)
6827 return;
6828
1526a060
BS
6829 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6830 mode = (enum machine_mode) ((int) mode + 1))
6831 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
37b15744
RH
6832 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
6833}
6834
6835static void
6836ix86_free_machine_status (p)
6837 struct function *p;
6838{
6839 free (p->machine);
6840 p->machine = NULL;
1526a060
BS
6841}
6842
e075ae69
RH
6843/* Return a MEM corresponding to a stack slot with mode MODE.
6844 Allocate a new slot if necessary.
6845
6846 The RTL for a function can have several slots available: N is
6847 which slot to use. */
6848
6849rtx
6850assign_386_stack_local (mode, n)
6851 enum machine_mode mode;
6852 int n;
6853{
6854 if (n < 0 || n >= MAX_386_STACK_LOCALS)
6855 abort ();
6856
6857 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
6858 ix86_stack_locals[(int) mode][n]
6859 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
6860
6861 return ix86_stack_locals[(int) mode][n];
6862}
6863\f
6864/* Calculate the length of the memory address in the instruction
6865 encoding. Does not include the one-byte modrm, opcode, or prefix. */
6866
6867static int
6868memory_address_length (addr)
6869 rtx addr;
6870{
6871 struct ix86_address parts;
6872 rtx base, index, disp;
6873 int len;
6874
6875 if (GET_CODE (addr) == PRE_DEC
6876 || GET_CODE (addr) == POST_INC)
6877 return 0;
3f803cd9 6878
e075ae69
RH
6879 if (! ix86_decompose_address (addr, &parts))
6880 abort ();
3f803cd9 6881
e075ae69
RH
6882 base = parts.base;
6883 index = parts.index;
6884 disp = parts.disp;
6885 len = 0;
3f803cd9 6886
e075ae69
RH
6887 /* Register Indirect. */
6888 if (base && !index && !disp)
6889 {
6890 /* Special cases: ebp and esp need the two-byte modrm form. */
6891 if (addr == stack_pointer_rtx
6892 || addr == arg_pointer_rtx
564d80f4
JH
6893 || addr == frame_pointer_rtx
6894 || addr == hard_frame_pointer_rtx)
e075ae69 6895 len = 1;
3f803cd9 6896 }
e9a25f70 6897
e075ae69
RH
6898 /* Direct Addressing. */
6899 else if (disp && !base && !index)
6900 len = 4;
6901
3f803cd9
SC
6902 else
6903 {
e075ae69
RH
6904 /* Find the length of the displacement constant. */
6905 if (disp)
6906 {
6907 if (GET_CODE (disp) == CONST_INT
6908 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
6909 len = 1;
6910 else
6911 len = 4;
6912 }
3f803cd9 6913
e075ae69
RH
6914 /* An index requires the two-byte modrm form. */
6915 if (index)
6916 len += 1;
3f803cd9
SC
6917 }
6918
e075ae69
RH
6919 return len;
6920}
79325812 6921
6ef67412
JH
6922/* Compute default value for "length_immediate" attribute. When SHORTFORM is set
6923 expect that insn have 8bit immediate alternative. */
e075ae69 6924int
6ef67412 6925ix86_attr_length_immediate_default (insn, shortform)
e075ae69 6926 rtx insn;
6ef67412 6927 int shortform;
e075ae69 6928{
6ef67412
JH
6929 int len = 0;
6930 int i;
6c698a6d 6931 extract_insn_cached (insn);
6ef67412
JH
6932 for (i = recog_data.n_operands - 1; i >= 0; --i)
6933 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 6934 {
6ef67412 6935 if (len)
3071fab5 6936 abort ();
6ef67412
JH
6937 if (shortform
6938 && GET_CODE (recog_data.operand[i]) == CONST_INT
6939 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
6940 len = 1;
6941 else
6942 {
6943 switch (get_attr_mode (insn))
6944 {
6945 case MODE_QI:
6946 len+=1;
6947 break;
6948 case MODE_HI:
6949 len+=2;
6950 break;
6951 case MODE_SI:
6952 len+=4;
6953 break;
6954 default:
6955 fatal_insn ("Unknown insn mode", insn);
6956 }
6957 }
3071fab5 6958 }
6ef67412
JH
6959 return len;
6960}
6961/* Compute default value for "length_address" attribute. */
6962int
6963ix86_attr_length_address_default (insn)
6964 rtx insn;
6965{
6966 int i;
6c698a6d 6967 extract_insn_cached (insn);
1ccbefce
RH
6968 for (i = recog_data.n_operands - 1; i >= 0; --i)
6969 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 6970 {
6ef67412 6971 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
6972 break;
6973 }
6ef67412 6974 return 0;
3f803cd9 6975}
e075ae69
RH
6976\f
6977/* Return the maximum number of instructions a cpu can issue. */
b657fc39 6978
e075ae69
RH
6979int
6980ix86_issue_rate ()
b657fc39 6981{
e075ae69 6982 switch (ix86_cpu)
b657fc39 6983 {
e075ae69
RH
6984 case PROCESSOR_PENTIUM:
6985 case PROCESSOR_K6:
6986 return 2;
79325812 6987
e075ae69 6988 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
6989 case PROCESSOR_PENTIUM4:
6990 case PROCESSOR_ATHLON:
e075ae69 6991 return 3;
b657fc39 6992
b657fc39 6993 default:
e075ae69 6994 return 1;
b657fc39 6995 }
b657fc39
L
6996}
6997
e075ae69
RH
6998/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
6999 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 7000
e075ae69
RH
7001static int
7002ix86_flags_dependant (insn, dep_insn, insn_type)
7003 rtx insn, dep_insn;
7004 enum attr_type insn_type;
7005{
7006 rtx set, set2;
b657fc39 7007
e075ae69
RH
7008 /* Simplify the test for uninteresting insns. */
7009 if (insn_type != TYPE_SETCC
7010 && insn_type != TYPE_ICMOV
7011 && insn_type != TYPE_FCMOV
7012 && insn_type != TYPE_IBR)
7013 return 0;
b657fc39 7014
e075ae69
RH
7015 if ((set = single_set (dep_insn)) != 0)
7016 {
7017 set = SET_DEST (set);
7018 set2 = NULL_RTX;
7019 }
7020 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
7021 && XVECLEN (PATTERN (dep_insn), 0) == 2
7022 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
7023 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
7024 {
7025 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
7026 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
7027 }
78a0d70c
ZW
7028 else
7029 return 0;
b657fc39 7030
78a0d70c
ZW
7031 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
7032 return 0;
b657fc39 7033
78a0d70c
ZW
7034 /* This test is true if the dependant insn reads the flags but
7035 not any other potentially set register. */
7036 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
7037 return 0;
7038
7039 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
7040 return 0;
7041
7042 return 1;
e075ae69 7043}
b657fc39 7044
e075ae69
RH
7045/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
7046 address with operands set by DEP_INSN. */
7047
7048static int
7049ix86_agi_dependant (insn, dep_insn, insn_type)
7050 rtx insn, dep_insn;
7051 enum attr_type insn_type;
7052{
7053 rtx addr;
7054
7055 if (insn_type == TYPE_LEA)
5fbdde42
RH
7056 {
7057 addr = PATTERN (insn);
7058 if (GET_CODE (addr) == SET)
7059 ;
7060 else if (GET_CODE (addr) == PARALLEL
7061 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
7062 addr = XVECEXP (addr, 0, 0);
7063 else
7064 abort ();
7065 addr = SET_SRC (addr);
7066 }
e075ae69
RH
7067 else
7068 {
7069 int i;
6c698a6d 7070 extract_insn_cached (insn);
1ccbefce
RH
7071 for (i = recog_data.n_operands - 1; i >= 0; --i)
7072 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 7073 {
1ccbefce 7074 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
7075 goto found;
7076 }
7077 return 0;
7078 found:;
b657fc39
L
7079 }
7080
e075ae69 7081 return modified_in_p (addr, dep_insn);
b657fc39 7082}
a269a03c
JC
7083
7084int
e075ae69 7085ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
7086 rtx insn, link, dep_insn;
7087 int cost;
7088{
e075ae69 7089 enum attr_type insn_type, dep_insn_type;
0b5107cf 7090 enum attr_memory memory;
e075ae69 7091 rtx set, set2;
9b00189f 7092 int dep_insn_code_number;
a269a03c 7093
309ada50 7094 /* Anti and output depenancies have zero cost on all CPUs. */
e075ae69 7095 if (REG_NOTE_KIND (link) != 0)
309ada50 7096 return 0;
a269a03c 7097
9b00189f
JH
7098 dep_insn_code_number = recog_memoized (dep_insn);
7099
e075ae69 7100 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 7101 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 7102 return cost;
a269a03c 7103
1c71e60e
JH
7104 insn_type = get_attr_type (insn);
7105 dep_insn_type = get_attr_type (dep_insn);
9b00189f 7106
1c71e60e
JH
7107 /* Prologue and epilogue allocators can have a false dependency on ebp.
7108 This results in one cycle extra stall on Pentium prologue scheduling,
7109 so handle this important case manually. */
7110 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
7111 && dep_insn_type == TYPE_ALU
9b00189f
JH
7112 && !reg_mentioned_p (stack_pointer_rtx, insn))
7113 return 0;
7114
a269a03c
JC
7115 switch (ix86_cpu)
7116 {
7117 case PROCESSOR_PENTIUM:
e075ae69
RH
7118 /* Address Generation Interlock adds a cycle of latency. */
7119 if (ix86_agi_dependant (insn, dep_insn, insn_type))
7120 cost += 1;
7121
7122 /* ??? Compares pair with jump/setcc. */
7123 if (ix86_flags_dependant (insn, dep_insn, insn_type))
7124 cost = 0;
7125
7126 /* Floating point stores require value to be ready one cycle ealier. */
0f290768 7127 if (insn_type == TYPE_FMOV
e075ae69
RH
7128 && get_attr_memory (insn) == MEMORY_STORE
7129 && !ix86_agi_dependant (insn, dep_insn, insn_type))
7130 cost += 1;
7131 break;
a269a03c 7132
e075ae69 7133 case PROCESSOR_PENTIUMPRO:
0f290768 7134 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
7135 increase the cost here for non-imov insns. */
7136 if (dep_insn_type != TYPE_IMOV
7137 && dep_insn_type != TYPE_FMOV
0b5107cf
JH
7138 && ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
7139 || memory == MEMORY_BOTH))
e075ae69
RH
7140 cost += 1;
7141
7142 /* INT->FP conversion is expensive. */
7143 if (get_attr_fp_int_src (dep_insn))
7144 cost += 5;
7145
7146 /* There is one cycle extra latency between an FP op and a store. */
7147 if (insn_type == TYPE_FMOV
7148 && (set = single_set (dep_insn)) != NULL_RTX
7149 && (set2 = single_set (insn)) != NULL_RTX
7150 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
7151 && GET_CODE (SET_DEST (set2)) == MEM)
7152 cost += 1;
7153 break;
a269a03c 7154
e075ae69
RH
7155 case PROCESSOR_K6:
7156 /* The esp dependency is resolved before the instruction is really
7157 finished. */
7158 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
7159 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
7160 return 1;
a269a03c 7161
0f290768 7162 /* Since we can't represent delayed latencies of load+operation,
e075ae69 7163 increase the cost here for non-imov insns. */
0b5107cf
JH
7164 if ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
7165 || memory == MEMORY_BOTH)
e075ae69
RH
7166 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
7167
7168 /* INT->FP conversion is expensive. */
7169 if (get_attr_fp_int_src (dep_insn))
7170 cost += 5;
a14003ee 7171 break;
e075ae69 7172
309ada50 7173 case PROCESSOR_ATHLON:
0b5107cf
JH
7174 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
7175 || memory == MEMORY_BOTH)
7176 {
7177 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
7178 cost += 2;
7179 else
7180 cost += 3;
7181 }
309ada50 7182
a269a03c 7183 default:
a269a03c
JC
7184 break;
7185 }
7186
7187 return cost;
7188}
0a726ef1 7189
e075ae69
RH
7190static union
7191{
7192 struct ppro_sched_data
7193 {
7194 rtx decode[3];
7195 int issued_this_cycle;
7196 } ppro;
7197} ix86_sched_data;
0a726ef1 7198
e075ae69
RH
7199static int
7200ix86_safe_length (insn)
7201 rtx insn;
7202{
7203 if (recog_memoized (insn) >= 0)
7204 return get_attr_length(insn);
7205 else
7206 return 128;
7207}
0a726ef1 7208
e075ae69
RH
7209static int
7210ix86_safe_length_prefix (insn)
7211 rtx insn;
7212{
7213 if (recog_memoized (insn) >= 0)
7214 return get_attr_length(insn);
7215 else
7216 return 0;
7217}
7218
7219static enum attr_memory
7220ix86_safe_memory (insn)
7221 rtx insn;
7222{
7223 if (recog_memoized (insn) >= 0)
7224 return get_attr_memory(insn);
7225 else
7226 return MEMORY_UNKNOWN;
7227}
0a726ef1 7228
e075ae69
RH
7229static enum attr_pent_pair
7230ix86_safe_pent_pair (insn)
7231 rtx insn;
7232{
7233 if (recog_memoized (insn) >= 0)
7234 return get_attr_pent_pair(insn);
7235 else
7236 return PENT_PAIR_NP;
7237}
0a726ef1 7238
e075ae69
RH
7239static enum attr_ppro_uops
7240ix86_safe_ppro_uops (insn)
7241 rtx insn;
7242{
7243 if (recog_memoized (insn) >= 0)
7244 return get_attr_ppro_uops (insn);
7245 else
7246 return PPRO_UOPS_MANY;
7247}
0a726ef1 7248
e075ae69
RH
7249static void
7250ix86_dump_ppro_packet (dump)
7251 FILE *dump;
0a726ef1 7252{
e075ae69 7253 if (ix86_sched_data.ppro.decode[0])
0a726ef1 7254 {
e075ae69
RH
7255 fprintf (dump, "PPRO packet: %d",
7256 INSN_UID (ix86_sched_data.ppro.decode[0]));
7257 if (ix86_sched_data.ppro.decode[1])
7258 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
7259 if (ix86_sched_data.ppro.decode[2])
7260 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
7261 fputc ('\n', dump);
7262 }
7263}
0a726ef1 7264
e075ae69 7265/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 7266
e075ae69
RH
7267void
7268ix86_sched_init (dump, sched_verbose)
7269 FILE *dump ATTRIBUTE_UNUSED;
7270 int sched_verbose ATTRIBUTE_UNUSED;
7271{
7272 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
7273}
7274
7275/* Shift INSN to SLOT, and shift everything else down. */
7276
7277static void
7278ix86_reorder_insn (insnp, slot)
7279 rtx *insnp, *slot;
7280{
7281 if (insnp != slot)
7282 {
7283 rtx insn = *insnp;
0f290768 7284 do
e075ae69
RH
7285 insnp[0] = insnp[1];
7286 while (++insnp != slot);
7287 *insnp = insn;
0a726ef1 7288 }
e075ae69
RH
7289}
7290
7291/* Find an instruction with given pairability and minimal amount of cycles
7292 lost by the fact that the CPU waits for both pipelines to finish before
7293 reading next instructions. Also take care that both instructions together
7294 can not exceed 7 bytes. */
7295
7296static rtx *
7297ix86_pent_find_pair (e_ready, ready, type, first)
7298 rtx *e_ready;
7299 rtx *ready;
7300 enum attr_pent_pair type;
7301 rtx first;
7302{
7303 int mincycles, cycles;
7304 enum attr_pent_pair tmp;
7305 enum attr_memory memory;
7306 rtx *insnp, *bestinsnp = NULL;
0a726ef1 7307
e075ae69
RH
7308 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
7309 return NULL;
0a726ef1 7310
e075ae69
RH
7311 memory = ix86_safe_memory (first);
7312 cycles = result_ready_cost (first);
7313 mincycles = INT_MAX;
7314
7315 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
7316 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
7317 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
6ec6d558 7318 {
e075ae69
RH
7319 enum attr_memory second_memory;
7320 int secondcycles, currentcycles;
7321
7322 second_memory = ix86_safe_memory (*insnp);
7323 secondcycles = result_ready_cost (*insnp);
7324 currentcycles = abs (cycles - secondcycles);
7325
7326 if (secondcycles >= 1 && cycles >= 1)
6ec6d558 7327 {
e075ae69
RH
7328 /* Two read/modify/write instructions together takes two
7329 cycles longer. */
7330 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
7331 currentcycles += 2;
0f290768 7332
e075ae69
RH
7333 /* Read modify/write instruction followed by read/modify
7334 takes one cycle longer. */
7335 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
7336 && tmp != PENT_PAIR_UV
7337 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
7338 currentcycles += 1;
6ec6d558 7339 }
e075ae69
RH
7340 if (currentcycles < mincycles)
7341 bestinsnp = insnp, mincycles = currentcycles;
6ec6d558 7342 }
0a726ef1 7343
e075ae69
RH
7344 return bestinsnp;
7345}
7346
78a0d70c 7347/* Subroutines of ix86_sched_reorder. */
e075ae69 7348
c6991660 7349static void
78a0d70c 7350ix86_sched_reorder_pentium (ready, e_ready)
e075ae69 7351 rtx *ready;
78a0d70c 7352 rtx *e_ready;
e075ae69 7353{
78a0d70c 7354 enum attr_pent_pair pair1, pair2;
e075ae69 7355 rtx *insnp;
e075ae69 7356
78a0d70c
ZW
7357 /* This wouldn't be necessary if Haifa knew that static insn ordering
7358 is important to which pipe an insn is issued to. So we have to make
7359 some minor rearrangements. */
e075ae69 7360
78a0d70c
ZW
7361 pair1 = ix86_safe_pent_pair (*e_ready);
7362
7363 /* If the first insn is non-pairable, let it be. */
7364 if (pair1 == PENT_PAIR_NP)
7365 return;
7366
7367 pair2 = PENT_PAIR_NP;
7368 insnp = 0;
7369
7370 /* If the first insn is UV or PV pairable, search for a PU
7371 insn to go with. */
7372 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
e075ae69 7373 {
78a0d70c
ZW
7374 insnp = ix86_pent_find_pair (e_ready-1, ready,
7375 PENT_PAIR_PU, *e_ready);
7376 if (insnp)
7377 pair2 = PENT_PAIR_PU;
7378 }
e075ae69 7379
78a0d70c
ZW
7380 /* If the first insn is PU or UV pairable, search for a PV
7381 insn to go with. */
7382 if (pair2 == PENT_PAIR_NP
7383 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
7384 {
7385 insnp = ix86_pent_find_pair (e_ready-1, ready,
7386 PENT_PAIR_PV, *e_ready);
7387 if (insnp)
7388 pair2 = PENT_PAIR_PV;
7389 }
e075ae69 7390
78a0d70c
ZW
7391 /* If the first insn is pairable, search for a UV
7392 insn to go with. */
7393 if (pair2 == PENT_PAIR_NP)
7394 {
7395 insnp = ix86_pent_find_pair (e_ready-1, ready,
7396 PENT_PAIR_UV, *e_ready);
7397 if (insnp)
7398 pair2 = PENT_PAIR_UV;
7399 }
e075ae69 7400
78a0d70c
ZW
7401 if (pair2 == PENT_PAIR_NP)
7402 return;
e075ae69 7403
78a0d70c
ZW
7404 /* Found something! Decide if we need to swap the order. */
7405 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
7406 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
7407 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
7408 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
7409 ix86_reorder_insn (insnp, e_ready);
7410 else
7411 ix86_reorder_insn (insnp, e_ready - 1);
7412}
e075ae69 7413
c6991660 7414static void
78a0d70c
ZW
7415ix86_sched_reorder_ppro (ready, e_ready)
7416 rtx *ready;
7417 rtx *e_ready;
7418{
7419 rtx decode[3];
7420 enum attr_ppro_uops cur_uops;
7421 int issued_this_cycle;
7422 rtx *insnp;
7423 int i;
e075ae69 7424
0f290768 7425 /* At this point .ppro.decode contains the state of the three
78a0d70c 7426 decoders from last "cycle". That is, those insns that were
0f290768 7427 actually independent. But here we're scheduling for the
78a0d70c
ZW
7428 decoder, and we may find things that are decodable in the
7429 same cycle. */
e075ae69 7430
0f290768 7431 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
78a0d70c 7432 issued_this_cycle = 0;
e075ae69 7433
78a0d70c
ZW
7434 insnp = e_ready;
7435 cur_uops = ix86_safe_ppro_uops (*insnp);
0a726ef1 7436
78a0d70c
ZW
7437 /* If the decoders are empty, and we've a complex insn at the
7438 head of the priority queue, let it issue without complaint. */
7439 if (decode[0] == NULL)
7440 {
7441 if (cur_uops == PPRO_UOPS_MANY)
7442 {
7443 decode[0] = *insnp;
7444 goto ppro_done;
7445 }
7446
7447 /* Otherwise, search for a 2-4 uop unsn to issue. */
7448 while (cur_uops != PPRO_UOPS_FEW)
7449 {
7450 if (insnp == ready)
7451 break;
7452 cur_uops = ix86_safe_ppro_uops (*--insnp);
7453 }
7454
7455 /* If so, move it to the head of the line. */
7456 if (cur_uops == PPRO_UOPS_FEW)
7457 ix86_reorder_insn (insnp, e_ready);
0a726ef1 7458
78a0d70c
ZW
7459 /* Issue the head of the queue. */
7460 issued_this_cycle = 1;
7461 decode[0] = *e_ready--;
7462 }
fb693d44 7463
78a0d70c
ZW
7464 /* Look for simple insns to fill in the other two slots. */
7465 for (i = 1; i < 3; ++i)
7466 if (decode[i] == NULL)
7467 {
7468 if (ready >= e_ready)
7469 goto ppro_done;
fb693d44 7470
e075ae69
RH
7471 insnp = e_ready;
7472 cur_uops = ix86_safe_ppro_uops (*insnp);
78a0d70c
ZW
7473 while (cur_uops != PPRO_UOPS_ONE)
7474 {
7475 if (insnp == ready)
7476 break;
7477 cur_uops = ix86_safe_ppro_uops (*--insnp);
7478 }
fb693d44 7479
78a0d70c
ZW
7480 /* Found one. Move it to the head of the queue and issue it. */
7481 if (cur_uops == PPRO_UOPS_ONE)
e075ae69 7482 {
78a0d70c
ZW
7483 ix86_reorder_insn (insnp, e_ready);
7484 decode[i] = *e_ready--;
7485 issued_this_cycle++;
7486 continue;
7487 }
fb693d44 7488
78a0d70c
ZW
7489 /* ??? Didn't find one. Ideally, here we would do a lazy split
7490 of 2-uop insns, issue one and queue the other. */
7491 }
fb693d44 7492
78a0d70c
ZW
7493 ppro_done:
7494 if (issued_this_cycle == 0)
7495 issued_this_cycle = 1;
7496 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
7497}
fb693d44 7498
0f290768 7499/* We are about to being issuing insns for this clock cycle.
78a0d70c
ZW
7500 Override the default sort algorithm to better slot instructions. */
7501int
7502ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
7503 FILE *dump ATTRIBUTE_UNUSED;
7504 int sched_verbose ATTRIBUTE_UNUSED;
7505 rtx *ready;
7506 int n_ready;
7507 int clock_var ATTRIBUTE_UNUSED;
7508{
7509 rtx *e_ready = ready + n_ready - 1;
fb693d44 7510
78a0d70c
ZW
7511 if (n_ready < 2)
7512 goto out;
e075ae69 7513
78a0d70c
ZW
7514 switch (ix86_cpu)
7515 {
7516 default:
7517 break;
e075ae69 7518
78a0d70c
ZW
7519 case PROCESSOR_PENTIUM:
7520 ix86_sched_reorder_pentium (ready, e_ready);
7521 break;
e075ae69 7522
78a0d70c
ZW
7523 case PROCESSOR_PENTIUMPRO:
7524 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 7525 break;
fb693d44
RH
7526 }
7527
e075ae69
RH
7528out:
7529 return ix86_issue_rate ();
7530}
fb693d44 7531
e075ae69
RH
7532/* We are about to issue INSN. Return the number of insns left on the
7533 ready queue that can be issued this cycle. */
b222082e 7534
e075ae69
RH
7535int
7536ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
7537 FILE *dump;
7538 int sched_verbose;
7539 rtx insn;
7540 int can_issue_more;
7541{
7542 int i;
7543 switch (ix86_cpu)
fb693d44 7544 {
e075ae69
RH
7545 default:
7546 return can_issue_more - 1;
fb693d44 7547
e075ae69
RH
7548 case PROCESSOR_PENTIUMPRO:
7549 {
7550 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
fb693d44 7551
e075ae69
RH
7552 if (uops == PPRO_UOPS_MANY)
7553 {
7554 if (sched_verbose)
7555 ix86_dump_ppro_packet (dump);
7556 ix86_sched_data.ppro.decode[0] = insn;
7557 ix86_sched_data.ppro.decode[1] = NULL;
7558 ix86_sched_data.ppro.decode[2] = NULL;
7559 if (sched_verbose)
7560 ix86_dump_ppro_packet (dump);
7561 ix86_sched_data.ppro.decode[0] = NULL;
7562 }
7563 else if (uops == PPRO_UOPS_FEW)
7564 {
7565 if (sched_verbose)
7566 ix86_dump_ppro_packet (dump);
7567 ix86_sched_data.ppro.decode[0] = insn;
7568 ix86_sched_data.ppro.decode[1] = NULL;
7569 ix86_sched_data.ppro.decode[2] = NULL;
7570 }
7571 else
7572 {
7573 for (i = 0; i < 3; ++i)
7574 if (ix86_sched_data.ppro.decode[i] == NULL)
7575 {
7576 ix86_sched_data.ppro.decode[i] = insn;
7577 break;
7578 }
7579 if (i == 3)
7580 abort ();
7581 if (i == 2)
7582 {
7583 if (sched_verbose)
7584 ix86_dump_ppro_packet (dump);
7585 ix86_sched_data.ppro.decode[0] = NULL;
7586 ix86_sched_data.ppro.decode[1] = NULL;
7587 ix86_sched_data.ppro.decode[2] = NULL;
7588 }
7589 }
7590 }
7591 return --ix86_sched_data.ppro.issued_this_cycle;
7592 }
fb693d44 7593}
a7180f70 7594\f
0e4970d7
RK
7595/* Walk through INSNS and look for MEM references whose address is DSTREG or
7596 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
7597 appropriate. */
7598
7599void
7600ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
7601 rtx insns;
7602 rtx dstref, srcref, dstreg, srcreg;
7603{
7604 rtx insn;
7605
7606 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
7607 if (INSN_P (insn))
7608 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
7609 dstreg, srcreg);
7610}
7611
7612/* Subroutine of above to actually do the updating by recursively walking
7613 the rtx. */
7614
7615static void
7616ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
7617 rtx x;
7618 rtx dstref, srcref, dstreg, srcreg;
7619{
7620 enum rtx_code code = GET_CODE (x);
7621 const char *format_ptr = GET_RTX_FORMAT (code);
7622 int i, j;
7623
7624 if (code == MEM && XEXP (x, 0) == dstreg)
7625 MEM_COPY_ATTRIBUTES (x, dstref);
7626 else if (code == MEM && XEXP (x, 0) == srcreg)
7627 MEM_COPY_ATTRIBUTES (x, srcref);
7628
7629 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
7630 {
7631 if (*format_ptr == 'e')
7632 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
7633 dstreg, srcreg);
7634 else if (*format_ptr == 'E')
7635 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
397be6cf 7636 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
0e4970d7
RK
7637 dstreg, srcreg);
7638 }
7639}
7640\f
a7180f70
BS
7641/* Compute the alignment given to a constant that is being placed in memory.
7642 EXP is the constant and ALIGN is the alignment that the object would
7643 ordinarily have.
7644 The value of this function is used instead of that alignment to align
7645 the object. */
7646
7647int
7648ix86_constant_alignment (exp, align)
7649 tree exp;
7650 int align;
7651{
7652 if (TREE_CODE (exp) == REAL_CST)
7653 {
7654 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
7655 return 64;
7656 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
7657 return 128;
7658 }
7659 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
7660 && align < 256)
7661 return 256;
7662
7663 return align;
7664}
7665
7666/* Compute the alignment for a static variable.
7667 TYPE is the data type, and ALIGN is the alignment that
7668 the object would ordinarily have. The value of this function is used
7669 instead of that alignment to align the object. */
7670
7671int
7672ix86_data_alignment (type, align)
7673 tree type;
7674 int align;
7675{
7676 if (AGGREGATE_TYPE_P (type)
7677 && TYPE_SIZE (type)
7678 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
7679 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
7680 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
7681 return 256;
7682
7683 if (TREE_CODE (type) == ARRAY_TYPE)
7684 {
7685 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7686 return 64;
7687 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7688 return 128;
7689 }
7690 else if (TREE_CODE (type) == COMPLEX_TYPE)
7691 {
0f290768 7692
a7180f70
BS
7693 if (TYPE_MODE (type) == DCmode && align < 64)
7694 return 64;
7695 if (TYPE_MODE (type) == XCmode && align < 128)
7696 return 128;
7697 }
7698 else if ((TREE_CODE (type) == RECORD_TYPE
7699 || TREE_CODE (type) == UNION_TYPE
7700 || TREE_CODE (type) == QUAL_UNION_TYPE)
7701 && TYPE_FIELDS (type))
7702 {
7703 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7704 return 64;
7705 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7706 return 128;
7707 }
7708 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7709 || TREE_CODE (type) == INTEGER_TYPE)
7710 {
7711 if (TYPE_MODE (type) == DFmode && align < 64)
7712 return 64;
7713 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7714 return 128;
7715 }
7716
7717 return align;
7718}
7719
7720/* Compute the alignment for a local variable.
7721 TYPE is the data type, and ALIGN is the alignment that
7722 the object would ordinarily have. The value of this macro is used
7723 instead of that alignment to align the object. */
7724
7725int
7726ix86_local_alignment (type, align)
7727 tree type;
7728 int align;
7729{
7730 if (TREE_CODE (type) == ARRAY_TYPE)
7731 {
7732 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7733 return 64;
7734 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7735 return 128;
7736 }
7737 else if (TREE_CODE (type) == COMPLEX_TYPE)
7738 {
7739 if (TYPE_MODE (type) == DCmode && align < 64)
7740 return 64;
7741 if (TYPE_MODE (type) == XCmode && align < 128)
7742 return 128;
7743 }
7744 else if ((TREE_CODE (type) == RECORD_TYPE
7745 || TREE_CODE (type) == UNION_TYPE
7746 || TREE_CODE (type) == QUAL_UNION_TYPE)
7747 && TYPE_FIELDS (type))
7748 {
7749 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7750 return 64;
7751 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7752 return 128;
7753 }
7754 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7755 || TREE_CODE (type) == INTEGER_TYPE)
7756 {
0f290768 7757
a7180f70
BS
7758 if (TYPE_MODE (type) == DFmode && align < 64)
7759 return 64;
7760 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7761 return 128;
7762 }
7763 return align;
7764}
bd793c65
BS
7765
7766#define def_builtin(NAME, TYPE, CODE) \
7767 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL_PTR)
7768struct builtin_description
7769{
7770 enum insn_code icode;
7771 const char * name;
7772 enum ix86_builtins code;
7773 enum rtx_code comparison;
7774 unsigned int flag;
7775};
7776
7777static struct builtin_description bdesc_comi[] =
7778{
7779 { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
7780 { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
7781 { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
7782 { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
7783 { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
7784 { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
7785 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
7786 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
7787 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
7788 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
7789 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
7790 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
7791};
7792
7793static struct builtin_description bdesc_2arg[] =
7794{
7795 /* SSE */
7796 { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
7797 { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
7798 { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
7799 { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
7800 { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
7801 { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
7802 { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
7803 { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
7804
7805 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
7806 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
7807 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
7808 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
7809 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
7810 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
7811 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
7812 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
7813 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
7814 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
7815 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
7816 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
7817 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
7818 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
7819 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
7820 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
7821 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
7822 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
7823 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
7824 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
7825 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
7826 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
7827 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
7828 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
7829
7830 { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
7831 { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
7832 { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
7833 { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
7834
7835 { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
7836 { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
7837 { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
7838 { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
7839
7840 { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
7841 { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
7842 { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
7843 { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
7844 { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
7845
7846 /* MMX */
7847 { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
7848 { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
7849 { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
7850 { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
7851 { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
7852 { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
7853
7854 { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
7855 { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
7856 { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
7857 { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
7858 { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
7859 { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
7860 { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
7861 { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
7862
7863 { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
7864 { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
7865 { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
7866
7867 { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
7868 { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
7869 { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
7870 { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
7871
7872 { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
7873 { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
7874
7875 { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
7876 { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
7877 { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
7878 { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
7879 { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
7880 { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
7881
7882 { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
7883 { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
7884 { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
7885 { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
7886
7887 { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
7888 { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
7889 { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
7890 { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
7891 { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
7892 { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
7893
7894 /* Special. */
7895 { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
7896 { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
7897 { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
7898
7899 { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
7900 { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
7901
7902 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
7903 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
7904 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
7905 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
7906 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
7907 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
7908
7909 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
7910 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
7911 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
7912 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
7913 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
7914 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
7915
7916 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
7917 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
7918 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
7919 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
7920
7921 { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
7922 { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
7923
7924};
7925
/* Table of builtins that take a single vector or scalar operand.  A
   zero name field means the builtin is registered by hand elsewhere in
   ix86_init_builtins rather than by a generic table-driven loop.  */
static struct builtin_description bdesc_1arg[] =
{
  /* Move-mask: collect the per-element sign bits into an integer.  */
  { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  /* Packed single-precision square root and reciprocal estimates.  */
  { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  /* SSE float -> MMX/integer conversions; the cvtt forms truncate,
     the cvt forms round according to MXCSR.  */
  { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }

};
7941
7942/* Expand all the target specific builtins. This is not called if TARGET_MMX
7943 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
7944 builtins. */
7945void
7946ix86_init_builtins ()
7947{
7948 struct builtin_description * d;
77ebd435 7949 size_t i;
cbd5937a 7950 tree endlink = void_list_node;
bd793c65
BS
7951
7952 tree pchar_type_node = build_pointer_type (char_type_node);
7953 tree pfloat_type_node = build_pointer_type (float_type_node);
7954 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
7955 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
7956
7957 /* Comparisons. */
7958 tree int_ftype_v4sf_v4sf
7959 = build_function_type (integer_type_node,
7960 tree_cons (NULL_TREE, V4SF_type_node,
7961 tree_cons (NULL_TREE,
7962 V4SF_type_node,
7963 endlink)));
7964 tree v4si_ftype_v4sf_v4sf
7965 = build_function_type (V4SI_type_node,
7966 tree_cons (NULL_TREE, V4SF_type_node,
7967 tree_cons (NULL_TREE,
7968 V4SF_type_node,
7969 endlink)));
7970 /* MMX/SSE/integer conversions. */
7971 tree int_ftype_v4sf_int
7972 = build_function_type (integer_type_node,
7973 tree_cons (NULL_TREE, V4SF_type_node,
7974 tree_cons (NULL_TREE,
7975 integer_type_node,
7976 endlink)));
7977 tree int_ftype_v4sf
7978 = build_function_type (integer_type_node,
7979 tree_cons (NULL_TREE, V4SF_type_node,
7980 endlink));
7981 tree int_ftype_v8qi
7982 = build_function_type (integer_type_node,
7983 tree_cons (NULL_TREE, V8QI_type_node,
7984 endlink));
7985 tree int_ftype_v2si
7986 = build_function_type (integer_type_node,
7987 tree_cons (NULL_TREE, V2SI_type_node,
7988 endlink));
7989 tree v2si_ftype_int
7990 = build_function_type (V2SI_type_node,
7991 tree_cons (NULL_TREE, integer_type_node,
7992 endlink));
7993 tree v4sf_ftype_v4sf_int
7994 = build_function_type (integer_type_node,
7995 tree_cons (NULL_TREE, V4SF_type_node,
7996 tree_cons (NULL_TREE, integer_type_node,
7997 endlink)));
7998 tree v4sf_ftype_v4sf_v2si
7999 = build_function_type (V4SF_type_node,
8000 tree_cons (NULL_TREE, V4SF_type_node,
8001 tree_cons (NULL_TREE, V2SI_type_node,
8002 endlink)));
8003 tree int_ftype_v4hi_int
8004 = build_function_type (integer_type_node,
8005 tree_cons (NULL_TREE, V4HI_type_node,
8006 tree_cons (NULL_TREE, integer_type_node,
8007 endlink)));
8008 tree v4hi_ftype_v4hi_int_int
332316cd 8009 = build_function_type (V4HI_type_node,
bd793c65
BS
8010 tree_cons (NULL_TREE, V4HI_type_node,
8011 tree_cons (NULL_TREE, integer_type_node,
8012 tree_cons (NULL_TREE,
8013 integer_type_node,
8014 endlink))));
8015 /* Miscellaneous. */
8016 tree v8qi_ftype_v4hi_v4hi
8017 = build_function_type (V8QI_type_node,
8018 tree_cons (NULL_TREE, V4HI_type_node,
8019 tree_cons (NULL_TREE, V4HI_type_node,
8020 endlink)));
8021 tree v4hi_ftype_v2si_v2si
8022 = build_function_type (V4HI_type_node,
8023 tree_cons (NULL_TREE, V2SI_type_node,
8024 tree_cons (NULL_TREE, V2SI_type_node,
8025 endlink)));
8026 tree v4sf_ftype_v4sf_v4sf_int
8027 = build_function_type (V4SF_type_node,
8028 tree_cons (NULL_TREE, V4SF_type_node,
8029 tree_cons (NULL_TREE, V4SF_type_node,
8030 tree_cons (NULL_TREE,
8031 integer_type_node,
8032 endlink))));
8033 tree v4hi_ftype_v8qi_v8qi
8034 = build_function_type (V4HI_type_node,
8035 tree_cons (NULL_TREE, V8QI_type_node,
8036 tree_cons (NULL_TREE, V8QI_type_node,
8037 endlink)));
8038 tree v2si_ftype_v4hi_v4hi
8039 = build_function_type (V2SI_type_node,
8040 tree_cons (NULL_TREE, V4HI_type_node,
8041 tree_cons (NULL_TREE, V4HI_type_node,
8042 endlink)));
8043 tree v4hi_ftype_v4hi_int
8044 = build_function_type (V4HI_type_node,
8045 tree_cons (NULL_TREE, V4HI_type_node,
8046 tree_cons (NULL_TREE, integer_type_node,
8047 endlink)));
8048 tree di_ftype_di_int
8049 = build_function_type (long_long_unsigned_type_node,
8050 tree_cons (NULL_TREE, long_long_unsigned_type_node,
8051 tree_cons (NULL_TREE, integer_type_node,
8052 endlink)));
8053 tree v8qi_ftype_v8qi_di
8054 = build_function_type (V8QI_type_node,
8055 tree_cons (NULL_TREE, V8QI_type_node,
8056 tree_cons (NULL_TREE,
8057 long_long_integer_type_node,
8058 endlink)));
8059 tree v4hi_ftype_v4hi_di
8060 = build_function_type (V4HI_type_node,
8061 tree_cons (NULL_TREE, V4HI_type_node,
8062 tree_cons (NULL_TREE,
8063 long_long_integer_type_node,
8064 endlink)));
8065 tree v2si_ftype_v2si_di
8066 = build_function_type (V2SI_type_node,
8067 tree_cons (NULL_TREE, V2SI_type_node,
8068 tree_cons (NULL_TREE,
8069 long_long_integer_type_node,
8070 endlink)));
8071 tree void_ftype_void
8072 = build_function_type (void_type_node, endlink);
8073 tree void_ftype_pchar_int
8074 = build_function_type (void_type_node,
8075 tree_cons (NULL_TREE, pchar_type_node,
8076 tree_cons (NULL_TREE, integer_type_node,
8077 endlink)));
8078 tree void_ftype_unsigned
8079 = build_function_type (void_type_node,
8080 tree_cons (NULL_TREE, unsigned_type_node,
8081 endlink));
8082 tree unsigned_ftype_void
8083 = build_function_type (unsigned_type_node, endlink);
8084 tree di_ftype_void
8085 = build_function_type (long_long_unsigned_type_node, endlink);
8086 tree ti_ftype_void
8087 = build_function_type (intTI_type_node, endlink);
8088 tree v2si_ftype_v4sf
8089 = build_function_type (V2SI_type_node,
8090 tree_cons (NULL_TREE, V4SF_type_node,
8091 endlink));
8092 /* Loads/stores. */
8093 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
8094 tree_cons (NULL_TREE, V8QI_type_node,
8095 tree_cons (NULL_TREE,
8096 pchar_type_node,
8097 endlink)));
8098 tree void_ftype_v8qi_v8qi_pchar
8099 = build_function_type (void_type_node, maskmovq_args);
8100 tree v4sf_ftype_pfloat
8101 = build_function_type (V4SF_type_node,
8102 tree_cons (NULL_TREE, pfloat_type_node,
8103 endlink));
8104 tree v4sf_ftype_float
8105 = build_function_type (V4SF_type_node,
8106 tree_cons (NULL_TREE, float_type_node,
8107 endlink));
8108 tree v4sf_ftype_float_float_float_float
8109 = build_function_type (V4SF_type_node,
8110 tree_cons (NULL_TREE, float_type_node,
8111 tree_cons (NULL_TREE, float_type_node,
8112 tree_cons (NULL_TREE,
8113 float_type_node,
8114 tree_cons (NULL_TREE,
8115 float_type_node,
8116 endlink)))));
8117 /* @@@ the type is bogus */
8118 tree v4sf_ftype_v4sf_pv2si
8119 = build_function_type (V4SF_type_node,
8120 tree_cons (NULL_TREE, V4SF_type_node,
8121 tree_cons (NULL_TREE, pv2si_type_node,
8122 endlink)));
8123 tree v4sf_ftype_pv2si_v4sf
8124 = build_function_type (V4SF_type_node,
8125 tree_cons (NULL_TREE, V4SF_type_node,
8126 tree_cons (NULL_TREE, pv2si_type_node,
8127 endlink)));
8128 tree void_ftype_pfloat_v4sf
8129 = build_function_type (void_type_node,
8130 tree_cons (NULL_TREE, pfloat_type_node,
8131 tree_cons (NULL_TREE, V4SF_type_node,
8132 endlink)));
8133 tree void_ftype_pdi_di
8134 = build_function_type (void_type_node,
8135 tree_cons (NULL_TREE, pdi_type_node,
8136 tree_cons (NULL_TREE,
8137 long_long_unsigned_type_node,
8138 endlink)));
8139 /* Normal vector unops. */
8140 tree v4sf_ftype_v4sf
8141 = build_function_type (V4SF_type_node,
8142 tree_cons (NULL_TREE, V4SF_type_node,
8143 endlink));
0f290768 8144
bd793c65
BS
8145 /* Normal vector binops. */
8146 tree v4sf_ftype_v4sf_v4sf
8147 = build_function_type (V4SF_type_node,
8148 tree_cons (NULL_TREE, V4SF_type_node,
8149 tree_cons (NULL_TREE, V4SF_type_node,
8150 endlink)));
8151 tree v8qi_ftype_v8qi_v8qi
8152 = build_function_type (V8QI_type_node,
8153 tree_cons (NULL_TREE, V8QI_type_node,
8154 tree_cons (NULL_TREE, V8QI_type_node,
8155 endlink)));
8156 tree v4hi_ftype_v4hi_v4hi
8157 = build_function_type (V4HI_type_node,
8158 tree_cons (NULL_TREE, V4HI_type_node,
8159 tree_cons (NULL_TREE, V4HI_type_node,
8160 endlink)));
8161 tree v2si_ftype_v2si_v2si
8162 = build_function_type (V2SI_type_node,
8163 tree_cons (NULL_TREE, V2SI_type_node,
8164 tree_cons (NULL_TREE, V2SI_type_node,
8165 endlink)));
8166 tree ti_ftype_ti_ti
8167 = build_function_type (intTI_type_node,
8168 tree_cons (NULL_TREE, intTI_type_node,
8169 tree_cons (NULL_TREE, intTI_type_node,
8170 endlink)));
8171 tree di_ftype_di_di
8172 = build_function_type (long_long_unsigned_type_node,
8173 tree_cons (NULL_TREE, long_long_unsigned_type_node,
8174 tree_cons (NULL_TREE,
8175 long_long_unsigned_type_node,
8176 endlink)));
8177
8178 /* Add all builtins that are more or less simple operations on two
8179 operands. */
8180 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
8181 {
8182 /* Use one of the operands; the target can have a different mode for
8183 mask-generating compares. */
8184 enum machine_mode mode;
8185 tree type;
8186
8187 if (d->name == 0)
8188 continue;
8189 mode = insn_data[d->icode].operand[1].mode;
8190
8191 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
8192 continue;
8193
8194 switch (mode)
8195 {
8196 case V4SFmode:
8197 type = v4sf_ftype_v4sf_v4sf;
8198 break;
8199 case V8QImode:
8200 type = v8qi_ftype_v8qi_v8qi;
8201 break;
8202 case V4HImode:
8203 type = v4hi_ftype_v4hi_v4hi;
8204 break;
8205 case V2SImode:
8206 type = v2si_ftype_v2si_v2si;
8207 break;
8208 case TImode:
8209 type = ti_ftype_ti_ti;
8210 break;
8211 case DImode:
8212 type = di_ftype_di_di;
8213 break;
8214
8215 default:
8216 abort ();
8217 }
0f290768 8218
bd793c65
BS
8219 /* Override for comparisons. */
8220 if (d->icode == CODE_FOR_maskcmpv4sf3
8221 || d->icode == CODE_FOR_maskncmpv4sf3
8222 || d->icode == CODE_FOR_vmmaskcmpv4sf3
8223 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
8224 type = v4si_ftype_v4sf_v4sf;
8225
8226 def_builtin (d->name, type, d->code);
8227 }
8228
8229 /* Add the remaining MMX insns with somewhat more complicated types. */
8230 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
8231 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
8232 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
8233 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
8234 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
8235 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
8236 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
8237 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
8238 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
8239
8240 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
8241 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
8242 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
8243
8244 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
8245 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
8246
8247 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
8248 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
8249
8250 /* Everything beyond this point is SSE only. */
8251 if (! TARGET_SSE)
8252 return;
0f290768 8253
bd793c65
BS
8254 /* comi/ucomi insns. */
8255 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
8256 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
8257
8258 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
8259 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
8260 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
8261
8262 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
8263 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
8264 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
8265 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
8266 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
8267 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
8268
8269 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
8270 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
8271
8272 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
8273
8274 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
8275 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
8276 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
8277 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
8278 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
8279 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
8280
8281 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
8282 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
8283 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
8284 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
8285
8286 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
8287 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
8288 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
8289 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
8290
8291 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
8292 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
8293
8294 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
8295
8296 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
8297 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
8298 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
8299 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
8300 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
8301 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
8302
8303 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
8304
8305 /* Composite intrinsics. */
8306 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
8307 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
8308 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
8309 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
8310 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
8311 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
8312 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
8313}
8314
8315/* Errors in the source file can cause expand_expr to return const0_rtx
8316 where we expect a vector. To avoid crashing, use one of the vector
8317 clear instructions. */
8318static rtx
8319safe_vector_operand (x, mode)
8320 rtx x;
8321 enum machine_mode mode;
8322{
8323 if (x != const0_rtx)
8324 return x;
8325 x = gen_reg_rtx (mode);
8326
8327 if (VALID_MMX_REG_MODE (mode))
8328 emit_insn (gen_mmx_clrdi (mode == DImode ? x
8329 : gen_rtx_SUBREG (DImode, x, 0)));
8330 else
8331 emit_insn (gen_sse_clrti (mode == TImode ? x
8332 : gen_rtx_SUBREG (TImode, x, 0)));
8333 return x;
8334}
8335
8336/* Subroutine of ix86_expand_builtin to take care of binop insns. */
8337
8338static rtx
8339ix86_expand_binop_builtin (icode, arglist, target)
8340 enum insn_code icode;
8341 tree arglist;
8342 rtx target;
8343{
8344 rtx pat;
8345 tree arg0 = TREE_VALUE (arglist);
8346 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8347 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8348 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8349 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8350 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8351 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
8352
8353 if (VECTOR_MODE_P (mode0))
8354 op0 = safe_vector_operand (op0, mode0);
8355 if (VECTOR_MODE_P (mode1))
8356 op1 = safe_vector_operand (op1, mode1);
8357
8358 if (! target
8359 || GET_MODE (target) != tmode
8360 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8361 target = gen_reg_rtx (tmode);
8362
8363 /* In case the insn wants input operands in modes different from
8364 the result, abort. */
8365 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
8366 abort ();
8367
8368 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8369 op0 = copy_to_mode_reg (mode0, op0);
8370 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8371 op1 = copy_to_mode_reg (mode1, op1);
8372
8373 pat = GEN_FCN (icode) (target, op0, op1);
8374 if (! pat)
8375 return 0;
8376 emit_insn (pat);
8377 return target;
8378}
8379
8380/* Subroutine of ix86_expand_builtin to take care of stores. */
8381
8382static rtx
8383ix86_expand_store_builtin (icode, arglist, shuffle)
8384 enum insn_code icode;
8385 tree arglist;
8386 int shuffle;
8387{
8388 rtx pat;
8389 tree arg0 = TREE_VALUE (arglist);
8390 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8391 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8392 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8393 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
8394 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
8395
8396 if (VECTOR_MODE_P (mode1))
8397 op1 = safe_vector_operand (op1, mode1);
8398
8399 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8400 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8401 op1 = copy_to_mode_reg (mode1, op1);
8402 if (shuffle >= 0)
8403 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
8404 pat = GEN_FCN (icode) (op0, op1);
8405 if (pat)
8406 emit_insn (pat);
8407 return 0;
8408}
8409
8410/* Subroutine of ix86_expand_builtin to take care of unop insns. */
8411
8412static rtx
8413ix86_expand_unop_builtin (icode, arglist, target, do_load)
8414 enum insn_code icode;
8415 tree arglist;
8416 rtx target;
8417 int do_load;
8418{
8419 rtx pat;
8420 tree arg0 = TREE_VALUE (arglist);
8421 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8422 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8423 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8424
8425 if (! target
8426 || GET_MODE (target) != tmode
8427 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8428 target = gen_reg_rtx (tmode);
8429 if (do_load)
8430 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8431 else
8432 {
8433 if (VECTOR_MODE_P (mode0))
8434 op0 = safe_vector_operand (op0, mode0);
8435
8436 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8437 op0 = copy_to_mode_reg (mode0, op0);
8438 }
8439
8440 pat = GEN_FCN (icode) (target, op0);
8441 if (! pat)
8442 return 0;
8443 emit_insn (pat);
8444 return target;
8445}
8446
8447/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
8448 sqrtss, rsqrtss, rcpss. */
8449
8450static rtx
8451ix86_expand_unop1_builtin (icode, arglist, target)
8452 enum insn_code icode;
8453 tree arglist;
8454 rtx target;
8455{
8456 rtx pat;
8457 tree arg0 = TREE_VALUE (arglist);
8458 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8459 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8460 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8461
8462 if (! target
8463 || GET_MODE (target) != tmode
8464 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8465 target = gen_reg_rtx (tmode);
8466
8467 if (VECTOR_MODE_P (mode0))
8468 op0 = safe_vector_operand (op0, mode0);
8469
8470 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8471 op0 = copy_to_mode_reg (mode0, op0);
8472
8473 pat = GEN_FCN (icode) (target, op0, op0);
8474 if (! pat)
8475 return 0;
8476 emit_insn (pat);
8477 return target;
8478}
8479
/* Subroutine of ix86_expand_builtin to take care of comparison insns.
   D describes the builtin (icode, comparison code, and the swap flag);
   ARGLIST holds the two operand trees; TARGET is a suggested result
   register.  Returns the register holding the comparison mask, or 0 if
   the pattern could not be generated.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  /* Replace error-marker operands with cleared vector registers.  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      /* The fresh target register doubles as the home of the swapped
	 first operand: OP1 is copied into it before OP0/OP1 exchange
	 roles and the comparison code is reversed.  Order matters —
	 the move must be emitted before the operands are swapped.  */
      target = gen_reg_rtx (tmode);
      emit_move_insn (target, op1);
      op1 = op0;
      op0 = target;
      comparison = swap_condition (comparison);
    }
  else if (! target
	   || GET_MODE (target) != tmode
	   || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* Operand 3 of the pattern is the comparison rtx itself.  */
  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
8531
8532/* Subroutine of ix86_expand_builtin to take care of comi insns. */
8533
8534static rtx
8535ix86_expand_sse_comi (d, arglist, target)
8536 struct builtin_description *d;
8537 tree arglist;
8538 rtx target;
8539{
8540 rtx pat;
8541 tree arg0 = TREE_VALUE (arglist);
8542 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8543 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8544 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8545 rtx op2;
8546 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
8547 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
8548 enum rtx_code comparison = d->comparison;
8549
8550 if (VECTOR_MODE_P (mode0))
8551 op0 = safe_vector_operand (op0, mode0);
8552 if (VECTOR_MODE_P (mode1))
8553 op1 = safe_vector_operand (op1, mode1);
8554
8555 /* Swap operands if we have a comparison that isn't available in
8556 hardware. */
8557 if (d->flag)
8558 {
8559 rtx tmp = op1;
8560 op1 = op0;
8561 op0 = tmp;
8562 comparison = swap_condition (comparison);
8563 }
8564
8565 target = gen_reg_rtx (SImode);
8566 emit_move_insn (target, const0_rtx);
8567 target = gen_rtx_SUBREG (QImode, target, 0);
8568
8569 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
8570 op0 = copy_to_mode_reg (mode0, op0);
8571 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
8572 op1 = copy_to_mode_reg (mode1, op1);
8573
8574 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
8575 pat = GEN_FCN (d->icode) (op0, op1, op2);
8576 if (! pat)
8577 return 0;
8578 emit_insn (pat);
8579 emit_insn (gen_setcc_2 (target, op2));
8580
8581 return target;
8582}
8583
8584/* Expand an expression EXP that calls a built-in function,
8585 with result going to TARGET if that's convenient
8586 (and in mode MODE if that's convenient).
8587 SUBTARGET may be used as the target for computing one of EXP's operands.
8588 IGNORE is nonzero if the value is to be ignored. */
8589
8590rtx
8591ix86_expand_builtin (exp, target, subtarget, mode, ignore)
8592 tree exp;
8593 rtx target;
8594 rtx subtarget ATTRIBUTE_UNUSED;
8595 enum machine_mode mode ATTRIBUTE_UNUSED;
8596 int ignore ATTRIBUTE_UNUSED;
8597{
8598 struct builtin_description *d;
77ebd435 8599 size_t i;
bd793c65
BS
8600 enum insn_code icode;
8601 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8602 tree arglist = TREE_OPERAND (exp, 1);
8603 tree arg0, arg1, arg2, arg3;
8604 rtx op0, op1, op2, pat;
8605 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 8606 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
bd793c65
BS
8607
8608 switch (fcode)
8609 {
8610 case IX86_BUILTIN_EMMS:
8611 emit_insn (gen_emms ());
8612 return 0;
8613
8614 case IX86_BUILTIN_SFENCE:
8615 emit_insn (gen_sfence ());
8616 return 0;
8617
8618 case IX86_BUILTIN_M_FROM_INT:
8619 target = gen_reg_rtx (DImode);
8620 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8621 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
8622 return target;
8623
8624 case IX86_BUILTIN_M_TO_INT:
8625 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8626 op0 = copy_to_mode_reg (DImode, op0);
8627 target = gen_reg_rtx (SImode);
8628 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
8629 return target;
8630
8631 case IX86_BUILTIN_PEXTRW:
8632 icode = CODE_FOR_mmx_pextrw;
8633 arg0 = TREE_VALUE (arglist);
8634 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8635 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8636 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8637 tmode = insn_data[icode].operand[0].mode;
8638 mode0 = insn_data[icode].operand[1].mode;
8639 mode1 = insn_data[icode].operand[2].mode;
8640
8641 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8642 op0 = copy_to_mode_reg (mode0, op0);
8643 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8644 {
8645 /* @@@ better error message */
8646 error ("selector must be an immediate");
8647 return const0_rtx;
8648 }
8649 if (target == 0
8650 || GET_MODE (target) != tmode
8651 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8652 target = gen_reg_rtx (tmode);
8653 pat = GEN_FCN (icode) (target, op0, op1);
8654 if (! pat)
8655 return 0;
8656 emit_insn (pat);
8657 return target;
8658
8659 case IX86_BUILTIN_PINSRW:
8660 icode = CODE_FOR_mmx_pinsrw;
8661 arg0 = TREE_VALUE (arglist);
8662 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8663 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8664 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8665 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8666 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8667 tmode = insn_data[icode].operand[0].mode;
8668 mode0 = insn_data[icode].operand[1].mode;
8669 mode1 = insn_data[icode].operand[2].mode;
8670 mode2 = insn_data[icode].operand[3].mode;
8671
8672 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8673 op0 = copy_to_mode_reg (mode0, op0);
8674 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8675 op1 = copy_to_mode_reg (mode1, op1);
8676 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8677 {
8678 /* @@@ better error message */
8679 error ("selector must be an immediate");
8680 return const0_rtx;
8681 }
8682 if (target == 0
8683 || GET_MODE (target) != tmode
8684 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8685 target = gen_reg_rtx (tmode);
8686 pat = GEN_FCN (icode) (target, op0, op1, op2);
8687 if (! pat)
8688 return 0;
8689 emit_insn (pat);
8690 return target;
8691
8692 case IX86_BUILTIN_MASKMOVQ:
8693 icode = CODE_FOR_mmx_maskmovq;
8694 /* Note the arg order is different from the operand order. */
8695 arg1 = TREE_VALUE (arglist);
8696 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
8697 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8698 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8699 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8700 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8701 mode0 = insn_data[icode].operand[0].mode;
8702 mode1 = insn_data[icode].operand[1].mode;
8703 mode2 = insn_data[icode].operand[2].mode;
8704
8705 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8706 op0 = copy_to_mode_reg (mode0, op0);
8707 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8708 op1 = copy_to_mode_reg (mode1, op1);
8709 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
8710 op2 = copy_to_mode_reg (mode2, op2);
8711 pat = GEN_FCN (icode) (op0, op1, op2);
8712 if (! pat)
8713 return 0;
8714 emit_insn (pat);
8715 return 0;
8716
8717 case IX86_BUILTIN_SQRTSS:
8718 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
8719 case IX86_BUILTIN_RSQRTSS:
8720 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
8721 case IX86_BUILTIN_RCPSS:
8722 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
8723
8724 case IX86_BUILTIN_LOADAPS:
8725 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
8726
8727 case IX86_BUILTIN_LOADUPS:
8728 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
8729
8730 case IX86_BUILTIN_STOREAPS:
8731 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
8732 case IX86_BUILTIN_STOREUPS:
8733 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
8734
8735 case IX86_BUILTIN_LOADSS:
8736 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
8737
8738 case IX86_BUILTIN_STORESS:
8739 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
8740
0f290768 8741 case IX86_BUILTIN_LOADHPS:
bd793c65
BS
8742 case IX86_BUILTIN_LOADLPS:
8743 icode = (fcode == IX86_BUILTIN_LOADHPS
8744 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8745 arg0 = TREE_VALUE (arglist);
8746 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8747 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8748 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8749 tmode = insn_data[icode].operand[0].mode;
8750 mode0 = insn_data[icode].operand[1].mode;
8751 mode1 = insn_data[icode].operand[2].mode;
8752
8753 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8754 op0 = copy_to_mode_reg (mode0, op0);
8755 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
8756 if (target == 0
8757 || GET_MODE (target) != tmode
8758 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8759 target = gen_reg_rtx (tmode);
8760 pat = GEN_FCN (icode) (target, op0, op1);
8761 if (! pat)
8762 return 0;
8763 emit_insn (pat);
8764 return target;
0f290768 8765
bd793c65
BS
8766 case IX86_BUILTIN_STOREHPS:
8767 case IX86_BUILTIN_STORELPS:
8768 icode = (fcode == IX86_BUILTIN_STOREHPS
8769 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8770 arg0 = TREE_VALUE (arglist);
8771 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8772 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8773 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8774 mode0 = insn_data[icode].operand[1].mode;
8775 mode1 = insn_data[icode].operand[2].mode;
8776
8777 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8778 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8779 op1 = copy_to_mode_reg (mode1, op1);
8780
8781 pat = GEN_FCN (icode) (op0, op0, op1);
8782 if (! pat)
8783 return 0;
8784 emit_insn (pat);
8785 return 0;
8786
8787 case IX86_BUILTIN_MOVNTPS:
8788 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
8789 case IX86_BUILTIN_MOVNTQ:
8790 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
8791
8792 case IX86_BUILTIN_LDMXCSR:
8793 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8794 target = assign_386_stack_local (SImode, 0);
8795 emit_move_insn (target, op0);
8796 emit_insn (gen_ldmxcsr (target));
8797 return 0;
8798
8799 case IX86_BUILTIN_STMXCSR:
8800 target = assign_386_stack_local (SImode, 0);
8801 emit_insn (gen_stmxcsr (target));
8802 return copy_to_mode_reg (SImode, target);
8803
8804 case IX86_BUILTIN_PREFETCH:
8805 icode = CODE_FOR_prefetch;
8806 arg0 = TREE_VALUE (arglist);
8807 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8808 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8809 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
332316cd
BS
8810 mode0 = insn_data[icode].operand[0].mode;
8811 mode1 = insn_data[icode].operand[1].mode;
bd793c65 8812
332316cd 8813 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
bd793c65
BS
8814 {
8815 /* @@@ better error message */
8816 error ("selector must be an immediate");
8817 return const0_rtx;
8818 }
8819
332316cd 8820 op0 = copy_to_mode_reg (Pmode, op0);
bd793c65
BS
8821 pat = GEN_FCN (icode) (op0, op1);
8822 if (! pat)
8823 return 0;
8824 emit_insn (pat);
8825 return target;
0f290768 8826
bd793c65
BS
8827 case IX86_BUILTIN_SHUFPS:
8828 icode = CODE_FOR_sse_shufps;
8829 arg0 = TREE_VALUE (arglist);
8830 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8831 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8832 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8833 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8834 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8835 tmode = insn_data[icode].operand[0].mode;
8836 mode0 = insn_data[icode].operand[1].mode;
8837 mode1 = insn_data[icode].operand[2].mode;
8838 mode2 = insn_data[icode].operand[3].mode;
8839
8840 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8841 op0 = copy_to_mode_reg (mode0, op0);
8842 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8843 op1 = copy_to_mode_reg (mode1, op1);
8844 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8845 {
8846 /* @@@ better error message */
8847 error ("mask must be an immediate");
8848 return const0_rtx;
8849 }
8850 if (target == 0
8851 || GET_MODE (target) != tmode
8852 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8853 target = gen_reg_rtx (tmode);
8854 pat = GEN_FCN (icode) (target, op0, op1, op2);
8855 if (! pat)
8856 return 0;
8857 emit_insn (pat);
8858 return target;
8859
8860 case IX86_BUILTIN_PSHUFW:
8861 icode = CODE_FOR_mmx_pshufw;
8862 arg0 = TREE_VALUE (arglist);
8863 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8864 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8865 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8866 tmode = insn_data[icode].operand[0].mode;
8867 mode0 = insn_data[icode].operand[2].mode;
8868 mode1 = insn_data[icode].operand[3].mode;
8869
8870 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8871 op0 = copy_to_mode_reg (mode0, op0);
8872 if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
8873 {
8874 /* @@@ better error message */
8875 error ("mask must be an immediate");
8876 return const0_rtx;
8877 }
8878 if (target == 0
8879 || GET_MODE (target) != tmode
8880 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8881 target = gen_reg_rtx (tmode);
8882 pat = GEN_FCN (icode) (target, target, op0, op1);
8883 if (! pat)
8884 return 0;
8885 emit_insn (pat);
8886 return target;
8887
8888 /* Composite intrinsics. */
8889 case IX86_BUILTIN_SETPS1:
8890 target = assign_386_stack_local (SFmode, 0);
8891 arg0 = TREE_VALUE (arglist);
8892 emit_move_insn (change_address (target, SFmode, XEXP (target, 0)),
8893 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8894 op0 = gen_reg_rtx (V4SFmode);
8895 emit_insn (gen_sse_loadss (op0, change_address (target, V4SFmode,
8896 XEXP (target, 0))));
8897 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
8898 return op0;
0f290768 8899
bd793c65
BS
8900 case IX86_BUILTIN_SETPS:
8901 target = assign_386_stack_local (V4SFmode, 0);
8902 op0 = change_address (target, SFmode, XEXP (target, 0));
8903 arg0 = TREE_VALUE (arglist);
8904 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8905 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8906 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
8907 emit_move_insn (op0,
8908 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8909 emit_move_insn (adj_offsettable_operand (op0, 4),
8910 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
8911 emit_move_insn (adj_offsettable_operand (op0, 8),
8912 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
8913 emit_move_insn (adj_offsettable_operand (op0, 12),
8914 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
8915 op0 = gen_reg_rtx (V4SFmode);
8916 emit_insn (gen_sse_movaps (op0, target));
8917 return op0;
8918
8919 case IX86_BUILTIN_CLRPS:
8920 target = gen_reg_rtx (TImode);
8921 emit_insn (gen_sse_clrti (target));
8922 return target;
8923
8924 case IX86_BUILTIN_LOADRPS:
8925 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
8926 gen_reg_rtx (V4SFmode), 1);
8927 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
8928 return target;
8929
8930 case IX86_BUILTIN_LOADPS1:
8931 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
8932 gen_reg_rtx (V4SFmode), 1);
8933 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
8934 return target;
8935
8936 case IX86_BUILTIN_STOREPS1:
8937 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
8938 case IX86_BUILTIN_STORERPS:
8939 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
8940
8941 case IX86_BUILTIN_MMX_ZERO:
8942 target = gen_reg_rtx (DImode);
8943 emit_insn (gen_mmx_clrdi (target));
8944 return target;
8945
8946 default:
8947 break;
8948 }
8949
8950 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
8951 if (d->code == fcode)
8952 {
8953 /* Compares are treated specially. */
8954 if (d->icode == CODE_FOR_maskcmpv4sf3
8955 || d->icode == CODE_FOR_vmmaskcmpv4sf3
8956 || d->icode == CODE_FOR_maskncmpv4sf3
8957 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
8958 return ix86_expand_sse_compare (d, arglist, target);
8959
8960 return ix86_expand_binop_builtin (d->icode, arglist, target);
8961 }
8962
8963 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
8964 if (d->code == fcode)
8965 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
0f290768 8966
bd793c65
BS
8967 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
8968 if (d->code == fcode)
8969 return ix86_expand_sse_comi (d, arglist, target);
0f290768 8970
bd793c65
BS
8971 /* @@@ Should really do something sensible here. */
8972 return 0;
bd793c65 8973}
4211a8fb
JH
8974
8975/* Store OPERAND to the memory after reload is completed. This means
8976 that we can't easilly use assign_stack_local. */
8977rtx
8978ix86_force_to_memory (mode, operand)
8979 enum machine_mode mode;
8980 rtx operand;
8981{
8982 if (!reload_completed)
8983 abort ();
8984 switch (mode)
8985 {
8986 case DImode:
8987 {
8988 rtx operands[2];
8989 split_di (&operand, 1, operands, operands+1);
8990 emit_insn (
8991 gen_rtx_SET (VOIDmode,
8992 gen_rtx_MEM (SImode,
8993 gen_rtx_PRE_DEC (Pmode,
8994 stack_pointer_rtx)),
8995 operands[1]));
8996 emit_insn (
8997 gen_rtx_SET (VOIDmode,
8998 gen_rtx_MEM (SImode,
8999 gen_rtx_PRE_DEC (Pmode,
9000 stack_pointer_rtx)),
9001 operands[0]));
9002 }
9003 break;
9004 case HImode:
9005 /* It is better to store HImodes as SImodes. */
9006 if (!TARGET_PARTIAL_REG_STALL)
9007 operand = gen_lowpart (SImode, operand);
9008 /* FALLTHRU */
9009 case SImode:
9010 emit_insn (
9011 gen_rtx_SET (VOIDmode,
9012 gen_rtx_MEM (GET_MODE (operand),
9013 gen_rtx_PRE_DEC (SImode,
9014 stack_pointer_rtx)),
9015 operand));
9016 break;
9017 default:
9018 abort();
9019 }
9020 return gen_rtx_MEM (mode, stack_pointer_rtx);
9021}
9022
9023/* Free operand from the memory. */
9024void
9025ix86_free_from_memory (mode)
9026 enum machine_mode mode;
9027{
9028 /* Use LEA to deallocate stack space. In peephole2 it will be converted
9029 to pop or add instruction if registers are available. */
9030 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9031 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9032 GEN_INT (mode == DImode
9033 ? 8
9034 : mode == HImode && TARGET_PARTIAL_REG_STALL
9035 ? 2
9036 : 4))));
9037}
a946dd00 9038
f84aa48a
JH
9039/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
9040 QImode must go into class Q_REGS.
9041 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
9042 movdf to do mem-to-mem moves through integer regs. */
9043enum reg_class
9044ix86_preferred_reload_class (x, class)
9045 rtx x;
9046 enum reg_class class;
9047{
9048 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
9049 {
9050 /* SSE can't load any constant directly yet. */
9051 if (SSE_CLASS_P (class))
9052 return NO_REGS;
9053 /* Floats can load 0 and 1. */
9054 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
9055 {
9056 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
9057 if (MAYBE_SSE_CLASS_P (class))
9058 return (reg_class_subset_p (class, GENERAL_REGS)
9059 ? GENERAL_REGS : FLOAT_REGS);
9060 else
9061 return class;
9062 }
9063 /* General regs can load everything. */
9064 if (reg_class_subset_p (class, GENERAL_REGS))
9065 return GENERAL_REGS;
9066 /* In case we haven't resolved FLOAT or SSE yet, give up. */
9067 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
9068 return NO_REGS;
9069 }
9070 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
9071 return NO_REGS;
9072 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
9073 return Q_REGS;
9074 return class;
9075}
9076
9077/* If we are copying between general and FP registers, we need a memory
9078 location. The same is true for SSE and MMX registers.
9079
9080 The macro can't work reliably when one of the CLASSES is class containing
9081 registers from multiple units (SSE, MMX, integer). We avoid this by never
9082 combining those units in single alternative in the machine description.
9083 Ensure that this constraint holds to avoid unexpected surprises.
9084
9085 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
9086 enforce these sanity checks. */
9087int
9088ix86_secondary_memory_needed (class1, class2, mode, strict)
9089 enum reg_class class1, class2;
9090 enum machine_mode mode;
9091 int strict;
9092{
9093 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
9094 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
9095 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
9096 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
9097 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
9098 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
9099 {
9100 if (strict)
9101 abort ();
9102 else
9103 return 1;
9104 }
9105 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
9106 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
9107 && (mode) != SImode)
9108 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
9109 && (mode) != SImode));
9110}
9111/* Return the cost of moving data from a register in class CLASS1 to
9112 one in class CLASS2.
9113
9114 It is not required that the cost always equal 2 when FROM is the same as TO;
9115 on some machines it is expensive to move between registers if they are not
9116 general registers. */
9117int
9118ix86_register_move_cost (mode, class1, class2)
9119 enum machine_mode mode;
9120 enum reg_class class1, class2;
9121{
9122 /* In case we require secondary memory, compute cost of the store followed
9123 by load. In case of copying from general_purpose_register we may emit
9124 multiple stores followed by single load causing memory size mismatch
9125 stall. Count this as arbitarily high cost of 20. */
9126 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
9127 {
62415523 9128 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
f84aa48a 9129 return 10;
62415523
JH
9130 return (MEMORY_MOVE_COST (mode, class1, 0)
9131 + MEMORY_MOVE_COST (mode, class2, 1));
f84aa48a
JH
9132 }
9133 /* Moves between SSE/MMX and integer unit are expensive.
9134 ??? We should make this cost CPU specific. */
62415523
JH
9135 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
9136 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
fa79946e
JH
9137 return ix86_cost->mmxsse_to_integer;
9138 if (MAYBE_FLOAT_CLASS_P (class1))
9139 return ix86_cost->fp_move;
9140 if (MAYBE_SSE_CLASS_P (class1))
9141 return ix86_cost->sse_move;
9142 if (MAYBE_MMX_CLASS_P (class1))
9143 return ix86_cost->mmx_move;
f84aa48a
JH
9144 return 2;
9145}
9146
a946dd00
JH
9147/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
9148int
9149ix86_hard_regno_mode_ok (regno, mode)
9150 int regno;
9151 enum machine_mode mode;
9152{
9153 /* Flags and only flags can only hold CCmode values. */
9154 if (CC_REGNO_P (regno))
9155 return GET_MODE_CLASS (mode) == MODE_CC;
9156 if (GET_MODE_CLASS (mode) == MODE_CC
9157 || GET_MODE_CLASS (mode) == MODE_RANDOM
9158 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
9159 return 0;
9160 if (FP_REGNO_P (regno))
9161 return VALID_FP_MODE_P (mode);
9162 if (SSE_REGNO_P (regno))
9163 return VALID_SSE_REG_MODE (mode);
9164 if (MMX_REGNO_P (regno))
9165 return VALID_MMX_REG_MODE (mode);
9166 /* We handle both integer and floats in the general purpose registers.
9167 In future we should be able to handle vector modes as well. */
9168 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
9169 return 0;
9170 /* Take care for QImode values - they can be in non-QI regs, but then
9171 they do cause partial register stalls. */
62415523 9172 if (regno < 4 || mode != QImode)
a946dd00
JH
9173 return 1;
9174 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
9175}
fa79946e
JH
9176
9177/* Return the cost of moving data of mode M between a
9178 register and memory. A value of 2 is the default; this cost is
9179 relative to those in `REGISTER_MOVE_COST'.
9180
9181 If moving between registers and memory is more expensive than
9182 between two registers, you should define this macro to express the
9183 relative cost.
9184
9185 Model also increased moving costs of QImode registers in non
9186 Q_REGS classes.
9187 */
9188int
9189ix86_memory_move_cost (mode, class, in)
9190 enum machine_mode mode;
9191 enum reg_class class;
9192 int in;
9193{
9194 if (FLOAT_CLASS_P (class))
9195 {
9196 int index;
9197 switch (mode)
9198 {
9199 case SFmode:
9200 index = 0;
9201 break;
9202 case DFmode:
9203 index = 1;
9204 break;
9205 case XFmode:
9206 case TFmode:
9207 index = 2;
9208 break;
9209 default:
9210 return 100;
9211 }
9212 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
9213 }
9214 if (SSE_CLASS_P (class))
9215 {
9216 int index;
9217 switch (GET_MODE_SIZE (mode))
9218 {
9219 case 4:
9220 index = 0;
9221 break;
9222 case 8:
9223 index = 1;
9224 break;
9225 case 16:
9226 index = 2;
9227 break;
9228 default:
9229 return 100;
9230 }
9231 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
9232 }
9233 if (MMX_CLASS_P (class))
9234 {
9235 int index;
9236 switch (GET_MODE_SIZE (mode))
9237 {
9238 case 4:
9239 index = 0;
9240 break;
9241 case 8:
9242 index = 1;
9243 break;
9244 default:
9245 return 100;
9246 }
9247 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
9248 }
9249 switch (GET_MODE_SIZE (mode))
9250 {
9251 case 1:
9252 if (in)
9253 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
9254 : ix86_cost->movzbl_load);
9255 else
9256 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
9257 : ix86_cost->int_store[0] + 4);
9258 break;
9259 case 2:
9260 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
9261 default:
9262 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
9263 if (mode == TFmode)
9264 mode = XFmode;
3bb7e126 9265 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
fa79946e
JH
9266 * (int) GET_MODE_SIZE (mode) / 4);
9267 }
9268}
This page took 2.204101 seconds and 5 git commands to generate.