]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
sh.c (sh_expand_prologue): Insns that set up the PIC register may be dead.
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
e075ae69 1/* Subroutines used for code generation on IA-32.
8752c357 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
4592bdcb 3 Free Software Foundation, Inc.
2a2ab3f9
JVA
4
5This file is part of GNU CC.
6
7GNU CC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU CC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU CC; see the file COPYING. If not, write to
97aadbb9 19the Free Software Foundation, 59 Temple Place - Suite 330,
0f290768 20Boston, MA 02111-1307, USA. */
2a2ab3f9 21
2a2ab3f9 22#include "config.h"
1fba7553 23#include <setjmp.h>
bb5177ac 24#include "system.h"
2a2ab3f9 25#include "rtl.h"
6baf1cc8
BS
26#include "tree.h"
27#include "tm_p.h"
2a2ab3f9
JVA
28#include "regs.h"
29#include "hard-reg-set.h"
30#include "real.h"
31#include "insn-config.h"
32#include "conditions.h"
33#include "insn-flags.h"
34#include "output.h"
35#include "insn-attr.h"
2a2ab3f9 36#include "flags.h"
a8ffcc81 37#include "except.h"
ecbc4695 38#include "function.h"
00c79232 39#include "recog.h"
ced8dd8c 40#include "expr.h"
f103890b 41#include "toplev.h"
e075ae69 42#include "basic-block.h"
1526a060 43#include "ggc.h"
2a2ab3f9 44
8dfe5673
RK
/* Default stack-probe limit; (-1) means no limit is known.  Parenthesized
   so the negative constant expands safely inside larger expressions.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
48
32b5b1aa
SC
49/* Processor costs (relative to an add) */
50struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 51 1, /* cost of an add instruction */
32b5b1aa
SC
52 1, /* cost of a lea instruction */
53 3, /* variable shift costs */
54 2, /* constant shift costs */
55 6, /* cost of starting a multiply */
56 1, /* cost of multiply per each bit set */
e075ae69 57 23, /* cost of a divide/mod */
96e7ae40 58 15, /* "large" insn */
e2e52e1b 59 3, /* MOVE_RATIO */
7c6b971d 60 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
61 {2, 4, 2}, /* cost of loading integer registers
62 in QImode, HImode and SImode.
0f290768 63 Relative to reg-reg move (2). */
96e7ae40
JH
64 {2, 4, 2}, /* cost of storing integer registers */
65 2, /* cost of reg,reg fld/fst */
66 {8, 8, 8}, /* cost of loading fp registers
67 in SFmode, DFmode and XFmode */
fa79946e
JH
68 {8, 8, 8}, /* cost of loading integer registers */
69 2, /* cost of moving MMX register */
70 {4, 8}, /* cost of loading MMX registers
71 in SImode and DImode */
72 {4, 8}, /* cost of storing MMX registers
73 in SImode and DImode */
74 2, /* cost of moving SSE register */
75 {4, 8, 16}, /* cost of loading SSE registers
76 in SImode, DImode and TImode */
77 {4, 8, 16}, /* cost of storing SSE registers
78 in SImode, DImode and TImode */
79 3, /* MMX or SSE register to integer */
32b5b1aa
SC
80};
81
82struct processor_costs i486_cost = { /* 486 specific costs */
83 1, /* cost of an add instruction */
84 1, /* cost of a lea instruction */
85 3, /* variable shift costs */
86 2, /* constant shift costs */
87 12, /* cost of starting a multiply */
88 1, /* cost of multiply per each bit set */
e075ae69 89 40, /* cost of a divide/mod */
96e7ae40 90 15, /* "large" insn */
e2e52e1b 91 3, /* MOVE_RATIO */
7c6b971d 92 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
93 {2, 4, 2}, /* cost of loading integer registers
94 in QImode, HImode and SImode.
0f290768 95 Relative to reg-reg move (2). */
96e7ae40
JH
96 {2, 4, 2}, /* cost of storing integer registers */
97 2, /* cost of reg,reg fld/fst */
98 {8, 8, 8}, /* cost of loading fp registers
99 in SFmode, DFmode and XFmode */
fa79946e
JH
100 {8, 8, 8}, /* cost of loading integer registers */
101 2, /* cost of moving MMX register */
102 {4, 8}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {4, 8}, /* cost of storing MMX registers
105 in SImode and DImode */
106 2, /* cost of moving SSE register */
107 {4, 8, 16}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {4, 8, 16}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3 /* MMX or SSE register to integer */
32b5b1aa
SC
112};
113
e5cb57e8 114struct processor_costs pentium_cost = {
32b5b1aa
SC
115 1, /* cost of an add instruction */
116 1, /* cost of a lea instruction */
856b07a1 117 4, /* variable shift costs */
e5cb57e8 118 1, /* constant shift costs */
856b07a1
SC
119 11, /* cost of starting a multiply */
120 0, /* cost of multiply per each bit set */
e075ae69 121 25, /* cost of a divide/mod */
96e7ae40 122 8, /* "large" insn */
e2e52e1b 123 6, /* MOVE_RATIO */
7c6b971d 124 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
125 {2, 4, 2}, /* cost of loading integer registers
126 in QImode, HImode and SImode.
0f290768 127 Relative to reg-reg move (2). */
96e7ae40
JH
128 {2, 4, 2}, /* cost of storing integer registers */
129 2, /* cost of reg,reg fld/fst */
130 {2, 2, 6}, /* cost of loading fp registers
131 in SFmode, DFmode and XFmode */
fa79946e
JH
132 {4, 4, 6}, /* cost of loading integer registers */
133 8, /* cost of moving MMX register */
134 {8, 8}, /* cost of loading MMX registers
135 in SImode and DImode */
136 {8, 8}, /* cost of storing MMX registers
137 in SImode and DImode */
138 2, /* cost of moving SSE register */
139 {4, 8, 16}, /* cost of loading SSE registers
140 in SImode, DImode and TImode */
141 {4, 8, 16}, /* cost of storing SSE registers
142 in SImode, DImode and TImode */
143 3 /* MMX or SSE register to integer */
32b5b1aa
SC
144};
145
856b07a1
SC
146struct processor_costs pentiumpro_cost = {
147 1, /* cost of an add instruction */
148 1, /* cost of a lea instruction */
e075ae69 149 1, /* variable shift costs */
856b07a1 150 1, /* constant shift costs */
369e59b1 151 4, /* cost of starting a multiply */
856b07a1 152 0, /* cost of multiply per each bit set */
e075ae69 153 17, /* cost of a divide/mod */
96e7ae40 154 8, /* "large" insn */
e2e52e1b 155 6, /* MOVE_RATIO */
7c6b971d 156 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
157 {4, 4, 4}, /* cost of loading integer registers
158 in QImode, HImode and SImode.
0f290768 159 Relative to reg-reg move (2). */
96e7ae40
JH
160 {2, 2, 2}, /* cost of storing integer registers */
161 2, /* cost of reg,reg fld/fst */
162 {2, 2, 6}, /* cost of loading fp registers
163 in SFmode, DFmode and XFmode */
fa79946e
JH
164 {4, 4, 6}, /* cost of loading integer registers */
165 2, /* cost of moving MMX register */
166 {2, 2}, /* cost of loading MMX registers
167 in SImode and DImode */
168 {2, 2}, /* cost of storing MMX registers
169 in SImode and DImode */
170 2, /* cost of moving SSE register */
171 {2, 2, 8}, /* cost of loading SSE registers
172 in SImode, DImode and TImode */
173 {2, 2, 8}, /* cost of storing SSE registers
174 in SImode, DImode and TImode */
175 3 /* MMX or SSE register to integer */
856b07a1
SC
176};
177
a269a03c
JC
178struct processor_costs k6_cost = {
179 1, /* cost of an add instruction */
e075ae69 180 2, /* cost of a lea instruction */
a269a03c
JC
181 1, /* variable shift costs */
182 1, /* constant shift costs */
73fe76e4 183 3, /* cost of starting a multiply */
a269a03c 184 0, /* cost of multiply per each bit set */
e075ae69 185 18, /* cost of a divide/mod */
96e7ae40 186 8, /* "large" insn */
e2e52e1b 187 4, /* MOVE_RATIO */
7c6b971d 188 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
189 {4, 5, 4}, /* cost of loading integer registers
190 in QImode, HImode and SImode.
0f290768 191 Relative to reg-reg move (2). */
96e7ae40
JH
192 {2, 3, 2}, /* cost of storing integer registers */
193 4, /* cost of reg,reg fld/fst */
194 {6, 6, 6}, /* cost of loading fp registers
195 in SFmode, DFmode and XFmode */
fa79946e
JH
196 {4, 4, 4}, /* cost of loading integer registers */
197 2, /* cost of moving MMX register */
198 {2, 2}, /* cost of loading MMX registers
199 in SImode and DImode */
200 {2, 2}, /* cost of storing MMX registers
201 in SImode and DImode */
202 2, /* cost of moving SSE register */
203 {2, 2, 8}, /* cost of loading SSE registers
204 in SImode, DImode and TImode */
205 {2, 2, 8}, /* cost of storing SSE registers
206 in SImode, DImode and TImode */
207 6 /* MMX or SSE register to integer */
a269a03c
JC
208};
209
309ada50
JH
210struct processor_costs athlon_cost = {
211 1, /* cost of an add instruction */
0b5107cf 212 2, /* cost of a lea instruction */
309ada50
JH
213 1, /* variable shift costs */
214 1, /* constant shift costs */
215 5, /* cost of starting a multiply */
216 0, /* cost of multiply per each bit set */
0b5107cf 217 42, /* cost of a divide/mod */
309ada50 218 8, /* "large" insn */
e2e52e1b 219 9, /* MOVE_RATIO */
309ada50
JH
220 4, /* cost for loading QImode using movzbl */
221 {4, 5, 4}, /* cost of loading integer registers
222 in QImode, HImode and SImode.
0f290768 223 Relative to reg-reg move (2). */
309ada50
JH
224 {2, 3, 2}, /* cost of storing integer registers */
225 4, /* cost of reg,reg fld/fst */
0b5107cf 226 {6, 6, 20}, /* cost of loading fp registers
309ada50 227 in SFmode, DFmode and XFmode */
fa79946e
JH
228 {4, 4, 16}, /* cost of loading integer registers */
229 2, /* cost of moving MMX register */
230 {2, 2}, /* cost of loading MMX registers
231 in SImode and DImode */
232 {2, 2}, /* cost of storing MMX registers
233 in SImode and DImode */
234 2, /* cost of moving SSE register */
235 {2, 2, 8}, /* cost of loading SSE registers
236 in SImode, DImode and TImode */
237 {2, 2, 8}, /* cost of storing SSE registers
238 in SImode, DImode and TImode */
239 6 /* MMX or SSE register to integer */
309ada50
JH
240};
241
b4e89e2d
JH
242struct processor_costs pentium4_cost = {
243 1, /* cost of an add instruction */
244 1, /* cost of a lea instruction */
245 8, /* variable shift costs */
246 8, /* constant shift costs */
247 30, /* cost of starting a multiply */
248 0, /* cost of multiply per each bit set */
249 112, /* cost of a divide/mod */
250 16, /* "large" insn */
251 6, /* MOVE_RATIO */
252 2, /* cost for loading QImode using movzbl */
253 {4, 5, 4}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 3, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of loading integer registers */
261 2, /* cost of moving MMX register */
262 {2, 2}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {2, 2}, /* cost of storing MMX registers
265 in SImode and DImode */
266 12, /* cost of moving SSE register */
267 {12, 12, 12}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {2, 2, 8}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 10, /* MMX or SSE register to integer */
272};
273
32b5b1aa
SC
274struct processor_costs *ix86_cost = &pentium_cost;
275
a269a03c
JC
276/* Processor feature/optimization bitmasks. */
277#define m_386 (1<<PROCESSOR_I386)
278#define m_486 (1<<PROCESSOR_I486)
279#define m_PENT (1<<PROCESSOR_PENTIUM)
280#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
281#define m_K6 (1<<PROCESSOR_K6)
309ada50 282#define m_ATHLON (1<<PROCESSOR_ATHLON)
b4e89e2d 283#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
a269a03c 284
309ada50 285const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
b4e89e2d 286const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
a269a03c 287const int x86_zero_extend_with_and = m_486 | m_PENT;
b4e89e2d 288const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
e075ae69 289const int x86_double_with_add = ~m_386;
a269a03c 290const int x86_use_bit_test = m_386;
e2e52e1b 291const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
b4e89e2d
JH
292const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
293const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
294const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
e075ae69
RH
295const int x86_partial_reg_stall = m_PPRO;
296const int x86_use_loop = m_K6;
309ada50 297const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
e075ae69
RH
298const int x86_use_mov0 = m_K6;
299const int x86_use_cltd = ~(m_PENT | m_K6);
300const int x86_read_modify_write = ~m_PENT;
301const int x86_read_modify = ~(m_PENT | m_PPRO);
302const int x86_split_long_moves = m_PPRO;
e9e80858 303const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
b4e89e2d 304const int x86_single_stringop = m_386 | m_PENT4;
d9f32422
JH
305const int x86_qimode_math = ~(0);
306const int x86_promote_qi_regs = 0;
307const int x86_himode_math = ~(m_PPRO);
308const int x86_promote_hi_regs = m_PPRO;
b4e89e2d
JH
309const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
310const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
311const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
312const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
313const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
314const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
315const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
a269a03c 316
564d80f4 317#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
2a2ab3f9 318
e075ae69
RH
319const char * const hi_reg_name[] = HI_REGISTER_NAMES;
320const char * const qi_reg_name[] = QI_REGISTER_NAMES;
321const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
4c0d89b5
RS
322
323/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 324 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 325
e075ae69 326enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
327{
328 /* ax, dx, cx, bx */
ab408a86 329 AREG, DREG, CREG, BREG,
4c0d89b5 330 /* si, di, bp, sp */
e075ae69 331 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
332 /* FP registers */
333 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 334 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 335 /* arg pointer */
83774849 336 NON_Q_REGS,
564d80f4 337 /* flags, fpsr, dirflag, frame */
a7180f70
BS
338 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
339 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
340 SSE_REGS, SSE_REGS,
341 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
3d117b30
JH
342 MMX_REGS, MMX_REGS,
343 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
344 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
345 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
346 SSE_REGS, SSE_REGS,
4c0d89b5 347};
c572e5ba 348
3d117b30 349/* The "default" register map used in 32bit mode. */
83774849 350
0f290768 351int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
352{
353 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
354 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
3d117b30 355 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
356 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
357 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
3d117b30
JH
358 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
359 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
360};
361
0f7fa3d0
JH
362/* The "default" register map used in 64bit mode. */
363int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
364{
365 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
366 33, 34, 35, 36, 37, 38, 39, 40 /* fp regs */
367 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
368 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
369 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
370 8,9,10,11,12,13,14,15, /* extended integer registers */
371 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
372};
373
83774849
RH
374/* Define the register numbers to be used in Dwarf debugging information.
375 The SVR4 reference port C compiler uses the following register numbers
376 in its Dwarf output code:
377 0 for %eax (gcc regno = 0)
378 1 for %ecx (gcc regno = 2)
379 2 for %edx (gcc regno = 1)
380 3 for %ebx (gcc regno = 3)
381 4 for %esp (gcc regno = 7)
382 5 for %ebp (gcc regno = 6)
383 6 for %esi (gcc regno = 4)
384 7 for %edi (gcc regno = 5)
385 The following three DWARF register numbers are never generated by
386 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
387 believes these numbers have these meanings.
388 8 for %eip (no gcc equivalent)
389 9 for %eflags (gcc regno = 17)
390 10 for %trapno (no gcc equivalent)
391 It is not at all clear how we should number the FP stack registers
392 for the x86 architecture. If the version of SDB on x86/svr4 were
393 a bit less brain dead with respect to floating-point then we would
394 have a precedent to follow with respect to DWARF register numbers
395 for x86 FP registers, but the SDB on x86/svr4 is so completely
396 broken with respect to FP registers that it is hardly worth thinking
397 of it as something to strive for compatibility with.
398 The version of x86/svr4 SDB I have at the moment does (partially)
399 seem to believe that DWARF register number 11 is associated with
400 the x86 register %st(0), but that's about all. Higher DWARF
401 register numbers don't seem to be associated with anything in
402 particular, and even for DWARF regno 11, SDB only seems to under-
403 stand that it should say that a variable lives in %st(0) (when
404 asked via an `=' command) if we said it was in DWARF regno 11,
405 but SDB still prints garbage when asked for the value of the
406 variable in question (via a `/' command).
407 (Also note that the labels SDB prints for various FP stack regs
408 when doing an `x' command are all wrong.)
409 Note that these problems generally don't affect the native SVR4
410 C compiler because it doesn't allow the use of -O with -g and
411 because when it is *not* optimizing, it allocates a memory
412 location for each floating-point variable, and the memory
413 location is what gets described in the DWARF AT_location
414 attribute for the variable in question.
415 Regardless of the severe mental illness of the x86/svr4 SDB, we
416 do something sensible here and we use the following DWARF
417 register numbers. Note that these are all stack-top-relative
418 numbers.
419 11 for %st(0) (gcc regno = 8)
420 12 for %st(1) (gcc regno = 9)
421 13 for %st(2) (gcc regno = 10)
422 14 for %st(3) (gcc regno = 11)
423 15 for %st(4) (gcc regno = 12)
424 16 for %st(5) (gcc regno = 13)
425 17 for %st(6) (gcc regno = 14)
426 18 for %st(7) (gcc regno = 15)
427*/
0f290768 428int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
429{
430 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
431 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
3f3f2124 432 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
433 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
434 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
3f3f2124
JH
435 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
436 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
83774849
RH
437};
438
c572e5ba
JVA
439/* Test and compare insns in i386.md store the information needed to
440 generate branch and scc insns here. */
441
e075ae69
RH
442struct rtx_def *ix86_compare_op0 = NULL_RTX;
443struct rtx_def *ix86_compare_op1 = NULL_RTX;
f5316dfe 444
36edd3cc
BS
445#define MAX_386_STACK_LOCALS 2
446
447/* Define the structure for the machine field in struct function. */
448struct machine_function
449{
450 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
6fca22eb 451 int accesses_prev_frame;
36edd3cc
BS
452};
453
01d939e8 454#define ix86_stack_locals (cfun->machine->stack_locals)
36edd3cc 455
4dd2ac2c
JH
456/* Structure describing stack frame layout.
457 Stack grows downward:
458
459 [arguments]
460 <- ARG_POINTER
461 saved pc
462
463 saved frame pointer if frame_pointer_needed
464 <- HARD_FRAME_POINTER
465 [saved regs]
466
467 [padding1] \
468 )
469 [va_arg registers] (
470 > to_allocate <- FRAME_POINTER
471 [frame] (
472 )
473 [padding2] /
474 */
475struct ix86_frame
476{
477 int nregs;
478 int padding1;
479 HOST_WIDE_INT frame;
480 int padding2;
481 int outgoing_arguments_size;
482
483 HOST_WIDE_INT to_allocate;
484 /* The offsets relative to ARG_POINTER. */
485 HOST_WIDE_INT frame_pointer_offset;
486 HOST_WIDE_INT hard_frame_pointer_offset;
487 HOST_WIDE_INT stack_pointer_offset;
488};
489
6189a572
JH
490/* Code model option as passed by user. */
491const char *ix86_cmodel_string;
492/* Parsed value. */
493enum cmodel ix86_cmodel;
494
c8c5cb99 495/* which cpu are we scheduling for */
e42ea7f9 496enum processor_type ix86_cpu;
c8c5cb99
SC
497
498/* which instruction set architecture to use. */
c942177e 499int ix86_arch;
c8c5cb99
SC
500
501/* Strings to hold which cpu and instruction set architecture to use. */
9c23aa47
ZW
502const char *ix86_cpu_string; /* for -mcpu=<xxx> */
503const char *ix86_arch_string; /* for -march=<xxx> */
c8c5cb99 504
0f290768 505/* # of registers to use to pass arguments. */
e075ae69 506const char *ix86_regparm_string;
e9a25f70 507
e075ae69
RH
508/* ix86_regparm_string as a number */
509int ix86_regparm;
e9a25f70
JL
510
511/* Alignment to use for loops and jumps: */
512
0f290768 513/* Power of two alignment for loops. */
e075ae69 514const char *ix86_align_loops_string;
e9a25f70 515
0f290768 516/* Power of two alignment for non-loop jumps. */
e075ae69 517const char *ix86_align_jumps_string;
e9a25f70 518
3af4bd89 519/* Power of two alignment for stack boundary in bytes. */
e075ae69 520const char *ix86_preferred_stack_boundary_string;
3af4bd89
JH
521
522/* Preferred alignment for stack boundary in bits. */
e075ae69 523int ix86_preferred_stack_boundary;
3af4bd89 524
e9a25f70 525/* Values 1-5: see jump.c */
e075ae69
RH
526int ix86_branch_cost;
527const char *ix86_branch_cost_string;
e9a25f70 528
0f290768 529/* Power of two alignment for functions. */
e075ae69
RH
530int ix86_align_funcs;
531const char *ix86_align_funcs_string;
b08de47e 532
0f290768 533/* Power of two alignment for loops. */
e075ae69 534int ix86_align_loops;
b08de47e 535
0f290768 536/* Power of two alignment for non-loop jumps. */
e075ae69
RH
537int ix86_align_jumps;
538\f
f6da8bc3
KG
539static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
540static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
e075ae69 541 int, int, FILE *));
f6da8bc3 542static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
3a3677ff
RH
543static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
544 rtx *, rtx *));
f6da8bc3
KG
545static rtx gen_push PARAMS ((rtx));
546static int memory_address_length PARAMS ((rtx addr));
547static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
548static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
549static int ix86_safe_length PARAMS ((rtx));
550static enum attr_memory ix86_safe_memory PARAMS ((rtx));
551static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
552static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
553static void ix86_dump_ppro_packet PARAMS ((FILE *));
554static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
555static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
e075ae69 556 rtx));
f6da8bc3
KG
557static void ix86_init_machine_status PARAMS ((struct function *));
558static void ix86_mark_machine_status PARAMS ((struct function *));
37b15744 559static void ix86_free_machine_status PARAMS ((struct function *));
2b589241 560static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
f6da8bc3 561static int ix86_safe_length_prefix PARAMS ((rtx));
0903fcab
JH
562static int ix86_nsaved_regs PARAMS((void));
563static void ix86_emit_save_regs PARAMS((void));
da2d1d3a 564static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
0903fcab 565static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
0e4970d7 566static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
c6991660
KG
567static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
568static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
55efb413 569static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
e075ae69
RH
570
571struct ix86_address
572{
573 rtx base, index, disp;
574 HOST_WIDE_INT scale;
575};
b08de47e 576
e075ae69 577static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
bd793c65
BS
578
579struct builtin_description;
580static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
581 rtx));
582static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
583 rtx));
584static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
585static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
586static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
587static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
588static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
c0c102a9
JH
589static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
590static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
591 enum rtx_code *,
592 enum rtx_code *,
593 enum rtx_code *));
9e7adcb3
JH
594static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
595 rtx *, rtx *));
596static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
597static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
598static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
599static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
4dd2ac2c
JH
600static int ix86_save_reg PARAMS ((int));
601static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
e075ae69 602\f
f5316dfe
MM
603/* Sometimes certain combinations of command options do not make
604 sense on a particular target machine. You can define a macro
605 `OVERRIDE_OPTIONS' to take account of this. This macro, if
606 defined, is executed once just after all the command options have
607 been parsed.
608
609 Don't use this macro to turn on various extra optimizations for
610 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
611
612void
613override_options ()
614{
400500c4 615 int i;
e075ae69
RH
616 /* Comes from final.c -- no real reason to change it. */
617#define MAX_CODE_ALIGN 16
f5316dfe 618
c8c5cb99
SC
619 static struct ptt
620 {
e075ae69
RH
621 struct processor_costs *cost; /* Processor costs */
622 int target_enable; /* Target flags to enable. */
623 int target_disable; /* Target flags to disable. */
624 int align_loop; /* Default alignments. */
625 int align_jump;
626 int align_func;
627 int branch_cost;
628 }
0f290768 629 const processor_target_table[PROCESSOR_max] =
e075ae69
RH
630 {
631 {&i386_cost, 0, 0, 2, 2, 2, 1},
632 {&i486_cost, 0, 0, 4, 4, 4, 1},
633 {&pentium_cost, 0, 0, -4, -4, -4, 1},
634 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
309ada50 635 {&k6_cost, 0, 0, -5, -5, 4, 1},
b4e89e2d
JH
636 {&athlon_cost, 0, 0, 4, -4, 4, 1},
637 {&pentium4_cost, 0, 0, 2, 2, 2, 1}
e075ae69
RH
638 };
639
640 static struct pta
641 {
0f290768 642 const char *name; /* processor name or nickname. */
e075ae69
RH
643 enum processor_type processor;
644 }
0f290768 645 const processor_alias_table[] =
e075ae69
RH
646 {
647 {"i386", PROCESSOR_I386},
648 {"i486", PROCESSOR_I486},
649 {"i586", PROCESSOR_PENTIUM},
650 {"pentium", PROCESSOR_PENTIUM},
651 {"i686", PROCESSOR_PENTIUMPRO},
652 {"pentiumpro", PROCESSOR_PENTIUMPRO},
e075ae69 653 {"k6", PROCESSOR_K6},
309ada50 654 {"athlon", PROCESSOR_ATHLON},
b4e89e2d 655 {"pentium4", PROCESSOR_PENTIUM4},
3af4bd89 656 };
c8c5cb99 657
0f290768 658 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
c8c5cb99 659
f5316dfe
MM
660#ifdef SUBTARGET_OVERRIDE_OPTIONS
661 SUBTARGET_OVERRIDE_OPTIONS;
662#endif
663
5a6ee819 664 ix86_arch = PROCESSOR_I386;
e075ae69
RH
665 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
666
6189a572
JH
667 if (ix86_cmodel_string != 0)
668 {
669 if (!strcmp (ix86_cmodel_string, "small"))
670 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
671 else if (flag_pic)
672 sorry ("Code model %s not supported in PIC mode", ix86_cmodel_string);
673 else if (!strcmp (ix86_cmodel_string, "32"))
674 ix86_cmodel = CM_32;
675 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
676 ix86_cmodel = CM_KERNEL;
677 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
678 ix86_cmodel = CM_MEDIUM;
679 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
680 ix86_cmodel = CM_LARGE;
681 else
682 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
683 }
684 else
685 {
686 ix86_cmodel = CM_32;
687 if (TARGET_64BIT)
688 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
689 }
690 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
691 error ("Code model `%s' not supported in the %s bit mode.",
692 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
693 if (ix86_cmodel == CM_LARGE)
694 sorry ("Code model `large' not supported yet.");
695
e075ae69
RH
696 if (ix86_arch_string != 0)
697 {
e075ae69
RH
698 for (i = 0; i < pta_size; i++)
699 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
700 {
701 ix86_arch = processor_alias_table[i].processor;
702 /* Default cpu tuning to the architecture. */
703 ix86_cpu = ix86_arch;
704 break;
705 }
400500c4 706
e075ae69
RH
707 if (i == pta_size)
708 error ("bad value (%s) for -march= switch", ix86_arch_string);
709 }
710
711 if (ix86_cpu_string != 0)
712 {
e075ae69
RH
713 for (i = 0; i < pta_size; i++)
714 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
715 {
716 ix86_cpu = processor_alias_table[i].processor;
717 break;
718 }
719 if (i == pta_size)
720 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
721 }
722
723 ix86_cost = processor_target_table[ix86_cpu].cost;
724 target_flags |= processor_target_table[ix86_cpu].target_enable;
725 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
726
36edd3cc
BS
727 /* Arrange to set up i386_stack_locals for all functions. */
728 init_machine_status = ix86_init_machine_status;
1526a060 729 mark_machine_status = ix86_mark_machine_status;
37b15744 730 free_machine_status = ix86_free_machine_status;
36edd3cc 731
0f290768 732 /* Validate -mregparm= value. */
e075ae69 733 if (ix86_regparm_string)
b08de47e 734 {
400500c4
RK
735 i = atoi (ix86_regparm_string);
736 if (i < 0 || i > REGPARM_MAX)
737 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
738 else
739 ix86_regparm = i;
b08de47e
MM
740 }
741
e9a25f70 742 /* Validate -malign-loops= value, or provide default. */
e075ae69
RH
743 ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
744 if (ix86_align_loops_string)
b08de47e 745 {
400500c4
RK
746 i = atoi (ix86_align_loops_string);
747 if (i < 0 || i > MAX_CODE_ALIGN)
748 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
749 else
750 ix86_align_loops = i;
b08de47e 751 }
3af4bd89
JH
752
753 /* Validate -malign-jumps= value, or provide default. */
e075ae69
RH
754 ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
755 if (ix86_align_jumps_string)
b08de47e 756 {
400500c4
RK
757 i = atoi (ix86_align_jumps_string);
758 if (i < 0 || i > MAX_CODE_ALIGN)
759 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
760 else
761 ix86_align_jumps = i;
b08de47e 762 }
b08de47e 763
0f290768 764 /* Validate -malign-functions= value, or provide default. */
e075ae69
RH
765 ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
766 if (ix86_align_funcs_string)
b08de47e 767 {
400500c4
RK
768 i = atoi (ix86_align_funcs_string);
769 if (i < 0 || i > MAX_CODE_ALIGN)
770 error ("-malign-functions=%d is not between 0 and %d",
771 i, MAX_CODE_ALIGN);
772 else
773 ix86_align_funcs = i;
b08de47e 774 }
3af4bd89 775
e4c0478d 776 /* Validate -mpreferred-stack-boundary= value, or provide default.
3af4bd89 777 The default of 128 bits is for Pentium III's SSE __m128. */
e075ae69
RH
778 ix86_preferred_stack_boundary = 128;
779 if (ix86_preferred_stack_boundary_string)
3af4bd89 780 {
400500c4 781 i = atoi (ix86_preferred_stack_boundary_string);
3af4bd89 782 if (i < 2 || i > 31)
400500c4
RK
783 error ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
784 else
785 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 786 }
77a989d1 787
0f290768 788 /* Validate -mbranch-cost= value, or provide default. */
e075ae69
RH
789 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
790 if (ix86_branch_cost_string)
804a8ee0 791 {
400500c4
RK
792 i = atoi (ix86_branch_cost_string);
793 if (i < 0 || i > 5)
794 error ("-mbranch-cost=%d is not between 0 and 5", i);
795 else
796 ix86_branch_cost = i;
804a8ee0 797 }
804a8ee0 798
e9a25f70
JL
799 /* Keep nonleaf frame pointers. */
800 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 801 flag_omit_frame_pointer = 1;
e075ae69
RH
802
803 /* If we're doing fast math, we don't care about comparison order
804 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 805 if (flag_unsafe_math_optimizations)
e075ae69
RH
806 target_flags &= ~MASK_IEEE_FP;
807
a7180f70
BS
808 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
809 on by -msse. */
810 if (TARGET_SSE)
811 target_flags |= MASK_MMX;
f5316dfe
MM
812}
813\f
32b5b1aa 814void
c6aded7c 815optimization_options (level, size)
32b5b1aa 816 int level;
bb5177ac 817 int size ATTRIBUTE_UNUSED;
32b5b1aa 818{
e9a25f70
JL
819 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
820 make the problem with not enough registers even worse. */
32b5b1aa
SC
821#ifdef INSN_SCHEDULING
822 if (level > 1)
823 flag_schedule_insns = 0;
824#endif
825}
b08de47e
MM
826\f
827/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
828 attribute for DECL. The attributes in ATTRIBUTES have previously been
829 assigned to DECL. */
830
831int
e075ae69 832ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
bb5177ac
RL
833 tree decl ATTRIBUTE_UNUSED;
834 tree attributes ATTRIBUTE_UNUSED;
835 tree identifier ATTRIBUTE_UNUSED;
836 tree args ATTRIBUTE_UNUSED;
b08de47e
MM
837{
838 return 0;
839}
840
841/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
842 attribute for TYPE. The attributes in ATTRIBUTES have previously been
843 assigned to TYPE. */
844
845int
e075ae69 846ix86_valid_type_attribute_p (type, attributes, identifier, args)
b08de47e 847 tree type;
bb5177ac 848 tree attributes ATTRIBUTE_UNUSED;
b08de47e
MM
849 tree identifier;
850 tree args;
851{
852 if (TREE_CODE (type) != FUNCTION_TYPE
ac478ac0 853 && TREE_CODE (type) != METHOD_TYPE
b08de47e
MM
854 && TREE_CODE (type) != FIELD_DECL
855 && TREE_CODE (type) != TYPE_DECL)
856 return 0;
857
858 /* Stdcall attribute says callee is responsible for popping arguments
859 if they are not variable. */
860 if (is_attribute_p ("stdcall", identifier))
861 return (args == NULL_TREE);
862
0f290768 863 /* Cdecl attribute says the callee is a normal C declaration. */
b08de47e
MM
864 if (is_attribute_p ("cdecl", identifier))
865 return (args == NULL_TREE);
866
867 /* Regparm attribute specifies how many integer arguments are to be
0f290768 868 passed in registers. */
b08de47e
MM
869 if (is_attribute_p ("regparm", identifier))
870 {
871 tree cst;
872
e9a25f70 873 if (! args || TREE_CODE (args) != TREE_LIST
b08de47e
MM
874 || TREE_CHAIN (args) != NULL_TREE
875 || TREE_VALUE (args) == NULL_TREE)
876 return 0;
877
878 cst = TREE_VALUE (args);
879 if (TREE_CODE (cst) != INTEGER_CST)
880 return 0;
881
cce097f1 882 if (compare_tree_int (cst, REGPARM_MAX) > 0)
b08de47e
MM
883 return 0;
884
885 return 1;
886 }
887
888 return 0;
889}
890
891/* Return 0 if the attributes for two types are incompatible, 1 if they
892 are compatible, and 2 if they are nearly compatible (which causes a
893 warning to be generated). */
894
895int
e075ae69 896ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
897 tree type1;
898 tree type2;
b08de47e 899{
0f290768 900 /* Check for mismatch of non-default calling convention. */
69ddee61 901 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
902
903 if (TREE_CODE (type1) != FUNCTION_TYPE)
904 return 1;
905
906 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
907 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
908 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 909 return 0;
b08de47e
MM
910 return 1;
911}
b08de47e
MM
912\f
913/* Value is the number of bytes of arguments automatically
914 popped when returning from a subroutine call.
915 FUNDECL is the declaration node of the function (as a tree),
916 FUNTYPE is the data type of the function (as a tree),
917 or for a library call it is an identifier node for the subroutine name.
918 SIZE is the number of bytes of arguments passed on the stack.
919
920 On the 80386, the RTD insn may be used to pop them if the number
921 of args is fixed, but if the number is variable then the caller
922 must pop them all. RTD can't be used for library calls now
923 because the library is compiled with the Unix compiler.
924 Use of RTD is a selectable option, since it is incompatible with
925 standard Unix calling sequences. If the option is not selected,
926 the caller must always pop the args.
927
928 The attribute stdcall is equivalent to RTD on a per module basis. */
929
930int
e075ae69 931ix86_return_pops_args (fundecl, funtype, size)
b08de47e
MM
932 tree fundecl;
933 tree funtype;
934 int size;
79325812 935{
3345ee7d 936 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 937
0f290768 938 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 939 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 940
0f290768 941 /* Stdcall functions will pop the stack if not variable args. */
698cdd84
SC
942 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
943 rtd = 1;
79325812 944
698cdd84
SC
945 if (rtd
946 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
947 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
948 == void_type_node)))
698cdd84
SC
949 return size;
950 }
79325812 951
e9a25f70 952 /* Lose any fake structure return argument. */
698cdd84
SC
953 if (aggregate_value_p (TREE_TYPE (funtype)))
954 return GET_MODE_SIZE (Pmode);
79325812 955
2614aac6 956 return 0;
b08de47e 957}
b08de47e
MM
958\f
959/* Argument support functions. */
960
961/* Initialize a variable CUM of type CUMULATIVE_ARGS
962 for a call to a function whose data type is FNTYPE.
963 For a library call, FNTYPE is 0. */
964
965void
966init_cumulative_args (cum, fntype, libname)
e9a25f70 967 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
b08de47e
MM
968 tree fntype; /* tree ptr for function decl */
969 rtx libname; /* SYMBOL_REF of library name or 0 */
970{
971 static CUMULATIVE_ARGS zero_cum;
972 tree param, next_param;
973
974 if (TARGET_DEBUG_ARG)
975 {
976 fprintf (stderr, "\ninit_cumulative_args (");
977 if (fntype)
e9a25f70
JL
978 fprintf (stderr, "fntype code = %s, ret code = %s",
979 tree_code_name[(int) TREE_CODE (fntype)],
980 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
981 else
982 fprintf (stderr, "no fntype");
983
984 if (libname)
985 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
986 }
987
988 *cum = zero_cum;
989
990 /* Set up the number of registers to use for passing arguments. */
e075ae69 991 cum->nregs = ix86_regparm;
b08de47e
MM
992 if (fntype)
993 {
994 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 995
b08de47e
MM
996 if (attr)
997 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
998 }
999
1000 /* Determine if this function has variable arguments. This is
1001 indicated by the last argument being 'void_type_mode' if there
1002 are no variable arguments. If there are variable arguments, then
1003 we won't pass anything in registers */
1004
1005 if (cum->nregs)
1006 {
1007 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 1008 param != 0; param = next_param)
b08de47e
MM
1009 {
1010 next_param = TREE_CHAIN (param);
e9a25f70 1011 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
b08de47e
MM
1012 cum->nregs = 0;
1013 }
1014 }
1015
1016 if (TARGET_DEBUG_ARG)
1017 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1018
1019 return;
1020}
1021
1022/* Update the data in CUM to advance over an argument
1023 of mode MODE and data type TYPE.
1024 (TYPE is null for libcalls where that information may not be available.) */
1025
1026void
1027function_arg_advance (cum, mode, type, named)
1028 CUMULATIVE_ARGS *cum; /* current arg information */
1029 enum machine_mode mode; /* current arg mode */
1030 tree type; /* type of the argument or 0 if lib support */
1031 int named; /* whether or not the argument was named */
1032{
5ac9118e
KG
1033 int bytes =
1034 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
1035 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1036
1037 if (TARGET_DEBUG_ARG)
1038 fprintf (stderr,
e9a25f70 1039 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 1040 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
82a127a9 1041 if (TARGET_SSE && mode == TImode)
b08de47e 1042 {
82a127a9
CM
1043 cum->sse_words += words;
1044 cum->sse_nregs -= 1;
1045 cum->sse_regno += 1;
1046 if (cum->sse_nregs <= 0)
1047 {
1048 cum->sse_nregs = 0;
1049 cum->sse_regno = 0;
1050 }
b08de47e 1051 }
82a127a9
CM
1052 else
1053 {
1054 cum->words += words;
1055 cum->nregs -= words;
1056 cum->regno += words;
b08de47e 1057
82a127a9
CM
1058 if (cum->nregs <= 0)
1059 {
1060 cum->nregs = 0;
1061 cum->regno = 0;
1062 }
1063 }
b08de47e
MM
1064 return;
1065}
1066
1067/* Define where to put the arguments to a function.
1068 Value is zero to push the argument on the stack,
1069 or a hard register in which to store the argument.
1070
1071 MODE is the argument's machine mode.
1072 TYPE is the data type of the argument (as a tree).
1073 This is null for libcalls where that information may
1074 not be available.
1075 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1076 the preceding args and about the function being called.
1077 NAMED is nonzero if this argument is a named parameter
1078 (otherwise it is an extra parameter matching an ellipsis). */
1079
1080struct rtx_def *
1081function_arg (cum, mode, type, named)
1082 CUMULATIVE_ARGS *cum; /* current arg information */
1083 enum machine_mode mode; /* current arg mode */
1084 tree type; /* type of the argument or 0 if lib support */
1085 int named; /* != 0 for normal args, == 0 for ... args */
1086{
1087 rtx ret = NULL_RTX;
5ac9118e
KG
1088 int bytes =
1089 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
1090 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1091
1092 switch (mode)
1093 {
0f290768 1094 /* For now, pass fp/complex values on the stack. */
e9a25f70 1095 default:
b08de47e
MM
1096 break;
1097
1098 case BLKmode:
1099 case DImode:
1100 case SImode:
1101 case HImode:
1102 case QImode:
1103 if (words <= cum->nregs)
f64cecad 1104 ret = gen_rtx_REG (mode, cum->regno);
b08de47e 1105 break;
82a127a9
CM
1106 case TImode:
1107 if (cum->sse_nregs)
1108 ret = gen_rtx_REG (mode, cum->sse_regno);
1109 break;
b08de47e
MM
1110 }
1111
1112 if (TARGET_DEBUG_ARG)
1113 {
1114 fprintf (stderr,
e9a25f70 1115 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
b08de47e
MM
1116 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1117
1118 if (ret)
1119 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
1120 else
1121 fprintf (stderr, ", stack");
1122
1123 fprintf (stderr, " )\n");
1124 }
1125
1126 return ret;
1127}
e075ae69 1128\f
8bad7136
JL
1129
1130/* Return nonzero if OP is (const_int 1), else return zero. */
1131
1132int
1133const_int_1_operand (op, mode)
1134 rtx op;
1135 enum machine_mode mode ATTRIBUTE_UNUSED;
1136{
1137 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
1138}
1139
e075ae69
RH
1140/* Returns 1 if OP is either a symbol reference or a sum of a symbol
1141 reference and a constant. */
b08de47e
MM
1142
1143int
e075ae69
RH
1144symbolic_operand (op, mode)
1145 register rtx op;
1146 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1147{
e075ae69 1148 switch (GET_CODE (op))
2a2ab3f9 1149 {
e075ae69
RH
1150 case SYMBOL_REF:
1151 case LABEL_REF:
1152 return 1;
1153
1154 case CONST:
1155 op = XEXP (op, 0);
1156 if (GET_CODE (op) == SYMBOL_REF
1157 || GET_CODE (op) == LABEL_REF
1158 || (GET_CODE (op) == UNSPEC
1159 && XINT (op, 1) >= 6
1160 && XINT (op, 1) <= 7))
1161 return 1;
1162 if (GET_CODE (op) != PLUS
1163 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1164 return 0;
1165
1166 op = XEXP (op, 0);
1167 if (GET_CODE (op) == SYMBOL_REF
1168 || GET_CODE (op) == LABEL_REF)
1169 return 1;
1170 /* Only @GOTOFF gets offsets. */
1171 if (GET_CODE (op) != UNSPEC
1172 || XINT (op, 1) != 7)
1173 return 0;
1174
1175 op = XVECEXP (op, 0, 0);
1176 if (GET_CODE (op) == SYMBOL_REF
1177 || GET_CODE (op) == LABEL_REF)
1178 return 1;
1179 return 0;
1180
1181 default:
1182 return 0;
2a2ab3f9
JVA
1183 }
1184}
2a2ab3f9 1185
e075ae69 1186/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 1187
e075ae69
RH
1188int
1189pic_symbolic_operand (op, mode)
1190 register rtx op;
1191 enum machine_mode mode ATTRIBUTE_UNUSED;
1192{
1193 if (GET_CODE (op) == CONST)
2a2ab3f9 1194 {
e075ae69
RH
1195 op = XEXP (op, 0);
1196 if (GET_CODE (op) == UNSPEC)
1197 return 1;
1198 if (GET_CODE (op) != PLUS
1199 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1200 return 0;
1201 op = XEXP (op, 0);
1202 if (GET_CODE (op) == UNSPEC)
1203 return 1;
2a2ab3f9 1204 }
e075ae69 1205 return 0;
2a2ab3f9 1206}
2a2ab3f9 1207
28d52ffb
RH
1208/* Test for a valid operand for a call instruction. Don't allow the
1209 arg pointer register or virtual regs since they may decay into
1210 reg + const, which the patterns can't handle. */
2a2ab3f9 1211
e075ae69
RH
1212int
1213call_insn_operand (op, mode)
1214 rtx op;
1215 enum machine_mode mode ATTRIBUTE_UNUSED;
1216{
e075ae69
RH
1217 /* Disallow indirect through a virtual register. This leads to
1218 compiler aborts when trying to eliminate them. */
1219 if (GET_CODE (op) == REG
1220 && (op == arg_pointer_rtx
564d80f4 1221 || op == frame_pointer_rtx
e075ae69
RH
1222 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
1223 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
1224 return 0;
2a2ab3f9 1225
28d52ffb
RH
1226 /* Disallow `call 1234'. Due to varying assembler lameness this
1227 gets either rejected or translated to `call .+1234'. */
1228 if (GET_CODE (op) == CONST_INT)
1229 return 0;
1230
cbbf65e0
RH
1231 /* Explicitly allow SYMBOL_REF even if pic. */
1232 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 1233 return 1;
2a2ab3f9 1234
cbbf65e0
RH
1235 /* Half-pic doesn't allow anything but registers and constants.
1236 We've just taken care of the later. */
1237 if (HALF_PIC_P ())
1238 return register_operand (op, Pmode);
1239
1240 /* Otherwise we can allow any general_operand in the address. */
1241 return general_operand (op, Pmode);
e075ae69 1242}
79325812 1243
e075ae69
RH
1244int
1245constant_call_address_operand (op, mode)
1246 rtx op;
1247 enum machine_mode mode ATTRIBUTE_UNUSED;
1248{
eaf19aba
JJ
1249 if (GET_CODE (op) == CONST
1250 && GET_CODE (XEXP (op, 0)) == PLUS
1251 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1252 op = XEXP (XEXP (op, 0), 0);
e1ff012c 1253 return GET_CODE (op) == SYMBOL_REF;
e075ae69 1254}
2a2ab3f9 1255
e075ae69 1256/* Match exactly zero and one. */
e9a25f70 1257
0f290768 1258int
e075ae69
RH
1259const0_operand (op, mode)
1260 register rtx op;
1261 enum machine_mode mode;
1262{
1263 return op == CONST0_RTX (mode);
1264}
e9a25f70 1265
0f290768 1266int
e075ae69
RH
1267const1_operand (op, mode)
1268 register rtx op;
1269 enum machine_mode mode ATTRIBUTE_UNUSED;
1270{
1271 return op == const1_rtx;
1272}
2a2ab3f9 1273
e075ae69 1274/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 1275
e075ae69
RH
1276int
1277const248_operand (op, mode)
1278 register rtx op;
1279 enum machine_mode mode ATTRIBUTE_UNUSED;
1280{
1281 return (GET_CODE (op) == CONST_INT
1282 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1283}
e9a25f70 1284
e075ae69 1285/* True if this is a constant appropriate for an increment or decremenmt. */
81fd0956 1286
e075ae69
RH
1287int
1288incdec_operand (op, mode)
1289 register rtx op;
1290 enum machine_mode mode;
1291{
b4e89e2d
JH
1292 /* On Pentium4, the inc and dec operations causes extra dependancy on flag
1293 registers, since carry flag is not set. */
1294 if (TARGET_PENTIUM4 && !optimize_size)
1295 return 0;
e075ae69
RH
1296 if (op == const1_rtx || op == constm1_rtx)
1297 return 1;
1298 if (GET_CODE (op) != CONST_INT)
1299 return 0;
1300 if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
1301 return 1;
1302 if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
1303 return 1;
1304 if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
1305 return 1;
1306 return 0;
1307}
2a2ab3f9 1308
0f290768 1309/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
1310 register eliminable to the stack pointer. Otherwise, this is
1311 a register operand.
2a2ab3f9 1312
e075ae69
RH
1313 This is used to prevent esp from being used as an index reg.
1314 Which would only happen in pathological cases. */
5f1ec3e6 1315
e075ae69
RH
1316int
1317reg_no_sp_operand (op, mode)
1318 register rtx op;
1319 enum machine_mode mode;
1320{
1321 rtx t = op;
1322 if (GET_CODE (t) == SUBREG)
1323 t = SUBREG_REG (t);
564d80f4 1324 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 1325 return 0;
2a2ab3f9 1326
e075ae69 1327 return register_operand (op, mode);
2a2ab3f9 1328}
b840bfb0 1329
915119a5
BS
1330int
1331mmx_reg_operand (op, mode)
1332 register rtx op;
bd793c65 1333 enum machine_mode mode ATTRIBUTE_UNUSED;
915119a5
BS
1334{
1335 return MMX_REG_P (op);
1336}
1337
2c5a510c
RH
1338/* Return false if this is any eliminable register. Otherwise
1339 general_operand. */
1340
1341int
1342general_no_elim_operand (op, mode)
1343 register rtx op;
1344 enum machine_mode mode;
1345{
1346 rtx t = op;
1347 if (GET_CODE (t) == SUBREG)
1348 t = SUBREG_REG (t);
1349 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1350 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1351 || t == virtual_stack_dynamic_rtx)
1352 return 0;
1353
1354 return general_operand (op, mode);
1355}
1356
1357/* Return false if this is any eliminable register. Otherwise
1358 register_operand or const_int. */
1359
1360int
1361nonmemory_no_elim_operand (op, mode)
1362 register rtx op;
1363 enum machine_mode mode;
1364{
1365 rtx t = op;
1366 if (GET_CODE (t) == SUBREG)
1367 t = SUBREG_REG (t);
1368 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1369 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1370 || t == virtual_stack_dynamic_rtx)
1371 return 0;
1372
1373 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1374}
1375
e075ae69 1376/* Return true if op is a Q_REGS class register. */
b840bfb0 1377
e075ae69
RH
1378int
1379q_regs_operand (op, mode)
1380 register rtx op;
1381 enum machine_mode mode;
b840bfb0 1382{
e075ae69
RH
1383 if (mode != VOIDmode && GET_MODE (op) != mode)
1384 return 0;
1385 if (GET_CODE (op) == SUBREG)
1386 op = SUBREG_REG (op);
1387 return QI_REG_P (op);
0f290768 1388}
b840bfb0 1389
e075ae69 1390/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 1391
e075ae69
RH
1392int
1393non_q_regs_operand (op, mode)
1394 register rtx op;
1395 enum machine_mode mode;
1396{
1397 if (mode != VOIDmode && GET_MODE (op) != mode)
1398 return 0;
1399 if (GET_CODE (op) == SUBREG)
1400 op = SUBREG_REG (op);
1401 return NON_QI_REG_P (op);
0f290768 1402}
b840bfb0 1403
915119a5
BS
1404/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
1405 insns. */
1406int
1407sse_comparison_operator (op, mode)
1408 rtx op;
1409 enum machine_mode mode ATTRIBUTE_UNUSED;
1410{
1411 enum rtx_code code = GET_CODE (op);
a46d1d38
JH
1412 switch (code)
1413 {
1414 /* Operations supported directly. */
1415 case EQ:
1416 case LT:
1417 case LE:
1418 case UNORDERED:
1419 case NE:
1420 case UNGE:
1421 case UNGT:
1422 case ORDERED:
1423 return 1;
1424 /* These are equivalent to ones above in non-IEEE comparisons. */
1425 case UNEQ:
1426 case UNLT:
1427 case UNLE:
1428 case LTGT:
1429 case GE:
1430 case GT:
1431 return !TARGET_IEEE_FP;
1432 default:
1433 return 0;
1434 }
915119a5 1435}
9076b9c1 1436/* Return 1 if OP is a valid comparison operator in valid mode. */
e075ae69 1437int
9076b9c1
JH
1438ix86_comparison_operator (op, mode)
1439 register rtx op;
1440 enum machine_mode mode;
e075ae69 1441{
9076b9c1 1442 enum machine_mode inmode;
9a915772 1443 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
1444 if (mode != VOIDmode && GET_MODE (op) != mode)
1445 return 0;
9a915772
JH
1446 if (GET_RTX_CLASS (code) != '<')
1447 return 0;
1448 inmode = GET_MODE (XEXP (op, 0));
1449
1450 if (inmode == CCFPmode || inmode == CCFPUmode)
1451 {
1452 enum rtx_code second_code, bypass_code;
1453 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1454 return (bypass_code == NIL && second_code == NIL);
1455 }
1456 switch (code)
3a3677ff
RH
1457 {
1458 case EQ: case NE:
3a3677ff 1459 return 1;
9076b9c1 1460 case LT: case GE:
7e08e190 1461 if (inmode == CCmode || inmode == CCGCmode
9076b9c1
JH
1462 || inmode == CCGOCmode || inmode == CCNOmode)
1463 return 1;
1464 return 0;
7e08e190 1465 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
7e08e190 1466 if (inmode == CCmode)
9076b9c1
JH
1467 return 1;
1468 return 0;
1469 case GT: case LE:
7e08e190 1470 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
9076b9c1
JH
1471 return 1;
1472 return 0;
3a3677ff
RH
1473 default:
1474 return 0;
1475 }
1476}
1477
9076b9c1 1478/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3a3677ff 1479
9076b9c1
JH
1480int
1481fcmov_comparison_operator (op, mode)
3a3677ff
RH
1482 register rtx op;
1483 enum machine_mode mode;
1484{
b62d22a2 1485 enum machine_mode inmode;
9a915772 1486 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
1487 if (mode != VOIDmode && GET_MODE (op) != mode)
1488 return 0;
9a915772
JH
1489 if (GET_RTX_CLASS (code) != '<')
1490 return 0;
1491 inmode = GET_MODE (XEXP (op, 0));
1492 if (inmode == CCFPmode || inmode == CCFPUmode)
3a3677ff 1493 {
9a915772
JH
1494 enum rtx_code second_code, bypass_code;
1495 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1496 if (bypass_code != NIL || second_code != NIL)
1497 return 0;
1498 code = ix86_fp_compare_code_to_integer (code);
1499 }
1500 /* i387 supports just limited amount of conditional codes. */
1501 switch (code)
1502 {
1503 case LTU: case GTU: case LEU: case GEU:
1504 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
9076b9c1
JH
1505 return 1;
1506 return 0;
9a915772
JH
1507 case ORDERED: case UNORDERED:
1508 case EQ: case NE:
1509 return 1;
3a3677ff
RH
1510 default:
1511 return 0;
1512 }
e075ae69 1513}
b840bfb0 1514
e9e80858
JH
1515/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1516
1517int
1518promotable_binary_operator (op, mode)
1519 register rtx op;
1520 enum machine_mode mode ATTRIBUTE_UNUSED;
1521{
1522 switch (GET_CODE (op))
1523 {
1524 case MULT:
1525 /* Modern CPUs have same latency for HImode and SImode multiply,
1526 but 386 and 486 do HImode multiply faster. */
1527 return ix86_cpu > PROCESSOR_I486;
1528 case PLUS:
1529 case AND:
1530 case IOR:
1531 case XOR:
1532 case ASHIFT:
1533 return 1;
1534 default:
1535 return 0;
1536 }
1537}
1538
e075ae69
RH
1539/* Nearly general operand, but accept any const_double, since we wish
1540 to be able to drop them into memory rather than have them get pulled
1541 into registers. */
b840bfb0 1542
2a2ab3f9 1543int
e075ae69
RH
1544cmp_fp_expander_operand (op, mode)
1545 register rtx op;
1546 enum machine_mode mode;
2a2ab3f9 1547{
e075ae69 1548 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 1549 return 0;
e075ae69 1550 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 1551 return 1;
e075ae69 1552 return general_operand (op, mode);
2a2ab3f9
JVA
1553}
1554
e075ae69 1555/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
1556
1557int
e075ae69 1558ext_register_operand (op, mode)
2a2ab3f9 1559 register rtx op;
bb5177ac 1560 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1561{
e075ae69
RH
1562 if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
1563 return 0;
1564 return register_operand (op, VOIDmode);
1565}
1566
1567/* Return 1 if this is a valid binary floating-point operation.
0f290768 1568 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
1569
1570int
1571binary_fp_operator (op, mode)
1572 register rtx op;
1573 enum machine_mode mode;
1574{
1575 if (mode != VOIDmode && mode != GET_MODE (op))
1576 return 0;
1577
2a2ab3f9
JVA
1578 switch (GET_CODE (op))
1579 {
e075ae69
RH
1580 case PLUS:
1581 case MINUS:
1582 case MULT:
1583 case DIV:
1584 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 1585
2a2ab3f9
JVA
1586 default:
1587 return 0;
1588 }
1589}
fee2770d 1590
e075ae69
RH
1591int
1592mult_operator(op, mode)
1593 register rtx op;
1594 enum machine_mode mode ATTRIBUTE_UNUSED;
1595{
1596 return GET_CODE (op) == MULT;
1597}
1598
1599int
1600div_operator(op, mode)
1601 register rtx op;
1602 enum machine_mode mode ATTRIBUTE_UNUSED;
1603{
1604 return GET_CODE (op) == DIV;
1605}
0a726ef1
JL
1606
1607int
e075ae69
RH
1608arith_or_logical_operator (op, mode)
1609 rtx op;
1610 enum machine_mode mode;
0a726ef1 1611{
e075ae69
RH
1612 return ((mode == VOIDmode || GET_MODE (op) == mode)
1613 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1614 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
1615}
1616
e075ae69 1617/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
1618
1619int
e075ae69
RH
1620memory_displacement_operand (op, mode)
1621 register rtx op;
1622 enum machine_mode mode;
4f2c8ebb 1623{
e075ae69 1624 struct ix86_address parts;
e9a25f70 1625
e075ae69
RH
1626 if (! memory_operand (op, mode))
1627 return 0;
1628
1629 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1630 abort ();
1631
1632 return parts.disp != NULL_RTX;
4f2c8ebb
RS
1633}
1634
16189740 1635/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
e075ae69
RH
1636 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1637
1638 ??? It seems likely that this will only work because cmpsi is an
1639 expander, and no actual insns use this. */
4f2c8ebb
RS
1640
1641int
e075ae69
RH
1642cmpsi_operand (op, mode)
1643 rtx op;
1644 enum machine_mode mode;
fee2770d 1645{
e075ae69
RH
1646 if (general_operand (op, mode))
1647 return 1;
1648
1649 if (GET_CODE (op) == AND
1650 && GET_MODE (op) == SImode
1651 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
1652 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
1653 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
1654 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
1655 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
1656 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 1657 return 1;
e9a25f70 1658
fee2770d
RS
1659 return 0;
1660}
d784886d 1661
e075ae69
RH
1662/* Returns 1 if OP is memory operand that can not be represented by the
1663 modRM array. */
d784886d
RK
1664
1665int
e075ae69 1666long_memory_operand (op, mode)
d784886d
RK
1667 register rtx op;
1668 enum machine_mode mode;
1669{
e075ae69 1670 if (! memory_operand (op, mode))
d784886d
RK
1671 return 0;
1672
e075ae69 1673 return memory_address_length (op) != 0;
d784886d 1674}
2247f6ed
JH
1675
1676/* Return nonzero if the rtx is known aligned. */
1677
1678int
1679aligned_operand (op, mode)
1680 rtx op;
1681 enum machine_mode mode;
1682{
1683 struct ix86_address parts;
1684
1685 if (!general_operand (op, mode))
1686 return 0;
1687
0f290768 1688 /* Registers and immediate operands are always "aligned". */
2247f6ed
JH
1689 if (GET_CODE (op) != MEM)
1690 return 1;
1691
0f290768 1692 /* Don't even try to do any aligned optimizations with volatiles. */
2247f6ed
JH
1693 if (MEM_VOLATILE_P (op))
1694 return 0;
1695
1696 op = XEXP (op, 0);
1697
1698 /* Pushes and pops are only valid on the stack pointer. */
1699 if (GET_CODE (op) == PRE_DEC
1700 || GET_CODE (op) == POST_INC)
1701 return 1;
1702
1703 /* Decode the address. */
1704 if (! ix86_decompose_address (op, &parts))
1705 abort ();
1706
1707 /* Look for some component that isn't known to be aligned. */
1708 if (parts.index)
1709 {
1710 if (parts.scale < 4
bdb429a5 1711 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
2247f6ed
JH
1712 return 0;
1713 }
1714 if (parts.base)
1715 {
bdb429a5 1716 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
2247f6ed
JH
1717 return 0;
1718 }
1719 if (parts.disp)
1720 {
1721 if (GET_CODE (parts.disp) != CONST_INT
1722 || (INTVAL (parts.disp) & 3) != 0)
1723 return 0;
1724 }
1725
1726 /* Didn't find one -- this must be an aligned address. */
1727 return 1;
1728}
e075ae69
RH
1729\f
1730/* Return true if the constant is something that can be loaded with
1731 a special instruction. Only handle 0.0 and 1.0; others are less
1732 worthwhile. */
57dbca5e
BS
1733
1734int
e075ae69
RH
1735standard_80387_constant_p (x)
1736 rtx x;
57dbca5e 1737{
2b04e52b 1738 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 1739 return -1;
2b04e52b
JH
1740 /* Note that on the 80387, other constants, such as pi, that we should support
1741 too. On some machines, these are much slower to load as standard constant,
1742 than to load from doubles in memory. */
1743 if (x == CONST0_RTX (GET_MODE (x)))
1744 return 1;
1745 if (x == CONST1_RTX (GET_MODE (x)))
1746 return 2;
e075ae69 1747 return 0;
57dbca5e
BS
1748}
1749
2b04e52b
JH
1750/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
1751 */
1752int
1753standard_sse_constant_p (x)
1754 rtx x;
1755{
1756 if (GET_CODE (x) != CONST_DOUBLE)
1757 return -1;
1758 return (x == CONST0_RTX (GET_MODE (x)));
1759}
1760
2a2ab3f9
JVA
1761/* Returns 1 if OP contains a symbol reference */
1762
1763int
1764symbolic_reference_mentioned_p (op)
1765 rtx op;
1766{
6f7d635c 1767 register const char *fmt;
2a2ab3f9
JVA
1768 register int i;
1769
1770 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
1771 return 1;
1772
1773 fmt = GET_RTX_FORMAT (GET_CODE (op));
1774 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
1775 {
1776 if (fmt[i] == 'E')
1777 {
1778 register int j;
1779
1780 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
1781 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
1782 return 1;
1783 }
e9a25f70 1784
2a2ab3f9
JVA
1785 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
1786 return 1;
1787 }
1788
1789 return 0;
1790}
e075ae69
RH
1791
1792/* Return 1 if it is appropriate to emit `ret' instructions in the
1793 body of a function. Do this only if the epilogue is simple, needing a
1794 couple of insns. Prior to reloading, we can't tell how many registers
1795 must be saved, so return 0 then. Return 0 if there is no frame
1796 marker to de-allocate.
1797
1798 If NON_SAVING_SETJMP is defined and true, then it is not possible
1799 for the epilogue to be simple, so return 0. This is a special case
1800 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1801 until final, but jump_optimize may need to know sooner if a
1802 `return' is OK. */
32b5b1aa
SC
1803
1804int
e075ae69 1805ix86_can_use_return_insn_p ()
32b5b1aa 1806{
4dd2ac2c 1807 struct ix86_frame frame;
9a7372d6 1808
e075ae69
RH
1809#ifdef NON_SAVING_SETJMP
1810 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
1811 return 0;
1812#endif
9a7372d6
RH
1813#ifdef FUNCTION_BLOCK_PROFILER_EXIT
1814 if (profile_block_flag == 2)
1815 return 0;
1816#endif
1817
1818 if (! reload_completed || frame_pointer_needed)
1819 return 0;
32b5b1aa 1820
9a7372d6
RH
1821 /* Don't allow more than 32 pop, since that's all we can do
1822 with one instruction. */
1823 if (current_function_pops_args
1824 && current_function_args_size >= 32768)
e075ae69 1825 return 0;
32b5b1aa 1826
4dd2ac2c
JH
1827 ix86_compute_frame_layout (&frame);
1828 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 1829}
6189a572
JH
1830\f
/* Return 1 if VALUE can be stored in the sign extended immediate field.  */
int
x86_64_sign_extended_value (value)
     rtx value;
{
  switch (GET_CODE (value))
    {
      /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
	 to be at least 32 and thus all acceptable constants are
	 represented as CONST_INT.  */
      case CONST_INT:
	if (HOST_BITS_PER_WIDE_INT == 32)
	  return 1;
	else
	  {
	    /* Check the value survives a round trip through 32 bits.  */
	    HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
	    return (HOST_WIDE_INT)(int)val == val;
	  }
	break;

      /* For certain code models, the symbolic references are known to fit.  */
      case SYMBOL_REF:
	return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;

      /* For certain code models, the code is near as well.  */
      case LABEL_REF:
	return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;

      /* We also may accept the offsetted memory references in certain special
	 cases.  */
      case CONST:
	/* NOTE(review): UNSPEC number 15 appears to be a special GOT-style
	   reference defined elsewhere in this port — confirm against the
	   UNSPEC list in i386.md.  */
	if (GET_CODE (XEXP (value, 0)) == UNSPEC
	    && XVECLEN (XEXP (value, 0), 0) == 1
	    && XINT (XEXP (value, 0), 1) == 15)
	  return 1;
	else if (GET_CODE (XEXP (value, 0)) == PLUS)
	  {
	    rtx op1 = XEXP (XEXP (value, 0), 0);
	    rtx op2 = XEXP (XEXP (value, 0), 1);
	    HOST_WIDE_INT offset;

	    if (ix86_cmodel == CM_LARGE)
	      return 0;
	    if (GET_CODE (op2) != CONST_INT)
	      return 0;
	    offset = trunc_int_for_mode (INTVAL (op2), DImode);
	    switch (GET_CODE (op1))
	      {
		case SYMBOL_REF:
		  /* For CM_SMALL assume that latest object is 1MB before
		     end of 31bits boundary.  We may also accept pretty
		     large negative constants knowing that all objects are
		     in the positive half of address space.  */
		  if (ix86_cmodel == CM_SMALL
		      && offset < 1024*1024*1024
		      && trunc_int_for_mode (offset, SImode) == offset)
		    return 1;
		  /* For CM_KERNEL we know that all objects reside in the
		     negative half of the 32-bit address space.  We may not
		     accept negative offsets, since they may be just off,
		     and we may accept pretty large positive ones.  */
		  if (ix86_cmodel == CM_KERNEL
		      && offset > 0
		      && trunc_int_for_mode (offset, SImode) == offset)
		    return 1;
		  break;
		case LABEL_REF:
		  /* These conditions are similar to SYMBOL_REF ones, just the
		     constraints for code models differ.  */
		  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		      && offset < 1024*1024*1024
		      && trunc_int_for_mode (offset, SImode) == offset)
		    return 1;
		  if (ix86_cmodel == CM_KERNEL
		      && offset > 0
		      && trunc_int_for_mode (offset, SImode) == offset)
		    return 1;
		  break;
		default:
		  return 0;
	      }
	  }
	return 0;
      default:
	return 0;
    }
}
1918
1919/* Return 1 if VALUE can be stored in the zero extended immediate field. */
1920int
1921x86_64_zero_extended_value (value)
1922 rtx value;
1923{
1924 switch (GET_CODE (value))
1925 {
1926 case CONST_DOUBLE:
1927 if (HOST_BITS_PER_WIDE_INT == 32)
1928 return (GET_MODE (value) == VOIDmode
1929 && !CONST_DOUBLE_HIGH (value));
1930 else
1931 return 0;
1932 case CONST_INT:
1933 if (HOST_BITS_PER_WIDE_INT == 32)
1934 return INTVAL (value) >= 0;
1935 else
1936 return !(INTVAL (value) & ~(HOST_WIDE_INT)0xffffffff);
1937 break;
1938
1939 /* For certain code models, the symbolic references are known to fit. */
1940 case SYMBOL_REF:
1941 return ix86_cmodel == CM_SMALL;
1942
1943 /* For certain code models, the code is near as well. */
1944 case LABEL_REF:
1945 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
1946
1947 /* We also may accept the offsetted memory references in certain special
1948 cases. */
1949 case CONST:
1950 if (GET_CODE (XEXP (value, 0)) == PLUS)
1951 {
1952 rtx op1 = XEXP (XEXP (value, 0), 0);
1953 rtx op2 = XEXP (XEXP (value, 0), 1);
1954
1955 if (ix86_cmodel == CM_LARGE)
1956 return 0;
1957 switch (GET_CODE (op1))
1958 {
1959 case SYMBOL_REF:
1960 return 0;
1961 /* For small code model we may accept pretty large possitive
1962 offsets, since one bit is available for free. Negative
1963 offsets are limited by the size of NULL pointer area
1964 specified by the ABI. */
1965 if (ix86_cmodel == CM_SMALL
1966 && GET_CODE (op2) == CONST_INT
1967 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
1968 && (trunc_int_for_mode (INTVAL (op2), SImode)
1969 == INTVAL (op2)))
1970 return 1;
1971 /* ??? For the kernel, we may accept adjustment of
1972 -0x10000000, since we know that it will just convert
1973 negative address space to possitive, but perhaps this
1974 is not worthwhile. */
1975 break;
1976 case LABEL_REF:
1977 /* These conditions are similar to SYMBOL_REF ones, just the
1978 constraints for code models differ. */
1979 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
1980 && GET_CODE (op2) == CONST_INT
1981 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
1982 && (trunc_int_for_mode (INTVAL (op2), SImode)
1983 == INTVAL (op2)))
1984 return 1;
1985 break;
1986 default:
1987 return 0;
1988 }
1989 }
1990 return 0;
1991 default:
1992 return 0;
1993 }
1994}
6fca22eb
RH
1995
1996/* Value should be nonzero if functions must have frame pointers.
1997 Zero means the frame pointer need not be set up (and parms may
1998 be accessed via the stack pointer) in functions that seem suitable. */
1999
2000int
2001ix86_frame_pointer_required ()
2002{
2003 /* If we accessed previous frames, then the generated code expects
2004 to be able to access the saved ebp value in our frame. */
2005 if (cfun->machine->accesses_prev_frame)
2006 return 1;
2007
2008 /* Several x86 os'es need a frame pointer for other reasons,
2009 usually pertaining to setjmp. */
2010 if (SUBTARGET_FRAME_POINTER_REQUIRED)
2011 return 1;
2012
2013 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
2014 the frame pointer by default. Turn it back on now if we've not
2015 got a leaf function. */
2016 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
2017 return 1;
2018
2019 return 0;
2020}
2021
/* Record that the current function accesses previous call frames.
   Setting this flag makes ix86_frame_pointer_required keep a frame
   pointer, so the saved-ebp chain stays walkable.  */

void
ix86_setup_frame_addresses ()
{
  cfun->machine->accesses_prev_frame = 1;
}
e075ae69 2029\f
/* Name of the internal PC-load thunk label; empty until
   load_pic_register generates it on first use.  */
static char pic_label_name[32];

/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  Emitted once,
   at the end of assembly output, and only if a thunk label was ever
   requested.  */

void
ix86_asm_file_end (file)
     FILE *file;
{
  rtx xops[2];

  /* Nothing to do unless the deep-branch-prediction thunk was used.  */
  if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
    return;

  /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
     to updating relocations to a section being discarded such that this
     doesn't work.  Ought to detect this at configure time.  */
#if 0 && defined (ASM_OUTPUT_SECTION_NAME)
  /* The trick here is to create a linkonce section containing the
     pic label thunk, but to refer to it with an internal label.
     Because the label is internal, we don't have inter-dso name
     binding issues on hosts that don't support ".hidden".

     In order to use these macros, however, we must create a fake
     function decl.  */
  {
    tree decl = build_decl (FUNCTION_DECL,
			    get_identifier ("i686.get_pc_thunk"),
			    error_mark_node);
    DECL_ONE_ONLY (decl) = 1;
    UNIQUE_SECTION (decl, 0);
    named_section (decl, NULL, 0);
  }
#else
  text_section ();
#endif

  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
     internal (non-global) label that's being emitted, it didn't make
     sense to have .type information for local labels.  This caused
     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
     me debug info for a label that you're declaring non-global?) this
     was changed to call ASM_OUTPUT_LABEL() instead.  */

  ASM_OUTPUT_LABEL (file, pic_label_name);

  /* The thunk body: copy the return address (at the top of the stack)
     into the PIC register, then return.  */
  xops[0] = pic_offset_table_rtx;
  xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
  output_asm_insn ("ret", xops);
}
32b5b1aa 2081
e075ae69
RH
2082void
2083load_pic_register ()
32b5b1aa 2084{
e075ae69 2085 rtx gotsym, pclab;
32b5b1aa 2086
a8a05998 2087 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
32b5b1aa 2088
e075ae69 2089 if (TARGET_DEEP_BRANCH_PREDICTION)
32b5b1aa 2090 {
4cf12e7e
RH
2091 if (! pic_label_name[0])
2092 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
e075ae69 2093 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
32b5b1aa 2094 }
e075ae69 2095 else
e5cb57e8 2096 {
e075ae69 2097 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
e5cb57e8 2098 }
e5cb57e8 2099
e075ae69 2100 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
2a2ab3f9 2101
e075ae69
RH
2102 if (! TARGET_DEEP_BRANCH_PREDICTION)
2103 emit_insn (gen_popsi1 (pic_offset_table_rtx));
79325812 2104
e075ae69 2105 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
e9a25f70 2106}
8dfe5673 2107
e075ae69 2108/* Generate an SImode "push" pattern for input ARG. */
e9a25f70 2109
e075ae69
RH
2110static rtx
2111gen_push (arg)
2112 rtx arg;
e9a25f70 2113{
c5c76735
JL
2114 return gen_rtx_SET (VOIDmode,
2115 gen_rtx_MEM (SImode,
2116 gen_rtx_PRE_DEC (SImode,
2117 stack_pointer_rtx)),
2118 arg);
e9a25f70
JL
2119}
2120
4dd2ac2c
JH
2121/* Return 1 if we need to save REGNO. */
2122static int
2123ix86_save_reg (regno)
2124 int regno;
2125{
2126 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2127 || current_function_uses_const_pool);
2128 return ((regs_ever_live[regno] && !call_used_regs[regno]
2129 && !fixed_regs[regno]
2130 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed))
2131 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used));
2132
2133}
2134
0903fcab
JH
2135/* Return number of registers to be saved on the stack. */
2136
2137static int
2138ix86_nsaved_regs ()
2139{
2140 int nregs = 0;
0903fcab
JH
2141 int regno;
2142
4dd2ac2c
JH
2143 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
2144 if (ix86_save_reg (regno))
2145 nregs++;
0903fcab
JH
2146 return nregs;
2147}
2148
2149/* Return the offset between two registers, one to be eliminated, and the other
2150 its replacement, at the start of a routine. */
2151
2152HOST_WIDE_INT
2153ix86_initial_elimination_offset (from, to)
2154 int from;
2155 int to;
2156{
4dd2ac2c
JH
2157 struct ix86_frame frame;
2158 ix86_compute_frame_layout (&frame);
564d80f4
JH
2159
2160 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 2161 return frame.hard_frame_pointer_offset;
564d80f4
JH
2162 else if (from == FRAME_POINTER_REGNUM
2163 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 2164 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
2165 else
2166 {
564d80f4
JH
2167 if (to != STACK_POINTER_REGNUM)
2168 abort ();
2169 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 2170 return frame.stack_pointer_offset;
564d80f4
JH
2171 else if (from != FRAME_POINTER_REGNUM)
2172 abort ();
0903fcab 2173 else
4dd2ac2c 2174 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
2175 }
2176}
2177
4dd2ac2c 2178/* Fill structure ix86_frame about frame of currently computed function. */
0f290768 2179
4dd2ac2c
JH
2180static void
2181ix86_compute_frame_layout (frame)
2182 struct ix86_frame *frame;
65954bd8 2183{
65954bd8 2184 HOST_WIDE_INT total_size;
564d80f4 2185 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
44affdae
JH
2186 int offset;
2187 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4dd2ac2c 2188 HOST_WIDE_INT size = get_frame_size ();
65954bd8 2189
4dd2ac2c 2190 frame->nregs = ix86_nsaved_regs ();
564d80f4 2191 total_size = size;
65954bd8 2192
4dd2ac2c
JH
2193 /* Skip return value and save base pointer. */
2194 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
2195
2196 frame->hard_frame_pointer_offset = offset;
564d80f4 2197
fcbfaa65
RK
2198 /* Do some sanity checking of stack_alignment_needed and
2199 preferred_alignment, since i386 port is the only using those features
2200 that may break easilly. */
564d80f4 2201
44affdae
JH
2202 if (size && !stack_alignment_needed)
2203 abort ();
44affdae
JH
2204 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
2205 abort ();
2206 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
2207 abort ();
2208 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
2209 abort ();
564d80f4 2210
4dd2ac2c
JH
2211 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
2212 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 2213
4dd2ac2c
JH
2214 /* Register save area */
2215 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 2216
4dd2ac2c
JH
2217 /* Align start of frame for local function. */
2218 frame->padding1 = ((offset + stack_alignment_needed - 1)
2219 & -stack_alignment_needed) - offset;
f73ad30e 2220
4dd2ac2c 2221 offset += frame->padding1;
65954bd8 2222
4dd2ac2c
JH
2223 /* Frame pointer points here. */
2224 frame->frame_pointer_offset = offset;
54ff41b7 2225
4dd2ac2c 2226 offset += size;
65954bd8 2227
4dd2ac2c 2228 /* Add outgoing arguments area. */
f73ad30e 2229 if (ACCUMULATE_OUTGOING_ARGS)
4dd2ac2c
JH
2230 {
2231 offset += current_function_outgoing_args_size;
2232 frame->outgoing_arguments_size = current_function_outgoing_args_size;
2233 }
2234 else
2235 frame->outgoing_arguments_size = 0;
564d80f4 2236
4dd2ac2c
JH
2237 /* Align stack boundary. */
2238 frame->padding2 = ((offset + preferred_alignment - 1)
2239 & -preferred_alignment) - offset;
2240
2241 offset += frame->padding2;
2242
2243 /* We've reached end of stack frame. */
2244 frame->stack_pointer_offset = offset;
2245
2246 /* Size prologue needs to allocate. */
2247 frame->to_allocate =
2248 (size + frame->padding1 + frame->padding2
2249 + frame->outgoing_arguments_size);
2250
2251#if 0
2252 fprintf (stderr, "nregs: %i\n", frame->nregs);
2253 fprintf (stderr, "size: %i\n", size);
2254 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
2255 fprintf (stderr, "padding1: %i\n", frame->padding1);
2256 fprintf (stderr, "padding2: %i\n", frame->padding2);
2257 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
2258 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
2259 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
2260 frame->hard_frame_pointer_offset);
2261 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
2262#endif
65954bd8
JL
2263}
2264
0903fcab
JH
2265/* Emit code to save registers in the prologue. */
2266
2267static void
2268ix86_emit_save_regs ()
2269{
2270 register int regno;
0903fcab 2271 rtx insn;
0903fcab 2272
4dd2ac2c
JH
2273 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
2274 if (ix86_save_reg (regno))
0903fcab
JH
2275 {
2276 insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
2277 RTX_FRAME_RELATED_P (insn) = 1;
2278 }
2279}
2280
/* Expand the prologue into a bunch of separate insns.
   Order: save ebp and point it at the frame (if needed), push the
   call-saved registers, allocate the frame, run any subtarget hook,
   then load the PIC register.  */

void
ix86_expand_prologue ()
{
  rtx insn;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  struct ix86_frame frame;

  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  ix86_emit_save_regs ();

  if (frame.to_allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || frame.to_allocate < CHECK_STACK_LIMIT)
    {
      /* Small (or unprobed) allocation: just drop the stack pointer.
	 Tie it to ebp when a frame pointer exists so the scheduler
	 cannot reorder around it.  */
      if (frame_pointer_needed)
	insn = emit_insn (gen_pro_epilogue_adjust_stack
			  (stack_pointer_rtx, stack_pointer_rtx,
			   GEN_INT (-frame.to_allocate), hard_frame_pointer_rtx));
      else
	insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				      GEN_INT (-frame.to_allocate)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* Large allocation with stack probing: call _alloca with the
	 size in %eax.  */
      /* ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (frame.to_allocate));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx));

      /* Record that the call reads %eax so it is not deleted.  */
      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    load_pic_register ();

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  However, if -fpic, the above call will have
     done that.  */
  if ((profile_flag || profile_block_flag) && ! pic_reg_used)
    emit_insn (gen_blockage ());
}
2351
0903fcab
JH
2352/* Emit code to add TSIZE to esp value. Use POP instruction when
2353 profitable. */
2354
2355static void
2356ix86_emit_epilogue_esp_adjustment (tsize)
2357 int tsize;
2358{
bdeb029c
JH
2359 /* If a frame pointer is present, we must be sure to tie the sp
2360 to the fp so that we don't mis-schedule. */
2361 if (frame_pointer_needed)
2362 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2363 stack_pointer_rtx,
2364 GEN_INT (tsize),
2365 hard_frame_pointer_rtx));
0903fcab 2366 else
bdeb029c
JH
2367 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2368 GEN_INT (tsize)));
0903fcab
JH
2369}
2370
da2d1d3a
JH
2371/* Emit code to restore saved registers using MOV insns. First register
2372 is restored from POINTER + OFFSET. */
2373static void
2374ix86_emit_restore_regs_using_mov (pointer, offset)
2375 rtx pointer;
2376 int offset;
2377{
2378 int regno;
da2d1d3a 2379
4dd2ac2c
JH
2380 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2381 if (ix86_save_reg (regno))
da2d1d3a 2382 {
4dd2ac2c
JH
2383 emit_move_insn (gen_rtx_REG (Pmode, regno),
2384 adj_offsettable_operand (gen_rtx_MEM (Pmode,
da2d1d3a
JH
2385 pointer),
2386 offset));
4dd2ac2c 2387 offset += UNITS_PER_WORD;
da2d1d3a
JH
2388 }
2389}
2390
/* Restore function stack, frame, and registers.
   EMIT_RETURN is false for sibcall epilogues, which must not end in a
   return insn.  Chooses between restoring registers with MOVs (plus
   leave or an sp adjustment) and the plain pop sequence.  */

void
ix86_expand_epilogue (emit_return)
     int emit_return;
{
  int regno;
  /* sp_valid: the stack pointer still points where the prologue left
     it, so saved registers can be addressed relative to esp.  */
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  */
  offset = -frame.nregs * UNITS_PER_WORD;

#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    {
      FUNCTION_BLOCK_PROFILER_EXIT;
    }
#endif

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.

     The default code result in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
	  && frame.nregs == 1))
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);

      if (!frame_pointer_needed)
	ix86_emit_epilogue_esp_adjustment (frame.to_allocate
					   + frame.nregs * UNITS_PER_WORD);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size)
	emit_insn (gen_leave ());
      else
	{
	  /* Discrete equivalent of leave: mov ebp to esp, pop ebp.  */
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx,
						    hard_frame_pointer_rtx));
	  emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset),
						    hard_frame_pointer_rtx));
	}
      else if (frame.to_allocate)
	ix86_emit_epilogue_esp_adjustment (frame.to_allocate);

      /* Pop in ascending order, matching the descending push order of
	 the prologue.  */
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno))
	  emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
      if (frame_pointer_needed)
	emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (! emit_return)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
2504\f
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return false if the structure of the address is
   grossly off.  Fills OUT with base, index, displacement and scale,
   normalizing several special cases so the result is encodable.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;

  /* Match the canonical shapes: reg, plus, mult, ashift, or bare
     displacement.  */
  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;		/* index*scale + base */
	  else
	    disp = op1;		/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return FALSE;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return FALSE;
      scale = INTVAL (tmp);
      /* Only shifts by 0..3 correspond to encodable scales 1/2/4/8.  */
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return FALSE;
      scale = 1 << scale;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return FALSE;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is not scaling */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
	  || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.
     NOTE(review): the `scale &&' test is redundant — `scale == 2'
     already implies it is nonzero.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return TRUE;
}
01329426
JH
2630\f
2631/* Return cost of the memory address x.
2632 For i386, it is better to use a complex address than let gcc copy
2633 the address into a reg and make a new pseudo. But not if the address
2634 requires to two regs - that would mean more pseudos with longer
2635 lifetimes. */
2636int
2637ix86_address_cost (x)
2638 rtx x;
2639{
2640 struct ix86_address parts;
2641 int cost = 1;
3b3c6a3f 2642
01329426
JH
2643 if (!ix86_decompose_address (x, &parts))
2644 abort ();
2645
2646 /* More complex memory references are better. */
2647 if (parts.disp && parts.disp != const0_rtx)
2648 cost--;
2649
2650 /* Attempt to minimize number of registers in the address. */
2651 if ((parts.base
2652 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2653 || (parts.index
2654 && (!REG_P (parts.index)
2655 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2656 cost++;
2657
2658 if (parts.base
2659 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2660 && parts.index
2661 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2662 && parts.base != parts.index)
2663 cost++;
2664
2665 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
2666 since it's predecode logic can't detect the length of instructions
2667 and it degenerates to vector decoded. Increase cost of such
2668 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 2669 to split such addresses or even refuse such addresses at all.
01329426
JH
2670
2671 Following addressing modes are affected:
2672 [base+scale*index]
2673 [scale*index+disp]
2674 [base+index]
0f290768 2675
01329426
JH
2676 The first and last case may be avoidable by explicitly coding the zero in
2677 memory address, but I don't have AMD-K6 machine handy to check this
2678 theory. */
2679
2680 if (TARGET_K6
2681 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2682 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2683 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2684 cost += 10;
0f290768 2685
01329426
JH
2686 return cost;
2687}
2688\f
b949ea8b
JW
2689/* If X is a machine specific address (i.e. a symbol or label being
2690 referenced as a displacement from the GOT implemented using an
2691 UNSPEC), then return the base term. Otherwise return X. */
2692
2693rtx
2694ix86_find_base_term (x)
2695 rtx x;
2696{
2697 rtx term;
2698
2699 if (GET_CODE (x) != PLUS
2700 || XEXP (x, 0) != pic_offset_table_rtx
2701 || GET_CODE (XEXP (x, 1)) != CONST)
2702 return x;
2703
2704 term = XEXP (XEXP (x, 1), 0);
2705
2706 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
2707 term = XEXP (term, 0);
2708
2709 if (GET_CODE (term) != UNSPEC
2710 || XVECLEN (term, 0) != 1
2711 || XINT (term, 1) != 7)
2712 return x;
2713
2714 term = XVECEXP (term, 0, 0);
2715
2716 if (GET_CODE (term) != SYMBOL_REF
2717 && GET_CODE (term) != LABEL_REF)
2718 return x;
2719
2720 return term;
2721}
2722\f
e075ae69
RH
2723/* Determine if a given CONST RTX is a valid memory displacement
2724 in PIC mode. */
0f290768 2725
59be65f6 2726int
91bb873f
RH
2727legitimate_pic_address_disp_p (disp)
2728 register rtx disp;
2729{
2730 if (GET_CODE (disp) != CONST)
2731 return 0;
2732 disp = XEXP (disp, 0);
2733
2734 if (GET_CODE (disp) == PLUS)
2735 {
2736 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2737 return 0;
2738 disp = XEXP (disp, 0);
2739 }
2740
2741 if (GET_CODE (disp) != UNSPEC
2742 || XVECLEN (disp, 0) != 1)
2743 return 0;
2744
2745 /* Must be @GOT or @GOTOFF. */
2746 if (XINT (disp, 1) != 6
2747 && XINT (disp, 1) != 7)
2748 return 0;
2749
2750 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2751 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2752 return 0;
2753
2754 return 1;
2755}
2756
e075ae69
RH
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.

   Returns TRUE (nonzero) when ADDR is usable, FALSE otherwise.  STRICT
   nonzero selects the strict register predicates.  */

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  /* ADDR decomposed into [base + index*scale + disp] components.  */
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  /* On rejection, REASON/REASON_RTX describe the offending component
     for the debug dump at report_error below.  */
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (! ix86_decompose_address (addr, &parts))
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      /* Strict mode insists on hard regs / properly allocated pseudos.  */
      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  Scale 1 is always fine; anything else
     needs an index and must be one of the machine's multipliers.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (!CONSTANT_ADDRESS_P (disp))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}

      if (GET_CODE (disp) == CONST_DOUBLE)
	{
	  reason = "displacement is a const_double";
	  goto report_error;
	}

      if (flag_pic && SYMBOLIC_CONST (disp))
	{
	  /* Symbolic displacements under PIC must be the @GOT/@GOTOFF
	     forms checked by legitimate_pic_address_disp_p.  */
	  if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	     {
	       return *(&a+i);
	     }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easilly, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (HALF_PIC_P ())
	{
	  /* Half-pic references must stand alone: no base or index.  */
	  if (! HALF_PIC_ADDRESS_P (disp)
	      || (base != NULL_RTX || index != NULL_RTX))
	    {
	      reason = "displacement is an invalid half-pic reference";
	      goto report_error;
	    }
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
3b3c6a3f 2944\f
55efb413
JW
2945/* Return an unique alias set for the GOT. */
2946
0f290768 2947static HOST_WIDE_INT
55efb413
JW
2948ix86_GOT_alias_set ()
2949{
2950 static HOST_WIDE_INT set = -1;
2951 if (set == -1)
2952 set = new_alias_set ();
2953 return set;
0f290768 2954}
55efb413 2955
3b3c6a3f
MM
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

  if (GET_CODE (addr) == LABEL_REF
      || (GET_CODE (addr) == SYMBOL_REF
	  && (CONSTANT_POOL_ADDRESS_P (addr)
	      || SYMBOL_REF_FLAG (addr))))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  Unspec code 7 denotes @GOTOFF.  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      /* This symbol must be referenced via a load from the
	 Global Offset Table (@GOT).  Unspec code 6 denotes @GOT.  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
      new = gen_rtx_MEM (Pmode, new);
      /* The GOT slot is marked unchanging and placed in its own
	 alias set (see ix86_GOT_alias_set).  */
      RTX_UNCHANGING_P (new) = 1;
      MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();

      /* A @GOT reference always needs a register for the loaded value.  */
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      emit_move_insn (reg, new);
      new = reg;
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == UNSPEC)
	    {
	      /* Check that the unspec is one of the ones we generate?  */
	    }
	  else if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if ((GET_CODE (op0) == LABEL_REF
	       || (GET_CODE (op0) == SYMBOL_REF
		   && (CONSTANT_POOL_ADDRESS_P (op0)
		       || SYMBOL_REF_FLAG (op0))))
	      && GET_CODE (op1) == CONST_INT)
	    {
	      current_function_uses_pic_offset_table = 1;
	      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
	      new = gen_rtx_PLUS (Pmode, new, op1);
	      new = gen_rtx_CONST (Pmode, new);
	      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

	      if (reg != 0)
		{
		  emit_move_insn (reg, new);
		  new = reg;
		}
	    }
	  else
	    {
	      /* Legitimize both operands recursively; hand REG to the
		 second operand only if the first did not consume it.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new = legitimize_pic_address (XEXP (addr, 1),
					    base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  /* Re-associate so that a constant term stays outermost:
		     (base + (X + const)) becomes ((base + X) + const).  */
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}
3078\f
3b3c6a3f
MM
3079/* Try machine-dependent ways of modifying an illegitimate address
3080 to be legitimate. If we find one, return the new, valid address.
3081 This macro is used in only one place: `memory_address' in explow.c.
3082
3083 OLDX is the address as it was before break_out_memory_refs was called.
3084 In some cases it is useful to look at this to decide what needs to be done.
3085
3086 MODE and WIN are passed so that this macro can use
3087 GO_IF_LEGITIMATE_ADDRESS.
3088
3089 It is always safe for this macro to do nothing. It exists to recognize
3090 opportunities to optimize the output.
3091
3092 For the 80386, we handle X+REG by loading X into a register R and
3093 using R+REG. R will go in a general reg and indexing will be used.
3094 However, if REG is a broken-out memory address or multiplication,
3095 nothing needs to be done because REG can certainly go in a general reg.
3096
3097 When -fpic is used, special handling is needed for symbolic references.
3098 See comments by legitimize_pic_address in i386.c for details. */
3099
3100rtx
3101legitimize_address (x, oldx, mode)
3102 register rtx x;
bb5177ac 3103 register rtx oldx ATTRIBUTE_UNUSED;
3b3c6a3f
MM
3104 enum machine_mode mode;
3105{
3106 int changed = 0;
3107 unsigned log;
3108
3109 if (TARGET_DEBUG_ADDR)
3110 {
e9a25f70
JL
3111 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
3112 GET_MODE_NAME (mode));
3b3c6a3f
MM
3113 debug_rtx (x);
3114 }
3115
3116 if (flag_pic && SYMBOLIC_CONST (x))
3117 return legitimize_pic_address (x, 0);
3118
3119 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
3120 if (GET_CODE (x) == ASHIFT
3121 && GET_CODE (XEXP (x, 1)) == CONST_INT
3122 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
3123 {
3124 changed = 1;
a269a03c
JC
3125 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
3126 GEN_INT (1 << log));
3b3c6a3f
MM
3127 }
3128
3129 if (GET_CODE (x) == PLUS)
3130 {
0f290768 3131 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 3132
3b3c6a3f
MM
3133 if (GET_CODE (XEXP (x, 0)) == ASHIFT
3134 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3135 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
3136 {
3137 changed = 1;
c5c76735
JL
3138 XEXP (x, 0) = gen_rtx_MULT (Pmode,
3139 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
3140 GEN_INT (1 << log));
3b3c6a3f
MM
3141 }
3142
3143 if (GET_CODE (XEXP (x, 1)) == ASHIFT
3144 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
3145 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
3146 {
3147 changed = 1;
c5c76735
JL
3148 XEXP (x, 1) = gen_rtx_MULT (Pmode,
3149 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
3150 GEN_INT (1 << log));
3b3c6a3f
MM
3151 }
3152
0f290768 3153 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
3154 if (GET_CODE (XEXP (x, 1)) == MULT)
3155 {
3156 rtx tmp = XEXP (x, 0);
3157 XEXP (x, 0) = XEXP (x, 1);
3158 XEXP (x, 1) = tmp;
3159 changed = 1;
3160 }
3161
3162 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
3163 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
3164 created by virtual register instantiation, register elimination, and
3165 similar optimizations. */
3166 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
3167 {
3168 changed = 1;
c5c76735
JL
3169 x = gen_rtx_PLUS (Pmode,
3170 gen_rtx_PLUS (Pmode, XEXP (x, 0),
3171 XEXP (XEXP (x, 1), 0)),
3172 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
3173 }
3174
e9a25f70
JL
3175 /* Canonicalize
3176 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
3177 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
3178 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
3179 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
3180 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
3181 && CONSTANT_P (XEXP (x, 1)))
3182 {
00c79232
ML
3183 rtx constant;
3184 rtx other = NULL_RTX;
3b3c6a3f
MM
3185
3186 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3187 {
3188 constant = XEXP (x, 1);
3189 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
3190 }
3191 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
3192 {
3193 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
3194 other = XEXP (x, 1);
3195 }
3196 else
3197 constant = 0;
3198
3199 if (constant)
3200 {
3201 changed = 1;
c5c76735
JL
3202 x = gen_rtx_PLUS (Pmode,
3203 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
3204 XEXP (XEXP (XEXP (x, 0), 1), 0)),
3205 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
3206 }
3207 }
3208
3209 if (changed && legitimate_address_p (mode, x, FALSE))
3210 return x;
3211
3212 if (GET_CODE (XEXP (x, 0)) == MULT)
3213 {
3214 changed = 1;
3215 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
3216 }
3217
3218 if (GET_CODE (XEXP (x, 1)) == MULT)
3219 {
3220 changed = 1;
3221 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
3222 }
3223
3224 if (changed
3225 && GET_CODE (XEXP (x, 1)) == REG
3226 && GET_CODE (XEXP (x, 0)) == REG)
3227 return x;
3228
3229 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
3230 {
3231 changed = 1;
3232 x = legitimize_pic_address (x, 0);
3233 }
3234
3235 if (changed && legitimate_address_p (mode, x, FALSE))
3236 return x;
3237
3238 if (GET_CODE (XEXP (x, 0)) == REG)
3239 {
3240 register rtx temp = gen_reg_rtx (Pmode);
3241 register rtx val = force_operand (XEXP (x, 1), temp);
3242 if (val != temp)
3243 emit_move_insn (temp, val);
3244
3245 XEXP (x, 1) = temp;
3246 return x;
3247 }
3248
3249 else if (GET_CODE (XEXP (x, 1)) == REG)
3250 {
3251 register rtx temp = gen_reg_rtx (Pmode);
3252 register rtx val = force_operand (XEXP (x, 0), temp);
3253 if (val != temp)
3254 emit_move_insn (temp, val);
3255
3256 XEXP (x, 0) = temp;
3257 return x;
3258 }
3259 }
3260
3261 return x;
3262}
2a2ab3f9
JVA
3263\f
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      /* "." (current location) is only meaningful under PIC.  */
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      /* For the 'P' print code, symbols without SYMBOL_REF_FLAG get an
	 explicit @PLT suffix.  */
      if (code == 'P' && ! SYMBOL_REF_FLAG (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      /* Grouping brackets differ between assembler dialects.  */
      putc (ASSEMBLER_DIALECT ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      /* Unspec codes 6/7/8 map to the @GOT/@GOTOFF/@PLT relocations.  */
      switch (XINT (x, 1))
	{
	case 6:
	  fputs ("@GOT", file);
	  break;
	case 7:
	  fputs ("@GOTOFF", file);
	  break;
	case 8:
	  fputs ("@PLT", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
1865dbb5 3378
0f290768 3379/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
3380 We need to handle our special PIC relocations. */
3381
0f290768 3382void
1865dbb5
JM
3383i386_dwarf_output_addr_const (file, x)
3384 FILE *file;
3385 rtx x;
3386{
f0ca81d2 3387 fprintf (file, "%s", INT_ASM_OP);
1865dbb5
JM
3388 if (flag_pic)
3389 output_pic_addr_const (file, x, '\0');
3390 else
3391 output_addr_const (file, x);
3392 fputc ('\n', file);
3393}
3394
3395/* In the name of slightly smaller debug output, and to cater to
3396 general assembler losage, recognize PIC+GOTOFF and turn it back
3397 into a direct symbol reference. */
3398
3399rtx
3400i386_simplify_dwarf_addr (orig_x)
3401 rtx orig_x;
3402{
3403 rtx x = orig_x;
3404
3405 if (GET_CODE (x) != PLUS
3406 || GET_CODE (XEXP (x, 0)) != REG
3407 || GET_CODE (XEXP (x, 1)) != CONST)
3408 return orig_x;
3409
3410 x = XEXP (XEXP (x, 1), 0);
3411 if (GET_CODE (x) == UNSPEC
3adbce3d
RH
3412 && (XINT (x, 1) == 6
3413 || XINT (x, 1) == 7))
1865dbb5
JM
3414 return XVECEXP (x, 0, 0);
3415
3416 if (GET_CODE (x) == PLUS
3417 && GET_CODE (XEXP (x, 0)) == UNSPEC
3418 && GET_CODE (XEXP (x, 1)) == CONST_INT
3adbce3d
RH
3419 && (XINT (XEXP (x, 0), 1) == 6
3420 || XINT (XEXP (x, 0), 1) == 7))
1865dbb5
JM
3421 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
3422
3423 return orig_x;
3424}
2a2ab3f9 3425\f
/* Write to FILE the instruction suffix ("e", "ne", "g", ...) for the
   comparison CODE evaluated in condition-code mode MODE.  If REVERSE is
   nonzero the suffix for the reversed comparison is emitted.  FP nonzero
   selects the fcmov-style spellings where they differ.  Aborts on
   code/mode combinations the hardware cannot test.  */

static void
put_condition_code (code, mode, reverse, fp, file)
     enum rtx_code code;
     enum machine_mode mode;
     int reverse, fp;
     FILE *file;
{
  const char *suffix;

  /* FP comparisons are first reduced to an equivalent integer
     comparison on CCmode flags.  */
  if (mode == CCFPmode || mode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      /* A comparison needing extra jumps cannot be printed here.  */
      if (bypass_code != NIL || second_code != NIL)
	abort();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
	 Those same assemblers have the same but opposite losage on cmov.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      /* In the no-overflow modes LT reduces to a sign test ("s").  */
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case LTU:
      if (mode != CCmode)
	abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
	abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
3515
e075ae69
RH
/* Print to FILE the assembler name for register X, sized/selected by the
   operand modifier CODE ('b', 'w', 'k', 'q', 'h', 'y', 'm' -- see the
   print-code table above print_operand below).  With no modifier the
   size is taken from X's machine mode.  Aborts on registers that have
   no printable name (arg/frame pointer, flags, fpsr).  */

void
print_reg (x, code, file)
     rtx x;
     int code;
     FILE *file;
{
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  /* Translate the modifier letter into an internal size selector:
     1/2/4/8 = byte/word/dword/qword, 0 = high byte, 3 = FP stack,
     5 = MMX; otherwise derive it from the operand's mode.  */
  if (code == 'w')
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else if (code == 'm' || MMX_REG_P (x))
    code = 5;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      switch (code)
	{
	case 5:
	  error ("Extended registers have no high halves\n");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("Unsupported operand size for extended register.\n");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 5:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      /* Non-FP 32/64-bit registers get the "e"/"r" size prefix.  */
      if (! ANY_FP_REG_P (x))
	putc (code == 8 ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
3607
2a2ab3f9 3608/* Meaning of CODE:
fe25fea3 3609 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 3610 C -- print opcode suffix for set/cmov insn.
fe25fea3 3611 c -- like C, but print reversed condition
2a2ab3f9
JVA
3612 R -- print the prefix for register names.
3613 z -- print the opcode suffix for the size of the current operand.
3614 * -- print a star (in certain assembler syntax)
fb204271 3615 A -- print an absolute memory reference.
2a2ab3f9 3616 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
3617 s -- print a shift double count, followed by the assemblers argument
3618 delimiter.
fe25fea3
SC
3619 b -- print the QImode name of the register for the indicated operand.
3620 %b0 would print %al if operands[0] is reg 0.
3621 w -- likewise, print the HImode name of the register.
3622 k -- likewise, print the SImode name of the register.
3f3f2124 3623 q -- likewise, print the DImode name of the register.
fe25fea3 3624 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
a7180f70 3625 y -- print "st(0)" instead of "st" as a register.
a46d1d38
JH
3626 m -- print "st(n)" as an mmx register.
3627 D -- print condition for SSE cmp instruction.
3628 */
2a2ab3f9
JVA
3629
3630void
3631print_operand (file, x, code)
3632 FILE *file;
3633 rtx x;
3634 int code;
3635{
3636 if (code)
3637 {
3638 switch (code)
3639 {
3640 case '*':
e075ae69 3641 if (ASSEMBLER_DIALECT == 0)
2a2ab3f9
JVA
3642 putc ('*', file);
3643 return;
3644
fb204271
DN
3645 case 'A':
3646 if (ASSEMBLER_DIALECT == 0)
3647 putc ('*', file);
3648 else if (ASSEMBLER_DIALECT == 1)
3649 {
3650 /* Intel syntax. For absolute addresses, registers should not
3651 be surrounded by braces. */
3652 if (GET_CODE (x) != REG)
3653 {
3654 putc ('[', file);
3655 PRINT_OPERAND (file, x, 0);
3656 putc (']', file);
3657 return;
3658 }
3659 }
3660
3661 PRINT_OPERAND (file, x, 0);
3662 return;
3663
3664
2a2ab3f9 3665 case 'L':
e075ae69
RH
3666 if (ASSEMBLER_DIALECT == 0)
3667 putc ('l', file);
2a2ab3f9
JVA
3668 return;
3669
3670 case 'W':
e075ae69
RH
3671 if (ASSEMBLER_DIALECT == 0)
3672 putc ('w', file);
2a2ab3f9
JVA
3673 return;
3674
3675 case 'B':
e075ae69
RH
3676 if (ASSEMBLER_DIALECT == 0)
3677 putc ('b', file);
2a2ab3f9
JVA
3678 return;
3679
3680 case 'Q':
e075ae69
RH
3681 if (ASSEMBLER_DIALECT == 0)
3682 putc ('l', file);
2a2ab3f9
JVA
3683 return;
3684
3685 case 'S':
e075ae69
RH
3686 if (ASSEMBLER_DIALECT == 0)
3687 putc ('s', file);
2a2ab3f9
JVA
3688 return;
3689
5f1ec3e6 3690 case 'T':
e075ae69
RH
3691 if (ASSEMBLER_DIALECT == 0)
3692 putc ('t', file);
5f1ec3e6
JVA
3693 return;
3694
2a2ab3f9
JVA
3695 case 'z':
3696 /* 387 opcodes don't get size suffixes if the operands are
0f290768 3697 registers. */
2a2ab3f9
JVA
3698
3699 if (STACK_REG_P (x))
3700 return;
3701
3702 /* this is the size of op from size of operand */
3703 switch (GET_MODE_SIZE (GET_MODE (x)))
3704 {
2a2ab3f9 3705 case 2:
155d8a47
JW
3706#ifdef HAVE_GAS_FILDS_FISTS
3707 putc ('s', file);
3708#endif
2a2ab3f9
JVA
3709 return;
3710
3711 case 4:
3712 if (GET_MODE (x) == SFmode)
3713 {
e075ae69 3714 putc ('s', file);
2a2ab3f9
JVA
3715 return;
3716 }
3717 else
e075ae69 3718 putc ('l', file);
2a2ab3f9
JVA
3719 return;
3720
5f1ec3e6 3721 case 12:
2b589241 3722 case 16:
e075ae69
RH
3723 putc ('t', file);
3724 return;
5f1ec3e6 3725
2a2ab3f9
JVA
3726 case 8:
3727 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
3728 {
3729#ifdef GAS_MNEMONICS
e075ae69 3730 putc ('q', file);
56c0e8fa 3731#else
e075ae69
RH
3732 putc ('l', file);
3733 putc ('l', file);
56c0e8fa
JVA
3734#endif
3735 }
e075ae69
RH
3736 else
3737 putc ('l', file);
2a2ab3f9 3738 return;
155d8a47
JW
3739
3740 default:
3741 abort ();
2a2ab3f9 3742 }
4af3895e
JVA
3743
3744 case 'b':
3745 case 'w':
3746 case 'k':
3f3f2124 3747 case 'q':
4af3895e
JVA
3748 case 'h':
3749 case 'y':
a7180f70 3750 case 'm':
5cb6195d 3751 case 'X':
e075ae69 3752 case 'P':
4af3895e
JVA
3753 break;
3754
2d49677f
SC
3755 case 's':
3756 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
3757 {
3758 PRINT_OPERAND (file, x, 0);
e075ae69 3759 putc (',', file);
2d49677f 3760 }
a269a03c
JC
3761 return;
3762
a46d1d38
JH
3763 case 'D':
3764 /* Little bit of braindamage here. The SSE compare instructions
3765 does use completely different names for the comparisons that the
3766 fp conditional moves. */
3767 switch (GET_CODE (x))
3768 {
3769 case EQ:
3770 case UNEQ:
3771 fputs ("eq", file);
3772 break;
3773 case LT:
3774 case UNLT:
3775 fputs ("lt", file);
3776 break;
3777 case LE:
3778 case UNLE:
3779 fputs ("le", file);
3780 break;
3781 case UNORDERED:
3782 fputs ("unord", file);
3783 break;
3784 case NE:
3785 case LTGT:
3786 fputs ("neq", file);
3787 break;
3788 case UNGE:
3789 case GE:
3790 fputs ("nlt", file);
3791 break;
3792 case UNGT:
3793 case GT:
3794 fputs ("nle", file);
3795 break;
3796 case ORDERED:
3797 fputs ("ord", file);
3798 break;
3799 default:
3800 abort ();
3801 break;
3802 }
3803 return;
1853aadd 3804 case 'C':
e075ae69 3805 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 3806 return;
fe25fea3 3807 case 'F':
e075ae69 3808 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
3809 return;
3810
e9a25f70 3811 /* Like above, but reverse condition */
e075ae69
RH
3812 case 'c':
3813 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
3814 return;
fe25fea3 3815 case 'f':
e075ae69 3816 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 3817 return;
e5cb57e8 3818
4af3895e 3819 default:
68daafd4
JVA
3820 {
3821 char str[50];
68daafd4
JVA
3822 sprintf (str, "invalid operand code `%c'", code);
3823 output_operand_lossage (str);
3824 }
2a2ab3f9
JVA
3825 }
3826 }
e9a25f70 3827
2a2ab3f9
JVA
3828 if (GET_CODE (x) == REG)
3829 {
3830 PRINT_REG (x, code, file);
3831 }
e9a25f70 3832
2a2ab3f9
JVA
3833 else if (GET_CODE (x) == MEM)
3834 {
e075ae69
RH
3835 /* No `byte ptr' prefix for call instructions. */
3836 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
2a2ab3f9 3837 {
69ddee61 3838 const char * size;
e075ae69
RH
3839 switch (GET_MODE_SIZE (GET_MODE (x)))
3840 {
3841 case 1: size = "BYTE"; break;
3842 case 2: size = "WORD"; break;
3843 case 4: size = "DWORD"; break;
3844 case 8: size = "QWORD"; break;
3845 case 12: size = "XWORD"; break;
a7180f70 3846 case 16: size = "XMMWORD"; break;
e075ae69 3847 default:
564d80f4 3848 abort ();
e075ae69 3849 }
fb204271
DN
3850
3851 /* Check for explicit size override (codes 'b', 'w' and 'k') */
3852 if (code == 'b')
3853 size = "BYTE";
3854 else if (code == 'w')
3855 size = "WORD";
3856 else if (code == 'k')
3857 size = "DWORD";
3858
e075ae69
RH
3859 fputs (size, file);
3860 fputs (" PTR ", file);
2a2ab3f9 3861 }
e075ae69
RH
3862
3863 x = XEXP (x, 0);
3864 if (flag_pic && CONSTANT_ADDRESS_P (x))
3865 output_pic_addr_const (file, x, code);
2a2ab3f9 3866 else
e075ae69 3867 output_address (x);
2a2ab3f9 3868 }
e9a25f70 3869
2a2ab3f9
JVA
3870 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
3871 {
e9a25f70
JL
3872 REAL_VALUE_TYPE r;
3873 long l;
3874
5f1ec3e6
JVA
3875 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3876 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69
RH
3877
3878 if (ASSEMBLER_DIALECT == 0)
3879 putc ('$', file);
52267fcb 3880 fprintf (file, "0x%lx", l);
5f1ec3e6 3881 }
e9a25f70 3882
0f290768 3883 /* These float cases don't actually occur as immediate operands. */
5f1ec3e6
JVA
3884 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
3885 {
e9a25f70
JL
3886 REAL_VALUE_TYPE r;
3887 char dstr[30];
3888
5f1ec3e6
JVA
3889 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3890 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3891 fprintf (file, "%s", dstr);
2a2ab3f9 3892 }
e9a25f70 3893
2b589241
JH
3894 else if (GET_CODE (x) == CONST_DOUBLE
3895 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
2a2ab3f9 3896 {
e9a25f70
JL
3897 REAL_VALUE_TYPE r;
3898 char dstr[30];
3899
5f1ec3e6
JVA
3900 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3901 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3902 fprintf (file, "%s", dstr);
2a2ab3f9 3903 }
79325812 3904 else
2a2ab3f9 3905 {
4af3895e 3906 if (code != 'P')
2a2ab3f9 3907 {
695dac07 3908 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69
RH
3909 {
3910 if (ASSEMBLER_DIALECT == 0)
3911 putc ('$', file);
3912 }
2a2ab3f9
JVA
3913 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
3914 || GET_CODE (x) == LABEL_REF)
e075ae69
RH
3915 {
3916 if (ASSEMBLER_DIALECT == 0)
3917 putc ('$', file);
3918 else
3919 fputs ("OFFSET FLAT:", file);
3920 }
2a2ab3f9 3921 }
e075ae69
RH
3922 if (GET_CODE (x) == CONST_INT)
3923 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3924 else if (flag_pic)
2a2ab3f9
JVA
3925 output_pic_addr_const (file, x, code);
3926 else
3927 output_addr_const (file, x);
3928 }
3929}
3930\f
3931/* Print a memory operand whose address is ADDR. */
3932
3933void
3934print_operand_address (file, addr)
3935 FILE *file;
3936 register rtx addr;
3937{
e075ae69
RH
3938 struct ix86_address parts;
3939 rtx base, index, disp;
3940 int scale;
e9a25f70 3941
e075ae69
RH
3942 if (! ix86_decompose_address (addr, &parts))
3943 abort ();
e9a25f70 3944
e075ae69
RH
3945 base = parts.base;
3946 index = parts.index;
3947 disp = parts.disp;
3948 scale = parts.scale;
e9a25f70 3949
e075ae69
RH
3950 if (!base && !index)
3951 {
3952 /* Displacement only requires special attention. */
e9a25f70 3953
e075ae69 3954 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 3955 {
e075ae69 3956 if (ASSEMBLER_DIALECT != 0)
fb204271
DN
3957 {
3958 if (USER_LABEL_PREFIX[0] == 0)
3959 putc ('%', file);
3960 fputs ("ds:", file);
3961 }
e075ae69 3962 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
2a2ab3f9 3963 }
e075ae69
RH
3964 else if (flag_pic)
3965 output_pic_addr_const (file, addr, 0);
3966 else
3967 output_addr_const (file, addr);
3968 }
3969 else
3970 {
3971 if (ASSEMBLER_DIALECT == 0)
2a2ab3f9 3972 {
e075ae69 3973 if (disp)
2a2ab3f9 3974 {
c399861d 3975 if (flag_pic)
e075ae69
RH
3976 output_pic_addr_const (file, disp, 0);
3977 else if (GET_CODE (disp) == LABEL_REF)
3978 output_asm_label (disp);
2a2ab3f9 3979 else
e075ae69 3980 output_addr_const (file, disp);
2a2ab3f9
JVA
3981 }
3982
e075ae69
RH
3983 putc ('(', file);
3984 if (base)
3985 PRINT_REG (base, 0, file);
3986 if (index)
2a2ab3f9 3987 {
e075ae69
RH
3988 putc (',', file);
3989 PRINT_REG (index, 0, file);
3990 if (scale != 1)
3991 fprintf (file, ",%d", scale);
2a2ab3f9 3992 }
e075ae69 3993 putc (')', file);
2a2ab3f9 3994 }
2a2ab3f9
JVA
3995 else
3996 {
e075ae69 3997 rtx offset = NULL_RTX;
e9a25f70 3998
e075ae69
RH
3999 if (disp)
4000 {
4001 /* Pull out the offset of a symbol; print any symbol itself. */
4002 if (GET_CODE (disp) == CONST
4003 && GET_CODE (XEXP (disp, 0)) == PLUS
4004 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
4005 {
4006 offset = XEXP (XEXP (disp, 0), 1);
4007 disp = gen_rtx_CONST (VOIDmode,
4008 XEXP (XEXP (disp, 0), 0));
4009 }
ce193852 4010
e075ae69
RH
4011 if (flag_pic)
4012 output_pic_addr_const (file, disp, 0);
4013 else if (GET_CODE (disp) == LABEL_REF)
4014 output_asm_label (disp);
4015 else if (GET_CODE (disp) == CONST_INT)
4016 offset = disp;
4017 else
4018 output_addr_const (file, disp);
4019 }
e9a25f70 4020
e075ae69
RH
4021 putc ('[', file);
4022 if (base)
a8620236 4023 {
e075ae69
RH
4024 PRINT_REG (base, 0, file);
4025 if (offset)
4026 {
4027 if (INTVAL (offset) >= 0)
4028 putc ('+', file);
4029 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
4030 }
a8620236 4031 }
e075ae69
RH
4032 else if (offset)
4033 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 4034 else
e075ae69 4035 putc ('0', file);
e9a25f70 4036
e075ae69
RH
4037 if (index)
4038 {
4039 putc ('+', file);
4040 PRINT_REG (index, 0, file);
4041 if (scale != 1)
4042 fprintf (file, "*%d", scale);
4043 }
4044 putc (']', file);
4045 }
2a2ab3f9
JVA
4046 }
4047}
4048\f
4049/* Split one or more DImode RTL references into pairs of SImode
4050 references. The RTL can be REG, offsettable MEM, integer constant, or
4051 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
4052 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 4053 that parallel "operands". */
2a2ab3f9
JVA
4054
4055void
4056split_di (operands, num, lo_half, hi_half)
4057 rtx operands[];
4058 int num;
4059 rtx lo_half[], hi_half[];
4060{
4061 while (num--)
4062 {
57dbca5e 4063 rtx op = operands[num];
e075ae69
RH
4064 if (CONSTANT_P (op))
4065 split_double (op, &lo_half[num], &hi_half[num]);
4066 else if (! reload_completed)
a269a03c
JC
4067 {
4068 lo_half[num] = gen_lowpart (SImode, op);
4069 hi_half[num] = gen_highpart (SImode, op);
4070 }
4071 else if (GET_CODE (op) == REG)
2a2ab3f9 4072 {
57dbca5e
BS
4073 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
4074 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
2a2ab3f9 4075 }
57dbca5e 4076 else if (offsettable_memref_p (op))
2a2ab3f9 4077 {
57dbca5e
BS
4078 rtx lo_addr = XEXP (op, 0);
4079 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
4080 lo_half[num] = change_address (op, SImode, lo_addr);
4081 hi_half[num] = change_address (op, SImode, hi_addr);
2a2ab3f9
JVA
4082 }
4083 else
564d80f4 4084 abort ();
2a2ab3f9
JVA
4085 }
4086}
4087\f
2a2ab3f9
JVA
4088/* Output code to perform a 387 binary operation in INSN, one of PLUS,
4089 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
4090 is the expression of the binary operation. The output may either be
4091 emitted here, or returned to the caller, like all output_* functions.
4092
4093 There is no guarantee that the operands are the same mode, as they
0f290768 4094 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 4095
e3c2afab
AM
4096#ifndef SYSV386_COMPAT
4097/* Set to 1 for compatibility with brain-damaged assemblers. No-one
4098 wants to fix the assemblers because that causes incompatibility
4099 with gcc. No-one wants to fix gcc because that causes
4100 incompatibility with assemblers... You can use the option of
4101 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
4102#define SYSV386_COMPAT 1
4103#endif
4104
69ddee61 4105const char *
2a2ab3f9
JVA
4106output_387_binary_op (insn, operands)
4107 rtx insn;
4108 rtx *operands;
4109{
e3c2afab 4110 static char buf[30];
69ddee61 4111 const char *p;
1deaa899
JH
4112 const char *ssep;
4113 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
2a2ab3f9 4114
e3c2afab
AM
4115#ifdef ENABLE_CHECKING
4116 /* Even if we do not want to check the inputs, this documents input
4117 constraints. Which helps in understanding the following code. */
4118 if (STACK_REG_P (operands[0])
4119 && ((REG_P (operands[1])
4120 && REGNO (operands[0]) == REGNO (operands[1])
4121 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
4122 || (REG_P (operands[2])
4123 && REGNO (operands[0]) == REGNO (operands[2])
4124 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
4125 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
4126 ; /* ok */
1deaa899 4127 else if (!is_sse)
e3c2afab
AM
4128 abort ();
4129#endif
4130
2a2ab3f9
JVA
4131 switch (GET_CODE (operands[3]))
4132 {
4133 case PLUS:
e075ae69
RH
4134 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4135 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4136 p = "fiadd";
4137 else
4138 p = "fadd";
1deaa899 4139 ssep = "add";
2a2ab3f9
JVA
4140 break;
4141
4142 case MINUS:
e075ae69
RH
4143 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4144 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4145 p = "fisub";
4146 else
4147 p = "fsub";
1deaa899 4148 ssep = "sub";
2a2ab3f9
JVA
4149 break;
4150
4151 case MULT:
e075ae69
RH
4152 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4153 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4154 p = "fimul";
4155 else
4156 p = "fmul";
1deaa899 4157 ssep = "mul";
2a2ab3f9
JVA
4158 break;
4159
4160 case DIV:
e075ae69
RH
4161 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4162 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4163 p = "fidiv";
4164 else
4165 p = "fdiv";
1deaa899 4166 ssep = "div";
2a2ab3f9
JVA
4167 break;
4168
4169 default:
4170 abort ();
4171 }
4172
1deaa899
JH
4173 if (is_sse)
4174 {
4175 strcpy (buf, ssep);
4176 if (GET_MODE (operands[0]) == SFmode)
4177 strcat (buf, "ss\t{%2, %0|%0, %2}");
4178 else
4179 strcat (buf, "sd\t{%2, %0|%0, %2}");
4180 return buf;
4181 }
e075ae69 4182 strcpy (buf, p);
2a2ab3f9
JVA
4183
4184 switch (GET_CODE (operands[3]))
4185 {
4186 case MULT:
4187 case PLUS:
4188 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
4189 {
e3c2afab 4190 rtx temp = operands[2];
2a2ab3f9
JVA
4191 operands[2] = operands[1];
4192 operands[1] = temp;
4193 }
4194
e3c2afab
AM
4195 /* know operands[0] == operands[1]. */
4196
2a2ab3f9 4197 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
4198 {
4199 p = "%z2\t%2";
4200 break;
4201 }
2a2ab3f9
JVA
4202
4203 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
4204 {
4205 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
4206 /* How is it that we are storing to a dead operand[2]?
4207 Well, presumably operands[1] is dead too. We can't
4208 store the result to st(0) as st(0) gets popped on this
4209 instruction. Instead store to operands[2] (which I
4210 think has to be st(1)). st(1) will be popped later.
4211 gcc <= 2.8.1 didn't have this check and generated
4212 assembly code that the Unixware assembler rejected. */
4213 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 4214 else
e3c2afab 4215 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 4216 break;
6b28fd63 4217 }
2a2ab3f9
JVA
4218
4219 if (STACK_TOP_P (operands[0]))
e3c2afab 4220 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 4221 else
e3c2afab 4222 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 4223 break;
2a2ab3f9
JVA
4224
4225 case MINUS:
4226 case DIV:
4227 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
4228 {
4229 p = "r%z1\t%1";
4230 break;
4231 }
2a2ab3f9
JVA
4232
4233 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
4234 {
4235 p = "%z2\t%2";
4236 break;
4237 }
2a2ab3f9 4238
2a2ab3f9 4239 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 4240 {
e3c2afab
AM
4241#if SYSV386_COMPAT
4242 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
4243 derived assemblers, confusingly reverse the direction of
4244 the operation for fsub{r} and fdiv{r} when the
4245 destination register is not st(0). The Intel assembler
4246 doesn't have this brain damage. Read !SYSV386_COMPAT to
4247 figure out what the hardware really does. */
4248 if (STACK_TOP_P (operands[0]))
4249 p = "{p\t%0, %2|rp\t%2, %0}";
4250 else
4251 p = "{rp\t%2, %0|p\t%0, %2}";
4252#else
6b28fd63 4253 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
4254 /* As above for fmul/fadd, we can't store to st(0). */
4255 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 4256 else
e3c2afab
AM
4257 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
4258#endif
e075ae69 4259 break;
6b28fd63 4260 }
2a2ab3f9
JVA
4261
4262 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 4263 {
e3c2afab 4264#if SYSV386_COMPAT
6b28fd63 4265 if (STACK_TOP_P (operands[0]))
e3c2afab 4266 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 4267 else
e3c2afab
AM
4268 p = "{p\t%1, %0|rp\t%0, %1}";
4269#else
4270 if (STACK_TOP_P (operands[0]))
4271 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
4272 else
4273 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
4274#endif
e075ae69 4275 break;
6b28fd63 4276 }
2a2ab3f9
JVA
4277
4278 if (STACK_TOP_P (operands[0]))
4279 {
4280 if (STACK_TOP_P (operands[1]))
e3c2afab 4281 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 4282 else
e3c2afab 4283 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 4284 break;
2a2ab3f9
JVA
4285 }
4286 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
4287 {
4288#if SYSV386_COMPAT
4289 p = "{\t%1, %0|r\t%0, %1}";
4290#else
4291 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
4292#endif
4293 }
2a2ab3f9 4294 else
e3c2afab
AM
4295 {
4296#if SYSV386_COMPAT
4297 p = "{r\t%2, %0|\t%0, %2}";
4298#else
4299 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
4300#endif
4301 }
e075ae69 4302 break;
2a2ab3f9
JVA
4303
4304 default:
4305 abort ();
4306 }
e075ae69
RH
4307
4308 strcat (buf, p);
4309 return buf;
2a2ab3f9 4310}
e075ae69 4311
2a2ab3f9 4312/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 4313 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 4314 operand may be [SDX]Fmode. */
2a2ab3f9 4315
69ddee61 4316const char *
2a2ab3f9
JVA
4317output_fix_trunc (insn, operands)
4318 rtx insn;
4319 rtx *operands;
4320{
4321 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69
RH
4322 int dimode_p = GET_MODE (operands[0]) == DImode;
4323 rtx xops[4];
2a2ab3f9 4324
e075ae69
RH
4325 /* Jump through a hoop or two for DImode, since the hardware has no
4326 non-popping instruction. We used to do this a different way, but
4327 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
4328 if (dimode_p && !stack_top_dies)
4329 output_asm_insn ("fld\t%y1", operands);
e075ae69
RH
4330
4331 if (! STACK_TOP_P (operands[1]))
10195bd8
JW
4332 abort ();
4333
e075ae69
RH
4334 xops[0] = GEN_INT (12);
4335 xops[1] = adj_offsettable_operand (operands[2], 1);
4336 xops[1] = change_address (xops[1], QImode, NULL_RTX);
305f097e 4337
e075ae69
RH
4338 xops[2] = operands[0];
4339 if (GET_CODE (operands[0]) != MEM)
4340 xops[2] = operands[3];
2a2ab3f9 4341
e075ae69
RH
4342 output_asm_insn ("fnstcw\t%2", operands);
4343 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
4344 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
4345 output_asm_insn ("fldcw\t%2", operands);
4346 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
e9a25f70 4347
e075ae69
RH
4348 if (stack_top_dies || dimode_p)
4349 output_asm_insn ("fistp%z2\t%2", xops);
10195bd8 4350 else
e075ae69
RH
4351 output_asm_insn ("fist%z2\t%2", xops);
4352
4353 output_asm_insn ("fldcw\t%2", operands);
10195bd8 4354
e075ae69 4355 if (GET_CODE (operands[0]) != MEM)
2a2ab3f9 4356 {
e075ae69 4357 if (dimode_p)
2e14a41b 4358 {
e075ae69
RH
4359 split_di (operands+0, 1, xops+0, xops+1);
4360 split_di (operands+3, 1, xops+2, xops+3);
4361 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4362 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
2e14a41b 4363 }
46d21d2c 4364 else if (GET_MODE (operands[0]) == SImode)
e3c2afab 4365 output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
46d21d2c
JW
4366 else
4367 output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
2a2ab3f9 4368 }
2a2ab3f9 4369
e075ae69 4370 return "";
2a2ab3f9 4371}
cda749b1 4372
e075ae69
RH
4373/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
4374 should be used and 2 when fnstsw should be used. UNORDERED_P is true
4375 when fucom should be used. */
4376
69ddee61 4377const char *
e075ae69 4378output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
4379 rtx insn;
4380 rtx *operands;
e075ae69 4381 int eflags_p, unordered_p;
cda749b1 4382{
e075ae69
RH
4383 int stack_top_dies;
4384 rtx cmp_op0 = operands[0];
4385 rtx cmp_op1 = operands[1];
0644b628 4386 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
e075ae69
RH
4387
4388 if (eflags_p == 2)
4389 {
4390 cmp_op0 = cmp_op1;
4391 cmp_op1 = operands[2];
4392 }
0644b628
JH
4393 if (is_sse)
4394 {
4395 if (GET_MODE (operands[0]) == SFmode)
4396 if (unordered_p)
4397 return "ucomiss\t{%1, %0|%0, %1}";
4398 else
4399 return "comiss\t{%1, %0|%0, %y}";
4400 else
4401 if (unordered_p)
4402 return "ucomisd\t{%1, %0|%0, %1}";
4403 else
4404 return "comisd\t{%1, %0|%0, %y}";
4405 }
cda749b1 4406
e075ae69 4407 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
4408 abort ();
4409
e075ae69 4410 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 4411
e075ae69
RH
4412 if (STACK_REG_P (cmp_op1)
4413 && stack_top_dies
4414 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
4415 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 4416 {
e075ae69
RH
4417 /* If both the top of the 387 stack dies, and the other operand
4418 is also a stack register that dies, then this must be a
4419 `fcompp' float compare */
4420
4421 if (eflags_p == 1)
4422 {
4423 /* There is no double popping fcomi variant. Fortunately,
4424 eflags is immune from the fstp's cc clobbering. */
4425 if (unordered_p)
4426 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
4427 else
4428 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
4429 return "fstp\t%y0";
4430 }
4431 else
cda749b1 4432 {
e075ae69
RH
4433 if (eflags_p == 2)
4434 {
4435 if (unordered_p)
4436 return "fucompp\n\tfnstsw\t%0";
4437 else
4438 return "fcompp\n\tfnstsw\t%0";
4439 }
cda749b1
JW
4440 else
4441 {
e075ae69
RH
4442 if (unordered_p)
4443 return "fucompp";
4444 else
4445 return "fcompp";
cda749b1
JW
4446 }
4447 }
cda749b1
JW
4448 }
4449 else
4450 {
e075ae69 4451 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 4452
0f290768 4453 static const char * const alt[24] =
e075ae69
RH
4454 {
4455 "fcom%z1\t%y1",
4456 "fcomp%z1\t%y1",
4457 "fucom%z1\t%y1",
4458 "fucomp%z1\t%y1",
0f290768 4459
e075ae69
RH
4460 "ficom%z1\t%y1",
4461 "ficomp%z1\t%y1",
4462 NULL,
4463 NULL,
4464
4465 "fcomi\t{%y1, %0|%0, %y1}",
4466 "fcomip\t{%y1, %0|%0, %y1}",
4467 "fucomi\t{%y1, %0|%0, %y1}",
4468 "fucomip\t{%y1, %0|%0, %y1}",
4469
4470 NULL,
4471 NULL,
4472 NULL,
4473 NULL,
4474
4475 "fcom%z2\t%y2\n\tfnstsw\t%0",
4476 "fcomp%z2\t%y2\n\tfnstsw\t%0",
4477 "fucom%z2\t%y2\n\tfnstsw\t%0",
4478 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 4479
e075ae69
RH
4480 "ficom%z2\t%y2\n\tfnstsw\t%0",
4481 "ficomp%z2\t%y2\n\tfnstsw\t%0",
4482 NULL,
4483 NULL
4484 };
4485
4486 int mask;
69ddee61 4487 const char *ret;
e075ae69
RH
4488
4489 mask = eflags_p << 3;
4490 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4491 mask |= unordered_p << 1;
4492 mask |= stack_top_dies;
4493
4494 if (mask >= 24)
4495 abort ();
4496 ret = alt[mask];
4497 if (ret == NULL)
4498 abort ();
cda749b1 4499
e075ae69 4500 return ret;
cda749b1
JW
4501 }
4502}
2a2ab3f9 4503
e075ae69 4504/* Output assembler code to FILE to initialize basic-block profiling.
2a2ab3f9 4505
e075ae69 4506 If profile_block_flag == 2
2a2ab3f9 4507
e075ae69
RH
4508 Output code to call the subroutine `__bb_init_trace_func'
4509 and pass two parameters to it. The first parameter is
4510 the address of a block allocated in the object module.
4511 The second parameter is the number of the first basic block
4512 of the function.
2a2ab3f9 4513
e075ae69 4514 The name of the block is a local symbol made with this statement:
0f290768 4515
e075ae69 4516 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
2a2ab3f9 4517
e075ae69
RH
4518 Of course, since you are writing the definition of
4519 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4520 can take a short cut in the definition of this macro and use the
4521 name that you know will result.
2a2ab3f9 4522
e075ae69
RH
4523 The number of the first basic block of the function is
4524 passed to the macro in BLOCK_OR_LABEL.
2a2ab3f9 4525
e075ae69
RH
4526 If described in a virtual assembler language the code to be
4527 output looks like:
2a2ab3f9 4528
e075ae69
RH
4529 parameter1 <- LPBX0
4530 parameter2 <- BLOCK_OR_LABEL
4531 call __bb_init_trace_func
2a2ab3f9 4532
e075ae69 4533 else if profile_block_flag != 0
e74389ff 4534
e075ae69
RH
4535 Output code to call the subroutine `__bb_init_func'
4536 and pass one single parameter to it, which is the same
4537 as the first parameter to `__bb_init_trace_func'.
e74389ff 4538
e075ae69
RH
4539 The first word of this parameter is a flag which will be nonzero if
4540 the object module has already been initialized. So test this word
4541 first, and do not call `__bb_init_func' if the flag is nonzero.
4542 Note: When profile_block_flag == 2 the test need not be done
4543 but `__bb_init_trace_func' *must* be called.
e74389ff 4544
e075ae69
RH
4545 BLOCK_OR_LABEL may be used to generate a label number as a
4546 branch destination in case `__bb_init_func' will not be called.
e74389ff 4547
e075ae69
RH
4548 If described in a virtual assembler language the code to be
4549 output looks like:
2a2ab3f9 4550
e075ae69
RH
4551 cmp (LPBX0),0
4552 jne local_label
4553 parameter1 <- LPBX0
4554 call __bb_init_func
4555 local_label:
4556*/
c572e5ba 4557
e075ae69
RH
4558void
4559ix86_output_function_block_profiler (file, block_or_label)
4560 FILE *file;
4561 int block_or_label;
c572e5ba 4562{
e075ae69
RH
4563 static int num_func = 0;
4564 rtx xops[8];
4565 char block_table[80], false_label[80];
c572e5ba 4566
e075ae69 4567 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
e9a25f70 4568
e075ae69
RH
4569 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4570 xops[5] = stack_pointer_rtx;
4571 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
2a2ab3f9 4572
e075ae69 4573 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
c572e5ba 4574
e075ae69 4575 switch (profile_block_flag)
c572e5ba 4576 {
e075ae69
RH
4577 case 2:
4578 xops[2] = GEN_INT (block_or_label);
4579 xops[3] = gen_rtx_MEM (Pmode,
4580 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4581 xops[6] = GEN_INT (8);
e9a25f70 4582
e075ae69
RH
4583 output_asm_insn ("push{l}\t%2", xops);
4584 if (!flag_pic)
4585 output_asm_insn ("push{l}\t%1", xops);
e9a25f70 4586 else
870a0c2c 4587 {
e075ae69
RH
4588 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4589 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 4590 }
e075ae69
RH
4591 output_asm_insn ("call\t%P3", xops);
4592 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4593 break;
c572e5ba 4594
e075ae69
RH
4595 default:
4596 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
c572e5ba 4597
e075ae69
RH
4598 xops[0] = const0_rtx;
4599 xops[2] = gen_rtx_MEM (Pmode,
4600 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4601 xops[3] = gen_rtx_MEM (Pmode,
4602 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4603 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4604 xops[6] = GEN_INT (4);
a14003ee 4605
e075ae69 4606 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
446ba526 4607
e075ae69
RH
4608 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4609 output_asm_insn ("jne\t%2", xops);
870a0c2c 4610
e075ae69
RH
4611 if (!flag_pic)
4612 output_asm_insn ("push{l}\t%1", xops);
4613 else
4614 {
4615 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
4616 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 4617 }
e075ae69
RH
4618 output_asm_insn ("call\t%P3", xops);
4619 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4620 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4621 num_func++;
4622 break;
c572e5ba 4623 }
2a2ab3f9 4624}
305f097e 4625
e075ae69
RH
4626/* Output assembler code to FILE to increment a counter associated
4627 with basic block number BLOCKNO.
305f097e 4628
e075ae69 4629 If profile_block_flag == 2
ecbc4695 4630
e075ae69
RH
4631 Output code to initialize the global structure `__bb' and
4632 call the function `__bb_trace_func' which will increment the
4633 counter.
ecbc4695 4634
e075ae69
RH
4635 `__bb' consists of two words. In the first word the number
4636 of the basic block has to be stored. In the second word
0f290768 4637 the address of a block allocated in the object module
e075ae69 4638 has to be stored.
ecbc4695 4639
e075ae69 4640 The basic block number is given by BLOCKNO.
ecbc4695 4641
0f290768 4642 The address of the block is given by the label created with
305f097e 4643
e075ae69 4644 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
305f097e 4645
e075ae69 4646 by FUNCTION_BLOCK_PROFILER.
ecbc4695 4647
e075ae69
RH
4648 Of course, since you are writing the definition of
4649 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4650 can take a short cut in the definition of this macro and use the
4651 name that you know will result.
305f097e 4652
e075ae69
RH
4653 If described in a virtual assembler language the code to be
4654 output looks like:
305f097e 4655
e075ae69
RH
4656 move BLOCKNO -> (__bb)
4657 move LPBX0 -> (__bb+4)
4658 call __bb_trace_func
305f097e 4659
e075ae69
RH
4660 Note that function `__bb_trace_func' must not change the
4661 machine state, especially the flag register. To grant
4662 this, you must output code to save and restore registers
4663 either in this macro or in the macros MACHINE_STATE_SAVE
4664 and MACHINE_STATE_RESTORE. The last two macros will be
4665 used in the function `__bb_trace_func', so you must make
0f290768 4666 sure that the function prologue does not change any
e075ae69 4667 register prior to saving it with MACHINE_STATE_SAVE.
305f097e 4668
e075ae69 4669 else if profile_block_flag != 0
305f097e 4670
e075ae69
RH
4671 Output code to increment the counter directly.
4672 Basic blocks are numbered separately from zero within each
4673 compiled object module. The count associated with block number
0f290768 4674 BLOCKNO is at index BLOCKNO in an array of words; the name of
e075ae69 4675 this array is a local symbol made with this statement:
32b5b1aa 4676
e075ae69 4677 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
32b5b1aa 4678
e075ae69
RH
4679 Of course, since you are writing the definition of
4680 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4681 can take a short cut in the definition of this macro and use the
0f290768 4682 name that you know will result.
32b5b1aa 4683
e075ae69
RH
4684 If described in a virtual assembler language the code to be
4685 output looks like:
32b5b1aa 4686
e075ae69
RH
4687 inc (LPBX2+4*BLOCKNO)
4688*/
32b5b1aa 4689
e075ae69
RH
4690void
4691ix86_output_block_profiler (file, blockno)
4692 FILE *file ATTRIBUTE_UNUSED;
4693 int blockno;
4694{
4695 rtx xops[8], cnt_rtx;
4696 char counts[80];
4697 char *block_table = counts;
4698
4699 switch (profile_block_flag)
4700 {
4701 case 2:
4702 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
32b5b1aa 4703
e075ae69
RH
4704 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4705 xops[2] = GEN_INT (blockno);
4706 xops[3] = gen_rtx_MEM (Pmode,
4707 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
4708 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
4709 xops[5] = plus_constant (xops[4], 4);
4710 xops[0] = gen_rtx_MEM (SImode, xops[4]);
4711 xops[6] = gen_rtx_MEM (SImode, xops[5]);
79325812 4712
e075ae69 4713 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
32b5b1aa 4714
e075ae69
RH
4715 output_asm_insn ("pushf", xops);
4716 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4717 if (flag_pic)
32b5b1aa 4718 {
e075ae69
RH
4719 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4720 output_asm_insn ("push{l}\t%7", xops);
4721 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4722 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
4723 output_asm_insn ("pop{l}\t%7", xops);
4724 }
4725 else
4726 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
4727 output_asm_insn ("call\t%P3", xops);
4728 output_asm_insn ("popf", xops);
32b5b1aa 4729
e075ae69 4730 break;
32b5b1aa 4731
e075ae69
RH
4732 default:
4733 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
4734 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
4735 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
32b5b1aa 4736
e075ae69
RH
4737 if (blockno)
4738 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
32b5b1aa 4739
e075ae69
RH
4740 if (flag_pic)
4741 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
32b5b1aa 4742
e075ae69
RH
4743 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
4744 output_asm_insn ("inc{l}\t%0", xops);
32b5b1aa 4745
e075ae69 4746 break;
32b5b1aa 4747 }
32b5b1aa 4748}
32b5b1aa 4749\f
79325812 4750void
e075ae69
RH
4751ix86_expand_move (mode, operands)
4752 enum machine_mode mode;
4753 rtx operands[];
32b5b1aa 4754{
e075ae69 4755 int strict = (reload_in_progress || reload_completed);
e075ae69 4756 rtx insn;
e9a25f70 4757
e075ae69 4758 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
32b5b1aa 4759 {
e075ae69 4760 /* Emit insns to move operands[1] into operands[0]. */
e9a25f70 4761
e075ae69
RH
4762 if (GET_CODE (operands[0]) == MEM)
4763 operands[1] = force_reg (Pmode, operands[1]);
4764 else
32b5b1aa 4765 {
e075ae69
RH
4766 rtx temp = operands[0];
4767 if (GET_CODE (temp) != REG)
4768 temp = gen_reg_rtx (Pmode);
4769 temp = legitimize_pic_address (operands[1], temp);
4770 if (temp == operands[0])
4771 return;
4772 operands[1] = temp;
32b5b1aa 4773 }
e075ae69
RH
4774 }
4775 else
4776 {
d7a29404
JH
4777 if (GET_CODE (operands[0]) == MEM
4778 && (GET_MODE (operands[0]) == QImode
4779 || !push_operand (operands[0], mode))
4780 && GET_CODE (operands[1]) == MEM)
e075ae69 4781 operands[1] = force_reg (mode, operands[1]);
e9a25f70 4782
2c5a510c
RH
4783 if (push_operand (operands[0], mode)
4784 && ! general_no_elim_operand (operands[1], mode))
4785 operands[1] = copy_to_mode_reg (mode, operands[1]);
4786
e075ae69 4787 if (FLOAT_MODE_P (mode))
32b5b1aa 4788 {
d7a29404
JH
4789 /* If we are loading a floating point constant to a register,
4790 force the value to memory now, since we'll get better code
4791 out the back end. */
e075ae69
RH
4792
4793 if (strict)
4794 ;
e075ae69 4795 else if (GET_CODE (operands[1]) == CONST_DOUBLE
d7a29404 4796 && register_operand (operands[0], mode))
e075ae69 4797 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
32b5b1aa 4798 }
32b5b1aa 4799 }
e9a25f70 4800
e075ae69 4801 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
e9a25f70 4802
e075ae69
RH
4803 emit_insn (insn);
4804}
e9a25f70 4805
e075ae69
RH
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.

   CODE is the rtx code of the operation, MODE its machine mode, and
   OPERANDS the three operand rtxes (dest, src1, src2).  Emits the insn
   (plus any reloading moves needed) into the instruction stream.  */

void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators.
     Swapping puts the matching/non-immediate operand first, which the
     insn patterns prefer.  */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  matching_memory records which
     source (1 or 2) equals the memory destination, or 0 for none.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  Keep the one that matches
     the memory destination (if any) in memory; force the other to a reg.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE.  */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      /* Normal case: the i386 arithmetic patterns clobber the flags, so
	 wrap the SET and a FLAGS_REG clobber in one PARALLEL.  */
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed: if we substituted a pseudo for a
     memory destination above, store the result back now.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
4895
4896/* Return TRUE or FALSE depending on whether the binary operator meets the
4897 appropriate constraints. */
4898
4899int
4900ix86_binary_operator_ok (code, mode, operands)
4901 enum rtx_code code;
4902 enum machine_mode mode ATTRIBUTE_UNUSED;
4903 rtx operands[3];
4904{
4905 /* Both source operands cannot be in memory. */
4906 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4907 return 0;
4908 /* If the operation is not commutable, source 1 cannot be a constant. */
4909 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4910 return 0;
4911 /* If the destination is memory, we must have a matching source operand. */
4912 if (GET_CODE (operands[0]) == MEM
4913 && ! (rtx_equal_p (operands[0], operands[1])
4914 || (GET_RTX_CLASS (code) == 'c'
4915 && rtx_equal_p (operands[0], operands[2]))))
4916 return 0;
06a964de
JH
4917 /* If the operation is not commutable and the source 1 is memory, we must
4918 have a matching destionation. */
4919 if (GET_CODE (operands[1]) == MEM
4920 && GET_RTX_CLASS (code) != 'c'
4921 && ! rtx_equal_p (operands[0], operands[1]))
4922 return 0;
e075ae69
RH
4923 return 1;
4924}
4925
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.

   CODE is the rtx code of the operation, MODE its machine mode, and
   OPERANDS the two operand rtxes (dest, src).  Emits the insn (plus any
   reloading moves needed) into the instruction stream.  */

void
ix86_expand_unary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE.  */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  NOT is the only unary op here that
	 leaves the flags alone, so it is also the only one valid during
	 reload.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      /* Other unary ops (e.g. NEG) clobber the flags; say so.  */
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed: if a pseudo was substituted for a
     memory destination above, store the result back now.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
4987
4988/* Return TRUE or FALSE depending on whether the unary operator meets the
4989 appropriate constraints. */
4990
4991int
4992ix86_unary_operator_ok (code, mode, operands)
4993 enum rtx_code code ATTRIBUTE_UNUSED;
4994 enum machine_mode mode ATTRIBUTE_UNUSED;
4995 rtx operands[2] ATTRIBUTE_UNUSED;
4996{
06a964de
JH
4997 /* If one of operands is memory, source and destination must match. */
4998 if ((GET_CODE (operands[0]) == MEM
4999 || GET_CODE (operands[1]) == MEM)
5000 && ! rtx_equal_p (operands[0], operands[1]))
5001 return FALSE;
e075ae69
RH
5002 return TRUE;
5003}
5004
16189740
RH
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.

   The cases below are ordered from most constrained (CCNOmode) to
   least (CCZmode); the FALLTHRUs implement "a mode satisfies every
   requirement weaker than itself".  */

int
ix86_match_ccmode (insn, req_mode)
     rtx insn;
     enum machine_mode req_mode;
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  /* Only COMPARE sources are meaningful here.  */
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      /* CCNOmode additionally satisfies a CCmode request when the
	 comparison is against zero.  */
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  /* Finally, source and destination CC modes must agree.  */
  return (GET_MODE (SET_SRC (set)) == set_mode);
}
5055
e075ae69
RH
/* Generate insn patterns to do an integer compare of OPERANDS.

   Emits a COMPARE of OP0 and OP1 into the flags register and returns
   the comparison rtx (CODE applied to the flags reg and const0_rtx)
   for the flags consumer (bcc, scc, or cmov) to use.  */

static rtx
ix86_expand_int_compare (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  /* Pick the least constrained CC mode that still captures what CODE
     needs (see ix86_cc_mode via SELECT_CC_MODE).  */
  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
5078
3a3677ff
RH
5079/* Figure out whether to use ordered or unordered fp comparisons.
5080 Return the appropriate mode to use. */
e075ae69 5081
b1cdafbb 5082enum machine_mode
3a3677ff 5083ix86_fp_compare_mode (code)
8752c357 5084 enum rtx_code code ATTRIBUTE_UNUSED;
e075ae69 5085{
9e7adcb3
JH
5086 /* ??? In order to make all comparisons reversible, we do all comparisons
5087 non-trapping when compiling for IEEE. Once gcc is able to distinguish
5088 all forms trapping and nontrapping comparisons, we can make inequality
5089 comparisons trapping again, since it results in better code when using
5090 FCOM based compares. */
5091 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
5092}
5093
9076b9c1
JH
/* Return the condition-code mode to be used when comparing OP0 with OP1
   under comparison code CODE.  Floating point operands get the FP
   compare mode; integer codes pick the CC mode recording exactly the
   flags the code needs.  */

enum machine_mode
ix86_cc_mode (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests agains overflow
         that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
    default:
      abort ();
    }
}
5136
3a3677ff
RH
5137/* Return true if we should use an FCOMI instruction for this fp comparison. */
5138
a940d8bd 5139int
3a3677ff 5140ix86_use_fcomi_compare (code)
9e7adcb3 5141 enum rtx_code code ATTRIBUTE_UNUSED;
3a3677ff 5142{
9e7adcb3
JH
5143 enum rtx_code swapped_code = swap_condition (code);
5144 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
5145 || (ix86_fp_comparison_cost (swapped_code)
5146 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
5147}
5148
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (code, pop0, pop1)
     enum rtx_code code;
     rtx *pop0, *pop1;
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || op_mode == XFmode
	  || op_mode == TFmode
	  || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  /* Constants the 387 can load directly stay in a register;
	     anything else goes through the constant pool.  */
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op0) == REG || !reload_completed))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
5220
c0c102a9
JH
5221/* Convert comparison codes we use to represent FP comparison to integer
5222 code that will result in proper branch. Return UNKNOWN if no such code
5223 is available. */
5224static enum rtx_code
5225ix86_fp_compare_code_to_integer (code)
5226 enum rtx_code code;
5227{
5228 switch (code)
5229 {
5230 case GT:
5231 return GTU;
5232 case GE:
5233 return GEU;
5234 case ORDERED:
5235 case UNORDERED:
5236 return code;
5237 break;
5238 case UNEQ:
5239 return EQ;
5240 break;
5241 case UNLT:
5242 return LTU;
5243 break;
5244 case UNLE:
5245 return LEU;
5246 break;
5247 case LTGT:
5248 return NE;
5249 break;
5250 default:
5251 return UNKNOWN;
5252 }
5253}
5254
/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for a branch that
   will branch around FIRST_CODE and SECOND_CODE.  If one of the
   branches is not required, its code is set to NIL.
   We never require more than two branches.  */
static void
ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
     enum rtx_code code, *bypass_code, *first_code, *second_code;
{
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

       cmp    ZF PF CF
       >      0  0  0
       <      0  0  1
       =      1  0  0
       un     1  1  1 */

  switch (code)
    {
      /* These codes are directly representable by a single flag test
	 (the comments give the integer code and the flags tested).  */
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;
      /* These trapping codes fail on unordered inputs; a bypass branch
	 on UNORDERED jumps around the main test.  */
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
      /* These need a second branch taken when the input is unordered.  */
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  /* Without IEEE conformance we need not distinguish ordered and
     unordered forms, so a single branch always suffices.  */
  if (!TARGET_IEEE_FP)
    {
      *second_code = NIL;
      *bypass_code = NIL;
    }
}
5320
9e7adcb3
JH
5321/* Return cost of comparison done fcom + arithmetics operations on AX.
5322 All following functions do use number of instructions as an cost metrics.
5323 In future this should be tweaked to compute bytes for optimize_size and
5324 take into account performance of various instructions on various CPUs. */
5325static int
5326ix86_fp_comparison_arithmetics_cost (code)
5327 enum rtx_code code;
5328{
5329 if (!TARGET_IEEE_FP)
5330 return 4;
5331 /* The cost of code output by ix86_expand_fp_compare. */
5332 switch (code)
5333 {
5334 case UNLE:
5335 case UNLT:
5336 case LTGT:
5337 case GT:
5338 case GE:
5339 case UNORDERED:
5340 case ORDERED:
5341 case UNEQ:
5342 return 4;
5343 break;
5344 case LT:
5345 case NE:
5346 case EQ:
5347 case UNGE:
5348 return 5;
5349 break;
5350 case LE:
5351 case UNGT:
5352 return 6;
5353 break;
5354 default:
5355 abort ();
5356 }
5357}
5358
5359/* Return cost of comparison done using fcomi operation.
5360 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5361static int
5362ix86_fp_comparison_fcomi_cost (code)
5363 enum rtx_code code;
5364{
5365 enum rtx_code bypass_code, first_code, second_code;
5366 /* Return arbitarily high cost when instruction is not supported - this
5367 prevents gcc from using it. */
5368 if (!TARGET_CMOVE)
5369 return 1024;
5370 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5371 return (bypass_code != NIL || second_code != NIL) + 2;
5372}
5373
5374/* Return cost of comparison done using sahf operation.
5375 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5376static int
5377ix86_fp_comparison_sahf_cost (code)
5378 enum rtx_code code;
5379{
5380 enum rtx_code bypass_code, first_code, second_code;
5381 /* Return arbitarily high cost when instruction is not preferred - this
5382 avoids gcc from using it. */
5383 if (!TARGET_USE_SAHF && !optimize_size)
5384 return 1024;
5385 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5386 return (bypass_code != NIL || second_code != NIL) + 3;
5387}
5388
5389/* Compute cost of the comparison done using any method.
5390 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5391static int
5392ix86_fp_comparison_cost (code)
5393 enum rtx_code code;
5394{
5395 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
5396 int min;
5397
5398 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
5399 sahf_cost = ix86_fp_comparison_sahf_cost (code);
5400
5401 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
5402 if (min > sahf_cost)
5403 min = sahf_cost;
5404 if (min > fcomi_cost)
5405 min = fcomi_cost;
5406 return min;
5407}
c0c102a9 5408
3a3677ff
RH
/* Generate insn patterns to do a floating point compare of OPERANDS.

   OP0/OP1 are the operands, SCRATCH an optional HImode scratch register
   for the fnstsw result (one is created if NULL).  On return *SECOND_TEST
   and *BYPASS_TEST (when the pointers are non-NULL) hold additional
   comparison rtxes required by a split comparison, or NULL_RTX.
   Returns the test rtx for the flags user (bcc, scc, or cmov).

   NOTE(review): the magic masks below (0x01, 0x04, 0x40, 0x45) select
   bits of the FP status word once fnstsw has put it into AH -- presumably
   the x87 C0/C2/C3 condition bits; confirm against the architecture
   manual before changing any of them.  */

static rtx
ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
     enum rtx_code code;
     rtx op0, op1, scratch;
     rtx *second_test;
     rtx *bypass_test;
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  Only usable when the
     caller can receive whatever extra tests the split requires.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
	{
	  /* fcomi: compare straight into the flags register.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			     tmp);
	  emit_insn (tmp);
	}
      else
	{
	  /* fcom + fnstsw + sahf: status word goes through SCRATCH.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
	  if (!scratch)
	    scratch = gen_reg_rtx (HImode);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
	  emit_insn (gen_x86_sahf_1 (scratch));
	}

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
      if (second_code != NIL)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	      break;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  abort ();
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
5598
9e3e266c 5599rtx
a1b8572c 5600ix86_expand_compare (code, second_test, bypass_test)
e075ae69 5601 enum rtx_code code;
a1b8572c 5602 rtx *second_test, *bypass_test;
e075ae69
RH
5603{
5604 rtx op0, op1, ret;
5605 op0 = ix86_compare_op0;
5606 op1 = ix86_compare_op1;
5607
a1b8572c
JH
5608 if (second_test)
5609 *second_test = NULL_RTX;
5610 if (bypass_test)
5611 *bypass_test = NULL_RTX;
5612
e075ae69 5613 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 5614 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 5615 second_test, bypass_test);
32b5b1aa 5616 else
e075ae69
RH
5617 ret = ix86_expand_int_compare (code, op0, op1);
5618
5619 return ret;
5620}
5621
/* Expand a conditional branch on comparison CODE of the operands
   recorded in ix86_compare_op0/ix86_compare_op1, jumping to LABEL when
   the comparison holds.  Narrow integer modes branch directly; FP modes
   emit an unsplit compare-and-branch pattern; DImode is decomposed into
   word-sized compares.  */

void
ix86_expand_branch (code, label)
     enum rtx_code code;
     rtx label;
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      /* Plain integer compare + conditional jump.  */
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
    case TFmode:
      /* Don't expand the comparison early, so that we get better code
	 when jump or whoever decides to reverse the comparison.  */
      {
	rtvec vec;
	int use_fcomi;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	tmp = gen_rtx_fmt_ee (code, VOIDmode,
			      ix86_compare_op0, ix86_compare_op1);
	tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				    gen_rtx_LABEL_REF (VOIDmode, label),
				    pc_rtx);
	tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	/* Attach the clobbers the eventual split will need: two CCFPmode
	   registers (hard regs 18 and 17), plus an HImode scratch for the
	   fnstsw path when fcomi is not used.  */
	use_fcomi = ix86_use_fcomi_compare (code);
	vec = rtvec_alloc (3 + !use_fcomi);
	RTVEC_ELT (vec, 0) = tmp;
	RTVEC_ELT (vec, 1)
	  = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	RTVEC_ELT (vec, 2)
	  = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	if (! use_fcomi)
	  RTVEC_ELT (vec, 3)
	    = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	return;
      }

    case DImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	/* Canonicalize a constant first operand to the second slot.  */
	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    /* Recurse as a SImode comparison of the combined value
	       against zero.  */
	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = NIL; code2 = NE;  break;
	  case NE:   code2 = NIL; break;

	  default:
	    abort ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != NIL)
	  ix86_expand_branch (code1, label);
	if (code2 != NIL)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != NIL)
	  emit_label (label2);
	return;
      }

    default:
      abort ();
    }
}
e075ae69 5791
9e7adcb3
JH
/* Split branch based on floating point condition.

   CONDITION is the comparison rtx, OP1/OP2 its FP operands, TARGET1 the
   branch-taken target, TARGET2 the fall-through (one of them is pc_rtx),
   and TMP an optional HImode scratch for fnstsw.  Emits the one-to-three
   jumps the expanded comparison requires.  */
void
ix86_split_fp_branch (condition, op1, op2, target1, target2, tmp)
     rtx condition, op1, op2, target1, target2, tmp;
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  enum rtx_code code = GET_CODE (condition);

  /* Normalize so that TARGET2 is the fall-through: reverse the
     condition and swap the targets if needed.  */
  if (target2 != pc_rtx)
    {
      /* NOTE(review): this inner `tmp' intentionally shadows the
	 scratch-register parameter of the same name.  */
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);
  /* The bypass test jumps over the main branch (used for trapping
     comparisons split around the unordered case).  */
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     bypass,
					     gen_rtx_LABEL_REF (VOIDmode,
								label),
					     pc_rtx)));
    }
  /* AMD Athlon and probably other CPUs too have fast bypass path between the
     comparison and first branch.  The second branch takes longer to execute
     so place first branch the worse predicable one if possible.  */
  if (second != NULL_RTX
      && (GET_CODE (second) == UNORDERED || GET_CODE (second) == ORDERED))
    {
      rtx tmp = condition;
      condition = second;
      second = tmp;
    }
  emit_jump_insn (gen_rtx_SET
		  (VOIDmode, pc_rtx,
		   gen_rtx_IF_THEN_ELSE (VOIDmode,
					 condition, target1, target2)));
  if (second != NULL_RTX)
    emit_jump_insn (gen_rtx_SET
		    (VOIDmode, pc_rtx,
		     gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, target2)));
  if (label != NULL_RTX)
    emit_label (label);
}
5843
/* Expand a setcc of comparison CODE (using the global ix86_compare_op0/op1)
   into DEST.  Returns 1 on success (DONE), 0 on failure (FAIL) -- DImode
   comparisons are rejected.  Extra SECOND/BYPASS conditions produced by the
   FP comparison expander are merged in with and/ior.  */
int
ix86_expand_setcc (code, dest)
     enum rtx_code code;
     rtx dest;
{
  rtx ret, tmp, tmpreg;
  rtx second_test, bypass_test;
  int type;

  if (GET_MODE (ix86_compare_op0) == DImode)
    return 0; /* FAIL */

  /* Three modes of generation:
     0 -- destination does not overlap compare sources:
	  clear dest first, emit strict_low_part setcc.
     1 -- destination does overlap compare sources:
	  emit subreg setcc, zero extend.
     2 -- destination is in QImode:
	  emit setcc only.
   */

  type = 0;

  if (GET_MODE (dest) == QImode)
    type = 2;
  else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
	   || reg_overlap_mentioned_p (dest, ix86_compare_op1))
    type = 1;

  /* Clear DEST before the compare so the setcc of the low byte yields a
     full-width 0/1 value.  */
  if (type == 0)
    emit_move_insn (dest, const0_rtx);

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  /* TMP is the setcc destination pattern; TMPREG the plain QImode register
     used as input to the and/ior merge below.  */
  tmp = dest;
  tmpreg = dest;
  if (type == 0)
    {
      tmp = gen_lowpart (QImode, dest);
      tmpreg = tmp;
      tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
    }
  else if (type == 1)
    {
      if (!cse_not_expected)
	tmp = gen_reg_rtx (QImode);
      else
	tmp = gen_lowpart (QImode, dest);
      tmpreg = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  /* Combine an extra condition from the FP compare: at most one of
     SECOND_TEST / BYPASS_TEST may be set.  A bypass condition is reversed
     and ANDed in; a second condition is ORed in.  */
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  if (second_test)
	    abort();
	  test = bypass_test;
	  bypass = 1;
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  /* For the overlap case, zero-extend the QImode result into DEST;
     the extension clobbers the flags register.  */
  if (type == 1)
    {
      rtx clob;

      tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
      tmp = gen_rtx_SET (VOIDmode, dest, tmp);
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
      emit_insn (tmp);
    }

  return 1; /* DONE */
}
e075ae69 5932
/* Expand an integer conditional move: operands[0] = operands[1] (the
   condition, comparing the globals ix86_compare_op0/op1) ? operands[2]
   : operands[3].  Tries branchless sbb/setcc sequences for constant arms
   before falling back to cmov (or failing without TARGET_CMOVE).
   Returns 1 when all insns were emitted (DONE), 0 on failure (FAIL).  */
int
ix86_expand_int_movcc (operands)
     rtx operands[];
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;

  /* When the compare code is not LTU or GEU, we can not use sbbl case.
     In case comparison is done with immediate, we can convert it to LTU or
     GEU by altering the integer.  */

  if ((code == LEU || code == GTU)
      && GET_CODE (ix86_compare_op1) == CONST_INT
      && GET_MODE (operands[0]) != HImode
      && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      if (code == LEU)
	code = LTU;
      else
	code = GEU;
      ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
    }

  /* Capture the compare as a sequence so it can be emitted later, after
     any preparatory insns of the chosen strategy.  */
  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = gen_sequence ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if (GET_MODE (operands[0]) != HImode
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      /* Carry-based case: sbbl materializes 0 / -1 from the carry flag.  */
      if ((compare_code == LTU || compare_code == GEU)
	  && !second_test && !bypass_test)
	{

	  /* Detect overlap between destination and compare sources.  */
	  rtx tmp = out;

	  /* To simplify rest of code, restrict to the GEU case.  */
	  if (compare_code == LTU)
	    {
	      int tmp = ct;
	      ct = cf;
	      cf = tmp;
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	  diff = ct - cf;

	  if (reg_overlap_mentioned_p (out, ix86_compare_op0)
	      || reg_overlap_mentioned_p (out, ix86_compare_op1))
	    tmp = gen_reg_rtx (SImode);

	  emit_insn (compare_seq);
	  emit_insn (gen_x86_movsicc_0_m1 (tmp));

	  if (diff == 1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [addl dest, ct]
	       *
	       * Size 5 - 8.
	       */
	      if (ct)
		emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
	    }
	  else if (cf == -1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * orl $ct, dest
	       *
	       * Size 8.
	       */
	      emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
	    }
	  else if (diff == -1 && ct)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * xorl $-1, dest
	       * [addl dest, cf]
	       *
	       * Size 8 - 11.
	       */
	      emit_insn (gen_one_cmplsi2 (tmp, tmp));
	      if (cf)
		emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
	    }
	  else
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * andl cf - ct, dest
	       * [addl dest, ct]
	       *
	       * Size 8 - 11.
	       */
	      emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
	      if (ct)
		emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
	    }

	  if (tmp != out)
	    emit_move_insn (out, tmp);

	  return 1; /* DONE */
	}

      /* setcc + lea case: normalize so diff = ct - cf > 0, reversing the
	 condition when needed.  */
      diff = ct - cf;
      if (diff < 0)
	{
	  HOST_WIDE_INT tmp;
	  tmp = ct, ct = cf, cf = tmp;
	  diff = -diff;
	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
	    {
	      /* We may be reversing unordered compare to normal compare, that
		 is not valid in general (we may convert non-trapping condition
		 to trapping one), however on i386 we currently emit all
		 comparisons unordered.  */
	      compare_code = reverse_condition_maybe_unordered (compare_code);
	      code = reverse_condition_maybe_unordered (code);
	    }
	  else
	    {
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	}
      if (diff == 1 || diff == 2 || diff == 4 || diff == 8
	  || diff == 3 || diff == 5 || diff == 9)
	{
	  /*
	   * xorl dest,dest
	   * cmpl op1,op2
	   * setcc dest
	   * lea cf(dest*(ct-cf)),dest
	   *
	   * Size 14.
	   *
	   * This also catches the degenerate setcc-only case.
	   */

	  rtx tmp;
	  int nops;

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  /* Build the lea address expression; NOPS counts its components
	     to decide how to emit the final set.  */
	  nops = 0;
	  if (diff == 1)
	    tmp = out;
	  else
	    {
	      tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
	      nops++;
	      if (diff & 1)
		{
		  tmp = gen_rtx_PLUS (SImode, tmp, out);
		  nops++;
		}
	    }
	  if (cf != 0)
	    {
	      tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
	      nops++;
	    }
	  if (tmp != out)
	    {
	      if (nops == 0)
		emit_move_insn (out, tmp);
	      else if (nops == 1)
		{
		  rtx clob;

		  clob = gen_rtx_REG (CCmode, FLAGS_REG);
		  clob = gen_rtx_CLOBBER (VOIDmode, clob);

		  tmp = gen_rtx_SET (VOIDmode, out, tmp);
		  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
		  emit_insn (tmp);
		}
	      else
		emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
	    }
	  if (out != operands[0])
	    emit_move_insn (operands[0], out);

	  return 1; /* DONE */
	}

	/*
	 * General case:			Jumpful:
	 *   xorl dest,dest		cmpl op1, op2
	 *   cmpl op1, op2		movl ct, dest
	 *   setcc dest			jcc 1f
	 *   decl dest			movl cf, dest
	 *   andl (cf-ct),dest		1:
	 *   addl ct,dest
	 *
	 * Size 20.			Size 14.
	 *
	 * This is reasonably steep, but branch mispredict costs are
	 * high on modern cpus, so consider failing only if optimizing
	 * for space.
	 *
	 * %%% Parameterize branch_cost on the tuning architecture, then
	 * use that.  The 80386 couldn't care less about mispredicts.
	 */

      if (!optimize_size && !TARGET_CMOVE)
	{
	  /* Normalize so the constant add at the end is the CT value.  */
	  if (ct == 0)
	    {
	      ct = cf;
	      cf = 0;
	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
		{
		  /* We may be reversing unordered compare to normal compare,
		     that is not valid in general (we may convert non-trapping
		     condition to trapping one), however on i386 we currently
		     emit all comparisons unordered.  */
		  compare_code = reverse_condition_maybe_unordered (compare_code);
		  code = reverse_condition_maybe_unordered (code);
		}
	      else
		{
		  compare_code = reverse_condition (compare_code);
		  code = reverse_condition (code);
		}
	    }

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  emit_insn (gen_addsi3 (out, out, constm1_rtx));
	  emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
	  if (ct != 0)
	    emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
	  if (out != operands[0])
	    emit_move_insn (operands[0], out);

	  return 1; /* DONE */
	}
    }

  if (!TARGET_CMOVE)
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (optimize_size)
	return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load a
	 constant with the above and mask it in with a logical operation.  */

      if (GET_CODE (operands[2]) == CONST_INT)
	{
	  var = operands[3];
	  if (INTVAL (operands[2]) == 0)
	    operands[3] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[2]) == -1)
	    operands[3] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else if (GET_CODE (operands[3]) == CONST_INT)
	{
	  var = operands[2];
	  if (INTVAL (operands[3]) == 0)
	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1)
	    operands[2] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else
	return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (GET_MODE (orig_out));
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
	return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
			  OPTAB_WIDEN);
      if (out != orig_out)
	emit_move_insn (orig_out, out);

      return 1; /* DONE */
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
    operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
  if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
    operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);

  /* The follow-up cmov for a bypass/second test reads operands[0], so copy
     an arm that overlaps the destination into a fresh register first.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						compare_op, operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
				  bypass_test,
				  operands[3],
				  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
				  second_test,
				  operands[2],
				  operands[0])));

  return 1; /* DONE */
}
e075ae69 6301
32b5b1aa 6302int
e075ae69
RH
6303ix86_expand_fp_movcc (operands)
6304 rtx operands[];
32b5b1aa 6305{
e075ae69 6306 enum rtx_code code;
e075ae69 6307 rtx tmp;
a1b8572c 6308 rtx compare_op, second_test, bypass_test;
32b5b1aa 6309
0073023d
JH
6310 /* For SF/DFmode conditional moves based on comparisons
6311 in same mode, we may want to use SSE min/max instructions. */
6312 if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
6313 || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
6314 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
6315 /* We may be called from the post-reload splitter. */
6316 && (!REG_P (operands[0])
6317 || SSE_REG_P (operands[0])
6318 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
6319 {
6320 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
6321 code = GET_CODE (operands[1]);
6322
6323 /* See if we have (cross) match between comparison operands and
6324 conditional move operands. */
6325 if (rtx_equal_p (operands[2], op1))
6326 {
6327 rtx tmp = op0;
6328 op0 = op1;
6329 op1 = tmp;
6330 code = reverse_condition_maybe_unordered (code);
6331 }
6332 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
6333 {
6334 /* Check for min operation. */
6335 if (code == LT)
6336 {
6337 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6338 if (memory_operand (op0, VOIDmode))
6339 op0 = force_reg (GET_MODE (operands[0]), op0);
6340 if (GET_MODE (operands[0]) == SFmode)
6341 emit_insn (gen_minsf3 (operands[0], op0, op1));
6342 else
6343 emit_insn (gen_mindf3 (operands[0], op0, op1));
6344 return 1;
6345 }
6346 /* Check for max operation. */
6347 if (code == GT)
6348 {
6349 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6350 if (memory_operand (op0, VOIDmode))
6351 op0 = force_reg (GET_MODE (operands[0]), op0);
6352 if (GET_MODE (operands[0]) == SFmode)
6353 emit_insn (gen_maxsf3 (operands[0], op0, op1));
6354 else
6355 emit_insn (gen_maxdf3 (operands[0], op0, op1));
6356 return 1;
6357 }
6358 }
6359 /* Manage condition to be sse_comparison_operator. In case we are
6360 in non-ieee mode, try to canonicalize the destination operand
6361 to be first in the comparison - this helps reload to avoid extra
6362 moves. */
6363 if (!sse_comparison_operator (operands[1], VOIDmode)
6364 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
6365 {
6366 rtx tmp = ix86_compare_op0;
6367 ix86_compare_op0 = ix86_compare_op1;
6368 ix86_compare_op1 = tmp;
6369 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
6370 VOIDmode, ix86_compare_op0,
6371 ix86_compare_op1);
6372 }
6373 /* Similary try to manage result to be first operand of conditional
6374 move. */
6375 if (rtx_equal_p (operands[0], operands[3]))
6376 {
6377 rtx tmp = operands[2];
6378 operands[2] = operands[3];
6379 operands[2] = tmp;
6380 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
6381 (GET_CODE (operands[1])),
6382 VOIDmode, ix86_compare_op0,
6383 ix86_compare_op1);
6384 }
6385 if (GET_MODE (operands[0]) == SFmode)
6386 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
6387 operands[2], operands[3],
6388 ix86_compare_op0, ix86_compare_op1));
6389 else
6390 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
6391 operands[2], operands[3],
6392 ix86_compare_op0, ix86_compare_op1));
6393 return 1;
6394 }
6395
e075ae69 6396 /* The floating point conditional move instructions don't directly
0f290768 6397 support conditions resulting from a signed integer comparison. */
32b5b1aa 6398
e075ae69 6399 code = GET_CODE (operands[1]);
a1b8572c 6400 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9e7adcb3
JH
6401
6402 /* The floating point conditional move instructions don't directly
6403 support signed integer comparisons. */
6404
a1b8572c 6405 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 6406 {
a1b8572c
JH
6407 if (second_test != NULL || bypass_test != NULL)
6408 abort();
e075ae69 6409 tmp = gen_reg_rtx (QImode);
3a3677ff 6410 ix86_expand_setcc (code, tmp);
e075ae69
RH
6411 code = NE;
6412 ix86_compare_op0 = tmp;
6413 ix86_compare_op1 = const0_rtx;
a1b8572c
JH
6414 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
6415 }
6416 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6417 {
6418 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6419 emit_move_insn (tmp, operands[3]);
6420 operands[3] = tmp;
6421 }
6422 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6423 {
6424 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6425 emit_move_insn (tmp, operands[2]);
6426 operands[2] = tmp;
e075ae69 6427 }
e9a25f70 6428
e075ae69
RH
6429 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6430 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9e7adcb3 6431 compare_op,
e075ae69
RH
6432 operands[2],
6433 operands[3])));
a1b8572c
JH
6434 if (bypass_test)
6435 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6436 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6437 bypass_test,
6438 operands[3],
6439 operands[0])));
6440 if (second_test)
6441 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6442 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6443 second_test,
6444 operands[2],
6445 operands[0])));
32b5b1aa 6446
e075ae69 6447 return 1;
32b5b1aa
SC
6448}
6449
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating pointer parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.

   OPERAND is the value to split; PARTS receives the SImode pieces
   (low part first); MODE is OPERAND's machine mode.  Returns the
   number of parts (2 or 3).  */

static int
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  /* TFmode occupies 16 bytes but only 12 carry data, hence 3 parts.  */
  int size = mode == TFmode ? 3 : GET_MODE_SIZE (mode) / 4;

  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp moves,
     that force all constants to memory to allow combining.  */

  if (GET_CODE (operand) == MEM
      && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
    operand = get_pool_constant (XEXP (operand, 0));

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      /* All parts alias the push target; the caller pushes in order.  */
      PUT_MODE (operand, SImode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Consecutive hard registers; only valid after reload.  */
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      PUT_MODE (operand, SImode);
	      parts[0] = operand;
	      parts[1] = adj_offsettable_operand (operand, 4);
	      if (size == 3)
		parts[2] = adj_offsettable_operand (operand, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      /* Decompose an FP constant into its target word images.  */
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		case TFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = GEN_INT (l[2]);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = GEN_INT (l[1]);
	      parts[0] = GEN_INT (l[0]);
	    }
	  else
	    abort ();
	}
    }

  return size;
}
6537
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

int
ix86_split_long_move (operands1)
     rtx operands1[];
{
  /* part[i][j] is the j-th SImode piece of operand i.  */
  rtx part[2][3];
  rtx operands[2];
  int size;
  int push = 0;
  int collisions = 0;

  /* Make our own copy to avoid clobbering the operands.  */
  operands[0] = copy_rtx (operands1[0]);
  operands[1] = copy_rtx (operands1[1]);

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  size = ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
  ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      /* Each push moves the stack pointer, shifting where the remaining
	 source pieces are found; compensate by shifting the part array.  */
      if (size == 3)
	part[1][1] = part[1][2];
      part[1][0] = part[1][1];
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (size == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && size == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  collisions = 1;
	  emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
				  XEXP (part[1][0], 0)));
	  part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
	  part[1][1] = adj_offsettable_operand (part[1][0], 4);
	  if (size == 3)
	    part[1][2] = adj_offsettable_operand (part[1][0], 8);
	}
    }

  if (push)
    {
      if (size == 3)
	{
	  /* We use only first 12 bytes of TFmode value, but for pushing we
	     are required to adjust stack as if we were pushing real 16byte
	     value.  */
	  if (GET_MODE (operands1[0]) == TFmode)
	    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (-4)));
	  emit_insn (gen_push (part[1][2]));
	}
      emit_insn (gen_push (part[1][1]));
      emit_insn (gen_push (part[1][0]));
      return 1;
    }

  /* Choose correct order to not overwrite the source before it is copied.
     The caller emits the piecewise moves from operands 2-4 to 5-7.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (size == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* High-to-low order.  */
      if (size == 3)
	{
	  operands1[2] = part[0][2];
	  operands1[3] = part[0][1];
	  operands1[4] = part[0][0];
	  operands1[5] = part[1][2];
	  operands1[6] = part[1][1];
	  operands1[7] = part[1][0];
	}
      else
	{
	  operands1[2] = part[0][1];
	  operands1[3] = part[0][0];
	  operands1[5] = part[1][1];
	  operands1[6] = part[1][0];
	}
    }
  else
    {
      /* Low-to-high order.  */
      if (size == 3)
	{
	  operands1[2] = part[0][0];
	  operands1[3] = part[0][1];
	  operands1[4] = part[0][2];
	  operands1[5] = part[1][0];
	  operands1[6] = part[1][1];
	  operands1[7] = part[1][2];
	}
      else
	{
	  operands1[2] = part[0][0];
	  operands1[3] = part[0][1];
	  operands1[5] = part[1][0];
	  operands1[6] = part[1][1];
	}
    }

  return 0;
}
32b5b1aa 6676
/* Split a DImode arithmetic/logical left shift (operands[0] =
   operands[1] << operands[2]) into SImode insns using shld.
   SCRATCH, when non-NULL, is a spare SImode register usable after
   reload for the variable-count adjustment.  */
void
ix86_split_ashldi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      /* Hardware shifts use the count modulo 64.  */
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* Whole low word shifts into the high word.  */
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable count: shift both words, then fix up when bit 5 of the
	 count was set (i.e. the true shift was >= 32).  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
32b5b1aa 6729
/* Split a DImode arithmetic right shift (operands[0] =
   operands[1] >> operands[2], sign-filling) into SImode insns using
   shrd.  SCRATCH, when non-NULL, is a spare SImode register usable
   after reload for the variable-count adjustment.  */
void
ix86_split_ashrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      /* Hardware shifts use the count modulo 64.  */
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* High word shifts into the low word; the new high word is
	     all sign bits.  */
	  emit_move_insn (low[0], high[1]);

	  if (! reload_completed)
	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
	  else
	    {
	      emit_move_insn (high[0], low[0]);
	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
	    }

	  if (count > 32)
	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable count: shift both words, then fix up when bit 5 of the
	 count was set (i.e. the true shift was >= 32); the fill value
	 is the sign replicated into SCRATCH.  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = gen_reg_rtx (SImode);
	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}
32b5b1aa 6788
/* Split a DImode logical right shift (operands[0] =
   operands[1] >> operands[2], zero-filling) into SImode insns using
   shrd.  SCRATCH, when non-NULL, is a spare SImode register usable
   after reload for the variable-count adjustment.  */
void
ix86_split_lshrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      /* Hardware shifts use the count modulo 64.  */
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* High word shifts into the low word; the new high word is
	     zero.  */
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}
3f803cd9 6842
e075ae69
RH
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body.  It needs the initialisations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

void
ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
     rtx out, align_rtx, scratch;
{
  int align;
  rtx tmp;
  /* Labels used while stepping byte-by-byte up to a 4-byte-aligned
     address, and the common exit taken as soon as a zero byte is seen.  */
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);

  /* A non-constant alignment is treated as unknown (0).  */
  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  SCRATCH holds the start address
	     here (see function comment).  */
	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  /* Dispatch on (address & 3): 0 -> already aligned, 2 -> one
	     2-byte check, 1 or 3 -> up to three byte checks.  */
	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   SImode, 1, 0, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
				   SImode, 1, 0, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
				   SImode, 1, 0, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   SImode, 1, 0, align_4_label);
	}

      /* MEM is re-read through OUT after each increment below.  */
      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, 0, end_0_label);

      /* Increment the address.  */
      emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
				   QImode, 1, 0, end_0_label);

	  emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, 0, end_0_label);

      emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.
     It computes (x - 0x01010101) & ~x & 0x80808080: a byte's 0x80 flag
     survives exactly when that byte of SCRATCH was zero.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
			   SImode, 1, 0, align_4_label);

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg,
			      gen_rtx_PLUS (SImode, out, GEN_INT (2))));

      /* Conditionally advance OUT by 2, reusing the flags from the
	 test above (the lea did not clobber them).  */
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    out)));

    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  Doubling the low byte of TMPREG
     shifts its 0x80 flag into the carry; subsi3_carry then subtracts
     3 + carry, backing OUT up to the zero byte without a branch.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));

  emit_label (end_0_label);
}
7009\f
e075ae69
RH
7010/* Clear stack slot assignments remembered from previous functions.
7011 This is called from INIT_EXPANDERS once before RTL is emitted for each
7012 function. */
7013
36edd3cc
BS
7014static void
7015ix86_init_machine_status (p)
1526a060 7016 struct function *p;
e075ae69 7017{
37b15744
RH
7018 p->machine = (struct machine_function *)
7019 xcalloc (1, sizeof (struct machine_function));
e075ae69
RH
7020}
7021
1526a060
BS
7022/* Mark machine specific bits of P for GC. */
7023static void
7024ix86_mark_machine_status (p)
7025 struct function *p;
7026{
37b15744 7027 struct machine_function *machine = p->machine;
1526a060
BS
7028 enum machine_mode mode;
7029 int n;
7030
37b15744
RH
7031 if (! machine)
7032 return;
7033
1526a060
BS
7034 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
7035 mode = (enum machine_mode) ((int) mode + 1))
7036 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
37b15744
RH
7037 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
7038}
7039
7040static void
7041ix86_free_machine_status (p)
7042 struct function *p;
7043{
7044 free (p->machine);
7045 p->machine = NULL;
1526a060
BS
7046}
7047
e075ae69
RH
7048/* Return a MEM corresponding to a stack slot with mode MODE.
7049 Allocate a new slot if necessary.
7050
7051 The RTL for a function can have several slots available: N is
7052 which slot to use. */
7053
7054rtx
7055assign_386_stack_local (mode, n)
7056 enum machine_mode mode;
7057 int n;
7058{
7059 if (n < 0 || n >= MAX_386_STACK_LOCALS)
7060 abort ();
7061
7062 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
7063 ix86_stack_locals[(int) mode][n]
7064 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
7065
7066 return ix86_stack_locals[(int) mode][n];
7067}
7068\f
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.
   The returned count covers the displacement bytes (0, 1 or 4) plus
   one extra byte when a SIB byte is required by an index register.  */

static int
memory_address_length (addr)
     rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  /* Push/pop style addresses carry no explicit address bytes.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* Special cases: ebp and esp need the two-byte modrm form.
	 NOTE(review): this compares ADDR against the pseudo/hard frame
	 and stack pointer rtxes only; a plain (reg:SI bp) that is none
	 of these named rtxes would not get the extra byte -- confirm
	 callers only see canonical pointer regs here.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing.  Absolute 32-bit displacement only.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  'K' accepts
	 signed 8-bit immediates, encodable as a 1-byte disp.  */
      if (disp)
	{
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
	    len = 1;
	  else
	    len = 4;
	}

      /* An index requires the two-byte modrm form.  */
      if (index)
	len += 1;
    }

  return len;
}
79325812 7126
6ef67412
JH
7127/* Compute default value for "length_immediate" attribute. When SHORTFORM is set
7128 expect that insn have 8bit immediate alternative. */
e075ae69 7129int
6ef67412 7130ix86_attr_length_immediate_default (insn, shortform)
e075ae69 7131 rtx insn;
6ef67412 7132 int shortform;
e075ae69 7133{
6ef67412
JH
7134 int len = 0;
7135 int i;
6c698a6d 7136 extract_insn_cached (insn);
6ef67412
JH
7137 for (i = recog_data.n_operands - 1; i >= 0; --i)
7138 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 7139 {
6ef67412 7140 if (len)
3071fab5 7141 abort ();
6ef67412
JH
7142 if (shortform
7143 && GET_CODE (recog_data.operand[i]) == CONST_INT
7144 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
7145 len = 1;
7146 else
7147 {
7148 switch (get_attr_mode (insn))
7149 {
7150 case MODE_QI:
7151 len+=1;
7152 break;
7153 case MODE_HI:
7154 len+=2;
7155 break;
7156 case MODE_SI:
7157 len+=4;
7158 break;
7159 default:
7160 fatal_insn ("Unknown insn mode", insn);
7161 }
7162 }
3071fab5 7163 }
6ef67412
JH
7164 return len;
7165}
7166/* Compute default value for "length_address" attribute. */
7167int
7168ix86_attr_length_address_default (insn)
7169 rtx insn;
7170{
7171 int i;
6c698a6d 7172 extract_insn_cached (insn);
1ccbefce
RH
7173 for (i = recog_data.n_operands - 1; i >= 0; --i)
7174 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 7175 {
6ef67412 7176 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
7177 break;
7178 }
6ef67412 7179 return 0;
3f803cd9 7180}
e075ae69
RH
7181\f
7182/* Return the maximum number of instructions a cpu can issue. */
b657fc39 7183
e075ae69
RH
7184int
7185ix86_issue_rate ()
b657fc39 7186{
e075ae69 7187 switch (ix86_cpu)
b657fc39 7188 {
e075ae69
RH
7189 case PROCESSOR_PENTIUM:
7190 case PROCESSOR_K6:
7191 return 2;
79325812 7192
e075ae69 7193 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
7194 case PROCESSOR_PENTIUM4:
7195 case PROCESSOR_ATHLON:
e075ae69 7196 return 3;
b657fc39 7197
b657fc39 7198 default:
e075ae69 7199 return 1;
b657fc39 7200 }
b657fc39
L
7201}
7202
e075ae69
RH
7203/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
7204 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 7205
e075ae69
RH
7206static int
7207ix86_flags_dependant (insn, dep_insn, insn_type)
7208 rtx insn, dep_insn;
7209 enum attr_type insn_type;
7210{
7211 rtx set, set2;
b657fc39 7212
e075ae69
RH
7213 /* Simplify the test for uninteresting insns. */
7214 if (insn_type != TYPE_SETCC
7215 && insn_type != TYPE_ICMOV
7216 && insn_type != TYPE_FCMOV
7217 && insn_type != TYPE_IBR)
7218 return 0;
b657fc39 7219
e075ae69
RH
7220 if ((set = single_set (dep_insn)) != 0)
7221 {
7222 set = SET_DEST (set);
7223 set2 = NULL_RTX;
7224 }
7225 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
7226 && XVECLEN (PATTERN (dep_insn), 0) == 2
7227 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
7228 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
7229 {
7230 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
7231 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
7232 }
78a0d70c
ZW
7233 else
7234 return 0;
b657fc39 7235
78a0d70c
ZW
7236 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
7237 return 0;
b657fc39 7238
78a0d70c
ZW
7239 /* This test is true if the dependant insn reads the flags but
7240 not any other potentially set register. */
7241 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
7242 return 0;
7243
7244 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
7245 return 0;
7246
7247 return 1;
e075ae69 7248}
b657fc39 7249
e075ae69
RH
/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  For an lea the "address" is
   the SET_SRC expression itself; for everything else it is the address
   of the first MEM operand found.  */

static int
ix86_agi_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx addr;

  if (insn_type == TYPE_LEA)
    {
      /* lea patterns are either a bare SET or a PARALLEL whose first
	 element is the SET; anything else is malformed.  */
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
	;
      else if (GET_CODE (addr) == PARALLEL
	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      else
	abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      int i;
      extract_insn_cached (insn);
      /* Scan the operands for a MEM; no MEM means no AGI possible.  */
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    goto found;
	  }
      return 0;
    found:;
    }

  /* AGI exists iff DEP_INSN writes something the address reads.  */
  return modified_in_p (addr, dep_insn);
}
a269a03c
JC
7288
7289int
e075ae69 7290ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
7291 rtx insn, link, dep_insn;
7292 int cost;
7293{
e075ae69 7294 enum attr_type insn_type, dep_insn_type;
0b5107cf 7295 enum attr_memory memory;
e075ae69 7296 rtx set, set2;
9b00189f 7297 int dep_insn_code_number;
a269a03c 7298
309ada50 7299 /* Anti and output depenancies have zero cost on all CPUs. */
e075ae69 7300 if (REG_NOTE_KIND (link) != 0)
309ada50 7301 return 0;
a269a03c 7302
9b00189f
JH
7303 dep_insn_code_number = recog_memoized (dep_insn);
7304
e075ae69 7305 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 7306 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 7307 return cost;
a269a03c 7308
1c71e60e
JH
7309 insn_type = get_attr_type (insn);
7310 dep_insn_type = get_attr_type (dep_insn);
9b00189f 7311
1c71e60e
JH
7312 /* Prologue and epilogue allocators can have a false dependency on ebp.
7313 This results in one cycle extra stall on Pentium prologue scheduling,
7314 so handle this important case manually. */
7315 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
7316 && dep_insn_type == TYPE_ALU
9b00189f
JH
7317 && !reg_mentioned_p (stack_pointer_rtx, insn))
7318 return 0;
7319
a269a03c
JC
7320 switch (ix86_cpu)
7321 {
7322 case PROCESSOR_PENTIUM:
e075ae69
RH
7323 /* Address Generation Interlock adds a cycle of latency. */
7324 if (ix86_agi_dependant (insn, dep_insn, insn_type))
7325 cost += 1;
7326
7327 /* ??? Compares pair with jump/setcc. */
7328 if (ix86_flags_dependant (insn, dep_insn, insn_type))
7329 cost = 0;
7330
7331 /* Floating point stores require value to be ready one cycle ealier. */
0f290768 7332 if (insn_type == TYPE_FMOV
e075ae69
RH
7333 && get_attr_memory (insn) == MEMORY_STORE
7334 && !ix86_agi_dependant (insn, dep_insn, insn_type))
7335 cost += 1;
7336 break;
a269a03c 7337
e075ae69 7338 case PROCESSOR_PENTIUMPRO:
0f290768 7339 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
7340 increase the cost here for non-imov insns. */
7341 if (dep_insn_type != TYPE_IMOV
7342 && dep_insn_type != TYPE_FMOV
0b5107cf
JH
7343 && ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
7344 || memory == MEMORY_BOTH))
e075ae69
RH
7345 cost += 1;
7346
7347 /* INT->FP conversion is expensive. */
7348 if (get_attr_fp_int_src (dep_insn))
7349 cost += 5;
7350
7351 /* There is one cycle extra latency between an FP op and a store. */
7352 if (insn_type == TYPE_FMOV
7353 && (set = single_set (dep_insn)) != NULL_RTX
7354 && (set2 = single_set (insn)) != NULL_RTX
7355 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
7356 && GET_CODE (SET_DEST (set2)) == MEM)
7357 cost += 1;
7358 break;
a269a03c 7359
e075ae69
RH
7360 case PROCESSOR_K6:
7361 /* The esp dependency is resolved before the instruction is really
7362 finished. */
7363 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
7364 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
7365 return 1;
a269a03c 7366
0f290768 7367 /* Since we can't represent delayed latencies of load+operation,
e075ae69 7368 increase the cost here for non-imov insns. */
0b5107cf
JH
7369 if ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
7370 || memory == MEMORY_BOTH)
e075ae69
RH
7371 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
7372
7373 /* INT->FP conversion is expensive. */
7374 if (get_attr_fp_int_src (dep_insn))
7375 cost += 5;
a14003ee 7376 break;
e075ae69 7377
309ada50 7378 case PROCESSOR_ATHLON:
0b5107cf
JH
7379 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
7380 || memory == MEMORY_BOTH)
7381 {
7382 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
7383 cost += 2;
7384 else
7385 cost += 3;
7386 }
309ada50 7387
a269a03c 7388 default:
a269a03c
JC
7389 break;
7390 }
7391
7392 return cost;
7393}
0a726ef1 7394
e075ae69
RH
/* Per-CPU scheduler state, reset at the start of each block by
   ix86_sched_init.  Only the PPro model keeps state: the insns
   currently occupying the three decoders, and how many insns were
   issued in the current cycle.  */
static union
{
  struct ppro_sched_data
  {
    rtx decode[3];		/* Insn in each of the 3 decoders, or NULL.  */
    int issued_this_cycle;	/* Insns issued so far this cycle.  */
  } ppro;
} ix86_sched_data;
0a726ef1 7403
e075ae69
RH
7404static int
7405ix86_safe_length (insn)
7406 rtx insn;
7407{
7408 if (recog_memoized (insn) >= 0)
7409 return get_attr_length(insn);
7410 else
7411 return 128;
7412}
0a726ef1 7413
e075ae69
RH
/* Like ix86_safe_length but returns 0 for an unrecognizable insn.
   NOTE(review): despite the name, this returns get_attr_length -- the
   full insn length, not a prefix length.  Looks like a copy/paste from
   ix86_safe_length; the only caller is the Pentium pairing length
   check, so confirm the intended attribute before changing.  */
static int
ix86_safe_length_prefix (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_length(insn);
  else
    return 0;
}
7423
7424static enum attr_memory
7425ix86_safe_memory (insn)
7426 rtx insn;
7427{
7428 if (recog_memoized (insn) >= 0)
7429 return get_attr_memory(insn);
7430 else
7431 return MEMORY_UNKNOWN;
7432}
0a726ef1 7433
e075ae69
RH
7434static enum attr_pent_pair
7435ix86_safe_pent_pair (insn)
7436 rtx insn;
7437{
7438 if (recog_memoized (insn) >= 0)
7439 return get_attr_pent_pair(insn);
7440 else
7441 return PENT_PAIR_NP;
7442}
0a726ef1 7443
e075ae69
RH
7444static enum attr_ppro_uops
7445ix86_safe_ppro_uops (insn)
7446 rtx insn;
7447{
7448 if (recog_memoized (insn) >= 0)
7449 return get_attr_ppro_uops (insn);
7450 else
7451 return PPRO_UOPS_MANY;
7452}
0a726ef1 7453
e075ae69
RH
7454static void
7455ix86_dump_ppro_packet (dump)
7456 FILE *dump;
0a726ef1 7457{
e075ae69 7458 if (ix86_sched_data.ppro.decode[0])
0a726ef1 7459 {
e075ae69
RH
7460 fprintf (dump, "PPRO packet: %d",
7461 INSN_UID (ix86_sched_data.ppro.decode[0]));
7462 if (ix86_sched_data.ppro.decode[1])
7463 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
7464 if (ix86_sched_data.ppro.decode[2])
7465 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
7466 fputc ('\n', dump);
7467 }
7468}
0a726ef1 7469
e075ae69 7470/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 7471
e075ae69
RH
7472void
7473ix86_sched_init (dump, sched_verbose)
7474 FILE *dump ATTRIBUTE_UNUSED;
7475 int sched_verbose ATTRIBUTE_UNUSED;
7476{
7477 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
7478}
7479
7480/* Shift INSN to SLOT, and shift everything else down. */
7481
7482static void
7483ix86_reorder_insn (insnp, slot)
7484 rtx *insnp, *slot;
7485{
7486 if (insnp != slot)
7487 {
7488 rtx insn = *insnp;
0f290768 7489 do
e075ae69
RH
7490 insnp[0] = insnp[1];
7491 while (++insnp != slot);
7492 *insnp = insn;
0a726ef1 7493 }
e075ae69
RH
7494}
7495
/* Find an instruction with given pairability and minimal amount of cycles
   lost by the fact that the CPU waits for both pipelines to finish before
   reading next instructions.  Also take care that both instructions together
   can not exceed 7 bytes.

   Scans the ready list from E_READY down to READY for an insn of pairing
   class TYPE to pair with FIRST; returns a pointer into the ready array,
   or NULL if FIRST is too long or no candidate exists.  */

static rtx *
ix86_pent_find_pair (e_ready, ready, type, first)
     rtx *e_ready;
     rtx *ready;
     enum attr_pent_pair type;
     rtx first;
{
  int mincycles, cycles;
  enum attr_pent_pair tmp;
  enum attr_memory memory;
  rtx *insnp, *bestinsnp = NULL;

  /* Pairs must fit in 7 bytes total; give up if FIRST alone is too big.  */
  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
    return NULL;

  memory = ix86_safe_memory (first);
  cycles = result_ready_cost (first);
  mincycles = INT_MAX;

  /* Stop early once a zero-penalty candidate is found (mincycles == 0
     terminates the loop condition).  */
  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
      {
	enum attr_memory second_memory;
	int secondcycles, currentcycles;

	second_memory = ix86_safe_memory (*insnp);
	secondcycles = result_ready_cost (*insnp);
	/* Base penalty: the pipes stall until the slower insn retires.  */
	currentcycles = abs (cycles - secondcycles);

	if (secondcycles >= 1 && cycles >= 1)
	  {
	    /* Two read/modify/write instructions together takes two
	       cycles longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
	      currentcycles += 2;

	    /* Read modify/write instruction followed by read/modify
	       takes one cycle longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
		&& tmp != PENT_PAIR_UV
		&& ix86_safe_pent_pair (first) != PENT_PAIR_UV)
	      currentcycles += 1;
	  }
	if (currentcycles < mincycles)
	  bestinsnp = insnp, mincycles = currentcycles;
      }

  return bestinsnp;
}
7551
/* Subroutines of ix86_sched_reorder.  */

/* Reorder the ready list so the insn at the head (*E_READY) issues
   together with a compatible partner in the Pentium's U/V pipes.
   Pairing classes: PU = U-pipe only, PV = V-pipe only, UV = either,
   NP = not pairable.  */
static void
ix86_sched_reorder_pentium (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  enum attr_pent_pair pair1, pair2;
  rtx *insnp;

  /* This wouldn't be necessary if Haifa knew that static insn ordering
     is important to which pipe an insn is issued to.  So we have to make
     some minor rearrangements.  */

  pair1 = ix86_safe_pent_pair (*e_ready);

  /* If the first insn is non-pairable, let it be.  */
  if (pair1 == PENT_PAIR_NP)
    return;

  pair2 = PENT_PAIR_NP;
  insnp = 0;

  /* If the first insn is UV or PV pairable, search for a PU
     insn to go with.  */
  if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PU, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PU;
    }

  /* If the first insn is PU or UV pairable, search for a PV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP
      && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PV;
    }

  /* If the first insn is pairable, search for a UV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP)
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_UV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_UV;
    }

  if (pair2 == PENT_PAIR_NP)
    return;

  /* Found something!  Decide if we need to swap the order.  The U pipe
     must take the insn that can only go to U (and similarly for V), and
     for two load/op insns the plain load should issue second.  */
  if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
      || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
	  && ix86_safe_memory (*e_ready) == MEMORY_BOTH
	  && ix86_safe_memory (*insnp) == MEMORY_LOAD))
    ix86_reorder_insn (insnp, e_ready);
  else
    ix86_reorder_insn (insnp, e_ready - 1);
}
e075ae69 7618
/* Reorder the ready list to feed the PPro's 4-1-1 decoder template:
   decoder 0 takes an insn of any complexity, decoders 1 and 2 take
   single-uop insns only.  Fills the decode[] slots from the head of the
   priority queue, promoting suitable insns to the front.  */
static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready >= e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  /* Always report at least one issue so ix86_variable_issue's
     decrement cannot go negative on an empty packet.  */
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}
fb693d44 7703
0f290768 7704/* We are about to being issuing insns for this clock cycle.
78a0d70c
ZW
7705 Override the default sort algorithm to better slot instructions. */
7706int
7707ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
7708 FILE *dump ATTRIBUTE_UNUSED;
7709 int sched_verbose ATTRIBUTE_UNUSED;
7710 rtx *ready;
7711 int n_ready;
7712 int clock_var ATTRIBUTE_UNUSED;
7713{
7714 rtx *e_ready = ready + n_ready - 1;
fb693d44 7715
78a0d70c
ZW
7716 if (n_ready < 2)
7717 goto out;
e075ae69 7718
78a0d70c
ZW
7719 switch (ix86_cpu)
7720 {
7721 default:
7722 break;
e075ae69 7723
78a0d70c
ZW
7724 case PROCESSOR_PENTIUM:
7725 ix86_sched_reorder_pentium (ready, e_ready);
7726 break;
e075ae69 7727
78a0d70c
ZW
7728 case PROCESSOR_PENTIUMPRO:
7729 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 7730 break;
fb693d44
RH
7731 }
7732
e075ae69
RH
7733out:
7734 return ix86_issue_rate ();
7735}
fb693d44 7736
e075ae69
RH
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  For the PPro this also
   tracks the decoder packet state, flushing (and optionally dumping)
   a packet when a many-uop insn issues or all three decoders fill.  */

int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    /* A complex insn occupies the whole decode group: dump any
	       pending packet, then dump this insn as its own packet and
	       leave the decoders empty.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    /* A 2-4 uop insn must go to decoder 0; it starts a new
	       packet after flushing the previous one.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    /* A single-uop insn fills the first free decoder slot;
	       when the third slot fills, the packet is complete.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}
a7180f70 7799\f
0e4970d7
RK
7800/* Walk through INSNS and look for MEM references whose address is DSTREG or
7801 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
7802 appropriate. */
7803
7804void
7805ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
7806 rtx insns;
7807 rtx dstref, srcref, dstreg, srcreg;
7808{
7809 rtx insn;
7810
7811 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
7812 if (INSN_P (insn))
7813 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
7814 dstreg, srcreg);
7815}
7816
/* Subroutine of above to actually do the updating by recursively walking
   the rtx.  Copies DSTREF's attributes onto any (mem DSTREG), and
   SRCREF's onto any (mem SRCREG), then recurses into all 'e' and 'E'
   format slots of X.  */

static void
ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx x;
     rtx dstref, srcref, dstreg, srcreg;
{
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);
  int i, j;

  /* Pointer equality with the register rtx is intentional: only the
     exact address register installed by the caller matches.  */
  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
    {
      if (*format_ptr == 'e')
	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
				   dstreg, srcreg);
      else if (*format_ptr == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
				     dstreg, srcreg);
    }
}
7845\f
a7180f70
BS
7846/* Compute the alignment given to a constant that is being placed in memory.
7847 EXP is the constant and ALIGN is the alignment that the object would
7848 ordinarily have.
7849 The value of this function is used instead of that alignment to align
7850 the object. */
7851
7852int
7853ix86_constant_alignment (exp, align)
7854 tree exp;
7855 int align;
7856{
7857 if (TREE_CODE (exp) == REAL_CST)
7858 {
7859 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
7860 return 64;
7861 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
7862 return 128;
7863 }
7864 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
7865 && align < 256)
7866 return 256;
7867
7868 return align;
7869}
7870
7871/* Compute the alignment for a static variable.
7872 TYPE is the data type, and ALIGN is the alignment that
7873 the object would ordinarily have. The value of this function is used
7874 instead of that alignment to align the object. */
7875
7876int
7877ix86_data_alignment (type, align)
7878 tree type;
7879 int align;
7880{
7881 if (AGGREGATE_TYPE_P (type)
7882 && TYPE_SIZE (type)
7883 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
7884 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
7885 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
7886 return 256;
7887
7888 if (TREE_CODE (type) == ARRAY_TYPE)
7889 {
7890 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7891 return 64;
7892 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7893 return 128;
7894 }
7895 else if (TREE_CODE (type) == COMPLEX_TYPE)
7896 {
0f290768 7897
a7180f70
BS
7898 if (TYPE_MODE (type) == DCmode && align < 64)
7899 return 64;
7900 if (TYPE_MODE (type) == XCmode && align < 128)
7901 return 128;
7902 }
7903 else if ((TREE_CODE (type) == RECORD_TYPE
7904 || TREE_CODE (type) == UNION_TYPE
7905 || TREE_CODE (type) == QUAL_UNION_TYPE)
7906 && TYPE_FIELDS (type))
7907 {
7908 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7909 return 64;
7910 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7911 return 128;
7912 }
7913 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7914 || TREE_CODE (type) == INTEGER_TYPE)
7915 {
7916 if (TYPE_MODE (type) == DFmode && align < 64)
7917 return 64;
7918 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7919 return 128;
7920 }
7921
7922 return align;
7923}
7924
7925/* Compute the alignment for a local variable.
7926 TYPE is the data type, and ALIGN is the alignment that
7927 the object would ordinarily have. The value of this macro is used
7928 instead of that alignment to align the object. */
7929
7930int
7931ix86_local_alignment (type, align)
7932 tree type;
7933 int align;
7934{
7935 if (TREE_CODE (type) == ARRAY_TYPE)
7936 {
7937 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7938 return 64;
7939 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7940 return 128;
7941 }
7942 else if (TREE_CODE (type) == COMPLEX_TYPE)
7943 {
7944 if (TYPE_MODE (type) == DCmode && align < 64)
7945 return 64;
7946 if (TYPE_MODE (type) == XCmode && align < 128)
7947 return 128;
7948 }
7949 else if ((TREE_CODE (type) == RECORD_TYPE
7950 || TREE_CODE (type) == UNION_TYPE
7951 || TREE_CODE (type) == QUAL_UNION_TYPE)
7952 && TYPE_FIELDS (type))
7953 {
7954 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7955 return 64;
7956 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7957 return 128;
7958 }
7959 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7960 || TREE_CODE (type) == INTEGER_TYPE)
7961 {
0f290768 7962
a7180f70
BS
7963 if (TYPE_MODE (type) == DFmode && align < 64)
7964 return 64;
7965 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7966 return 128;
7967 }
7968 return align;
7969}
bd793c65
BS
7970
/* Register a single machine-dependent builtin with the front end.  */
#define def_builtin(NAME, TYPE, CODE) \
  builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL_PTR)

/* One entry describes a builtin that maps directly onto a single insn
   pattern.  The tables below (bdesc_comi, bdesc_2arg, bdesc_1arg) are
   arrays of these, walked by ix86_init_builtins and the expanders.
   NOTE: field order matters — the tables use positional initializers.  */
struct builtin_description
{
  enum insn_code icode;		/* Insn pattern implementing the builtin.  */
  const char * name;		/* User-visible name, or 0 if the builtin is
				   registered by hand in ix86_init_builtins.  */
  enum ix86_builtins code;	/* The IX86_BUILTIN_* identifier.  */
  enum rtx_code comparison;	/* For comparison builtins, the rtx code.  */
  unsigned int flag;		/* For comparisons, nonzero means the operands
				   must be swapped at expansion time.  */
};
7981
/* SSE scalar comparison builtins mapping onto comiss/ucomiss.  Entries
   with a nonzero flag (comigt/comige and friends) are expanded with the
   operands swapped and the comparison code reversed, since the hardware
   only provides the mirrored form.  */
static struct builtin_description bdesc_comi[] =
{
  { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
};
7997
/* Two-operand MMX/SSE builtins.  Entries whose name is 0 are registered
   by hand in ix86_init_builtins because they need a function type other
   than the one derived from the insn's operand mode; the rest are
   registered generically by the loop there.  For the compare entries the
   comparison/flag fields follow the bdesc_comi convention (nonzero flag
   means swap the operands at expansion time).  */
static struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },

  { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },

  { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  */
  { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },

  { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }

};
8130
/* One-operand MMX/SSE builtins.  All entries have a 0 name and are
   registered by hand in ix86_init_builtins with explicit function
   types; the comparison/flag fields are unused here.  */
static struct builtin_description bdesc_1arg[] =
{
  { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }

};
8146
8147/* Expand all the target specific builtins. This is not called if TARGET_MMX
8148 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
8149 builtins. */
8150void
8151ix86_init_builtins ()
8152{
8153 struct builtin_description * d;
77ebd435 8154 size_t i;
cbd5937a 8155 tree endlink = void_list_node;
bd793c65
BS
8156
8157 tree pchar_type_node = build_pointer_type (char_type_node);
8158 tree pfloat_type_node = build_pointer_type (float_type_node);
8159 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
8160 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
8161
8162 /* Comparisons. */
8163 tree int_ftype_v4sf_v4sf
8164 = build_function_type (integer_type_node,
8165 tree_cons (NULL_TREE, V4SF_type_node,
8166 tree_cons (NULL_TREE,
8167 V4SF_type_node,
8168 endlink)));
8169 tree v4si_ftype_v4sf_v4sf
8170 = build_function_type (V4SI_type_node,
8171 tree_cons (NULL_TREE, V4SF_type_node,
8172 tree_cons (NULL_TREE,
8173 V4SF_type_node,
8174 endlink)));
8175 /* MMX/SSE/integer conversions. */
8176 tree int_ftype_v4sf_int
8177 = build_function_type (integer_type_node,
8178 tree_cons (NULL_TREE, V4SF_type_node,
8179 tree_cons (NULL_TREE,
8180 integer_type_node,
8181 endlink)));
8182 tree int_ftype_v4sf
8183 = build_function_type (integer_type_node,
8184 tree_cons (NULL_TREE, V4SF_type_node,
8185 endlink));
8186 tree int_ftype_v8qi
8187 = build_function_type (integer_type_node,
8188 tree_cons (NULL_TREE, V8QI_type_node,
8189 endlink));
8190 tree int_ftype_v2si
8191 = build_function_type (integer_type_node,
8192 tree_cons (NULL_TREE, V2SI_type_node,
8193 endlink));
8194 tree v2si_ftype_int
8195 = build_function_type (V2SI_type_node,
8196 tree_cons (NULL_TREE, integer_type_node,
8197 endlink));
8198 tree v4sf_ftype_v4sf_int
8199 = build_function_type (integer_type_node,
8200 tree_cons (NULL_TREE, V4SF_type_node,
8201 tree_cons (NULL_TREE, integer_type_node,
8202 endlink)));
8203 tree v4sf_ftype_v4sf_v2si
8204 = build_function_type (V4SF_type_node,
8205 tree_cons (NULL_TREE, V4SF_type_node,
8206 tree_cons (NULL_TREE, V2SI_type_node,
8207 endlink)));
8208 tree int_ftype_v4hi_int
8209 = build_function_type (integer_type_node,
8210 tree_cons (NULL_TREE, V4HI_type_node,
8211 tree_cons (NULL_TREE, integer_type_node,
8212 endlink)));
8213 tree v4hi_ftype_v4hi_int_int
332316cd 8214 = build_function_type (V4HI_type_node,
bd793c65
BS
8215 tree_cons (NULL_TREE, V4HI_type_node,
8216 tree_cons (NULL_TREE, integer_type_node,
8217 tree_cons (NULL_TREE,
8218 integer_type_node,
8219 endlink))));
8220 /* Miscellaneous. */
8221 tree v8qi_ftype_v4hi_v4hi
8222 = build_function_type (V8QI_type_node,
8223 tree_cons (NULL_TREE, V4HI_type_node,
8224 tree_cons (NULL_TREE, V4HI_type_node,
8225 endlink)));
8226 tree v4hi_ftype_v2si_v2si
8227 = build_function_type (V4HI_type_node,
8228 tree_cons (NULL_TREE, V2SI_type_node,
8229 tree_cons (NULL_TREE, V2SI_type_node,
8230 endlink)));
8231 tree v4sf_ftype_v4sf_v4sf_int
8232 = build_function_type (V4SF_type_node,
8233 tree_cons (NULL_TREE, V4SF_type_node,
8234 tree_cons (NULL_TREE, V4SF_type_node,
8235 tree_cons (NULL_TREE,
8236 integer_type_node,
8237 endlink))));
8238 tree v4hi_ftype_v8qi_v8qi
8239 = build_function_type (V4HI_type_node,
8240 tree_cons (NULL_TREE, V8QI_type_node,
8241 tree_cons (NULL_TREE, V8QI_type_node,
8242 endlink)));
8243 tree v2si_ftype_v4hi_v4hi
8244 = build_function_type (V2SI_type_node,
8245 tree_cons (NULL_TREE, V4HI_type_node,
8246 tree_cons (NULL_TREE, V4HI_type_node,
8247 endlink)));
8248 tree v4hi_ftype_v4hi_int
8249 = build_function_type (V4HI_type_node,
8250 tree_cons (NULL_TREE, V4HI_type_node,
8251 tree_cons (NULL_TREE, integer_type_node,
8252 endlink)));
8253 tree di_ftype_di_int
8254 = build_function_type (long_long_unsigned_type_node,
8255 tree_cons (NULL_TREE, long_long_unsigned_type_node,
8256 tree_cons (NULL_TREE, integer_type_node,
8257 endlink)));
8258 tree v8qi_ftype_v8qi_di
8259 = build_function_type (V8QI_type_node,
8260 tree_cons (NULL_TREE, V8QI_type_node,
8261 tree_cons (NULL_TREE,
8262 long_long_integer_type_node,
8263 endlink)));
8264 tree v4hi_ftype_v4hi_di
8265 = build_function_type (V4HI_type_node,
8266 tree_cons (NULL_TREE, V4HI_type_node,
8267 tree_cons (NULL_TREE,
8268 long_long_integer_type_node,
8269 endlink)));
8270 tree v2si_ftype_v2si_di
8271 = build_function_type (V2SI_type_node,
8272 tree_cons (NULL_TREE, V2SI_type_node,
8273 tree_cons (NULL_TREE,
8274 long_long_integer_type_node,
8275 endlink)));
8276 tree void_ftype_void
8277 = build_function_type (void_type_node, endlink);
8278 tree void_ftype_pchar_int
8279 = build_function_type (void_type_node,
8280 tree_cons (NULL_TREE, pchar_type_node,
8281 tree_cons (NULL_TREE, integer_type_node,
8282 endlink)));
8283 tree void_ftype_unsigned
8284 = build_function_type (void_type_node,
8285 tree_cons (NULL_TREE, unsigned_type_node,
8286 endlink));
8287 tree unsigned_ftype_void
8288 = build_function_type (unsigned_type_node, endlink);
8289 tree di_ftype_void
8290 = build_function_type (long_long_unsigned_type_node, endlink);
8291 tree ti_ftype_void
8292 = build_function_type (intTI_type_node, endlink);
8293 tree v2si_ftype_v4sf
8294 = build_function_type (V2SI_type_node,
8295 tree_cons (NULL_TREE, V4SF_type_node,
8296 endlink));
8297 /* Loads/stores. */
8298 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
8299 tree_cons (NULL_TREE, V8QI_type_node,
8300 tree_cons (NULL_TREE,
8301 pchar_type_node,
8302 endlink)));
8303 tree void_ftype_v8qi_v8qi_pchar
8304 = build_function_type (void_type_node, maskmovq_args);
8305 tree v4sf_ftype_pfloat
8306 = build_function_type (V4SF_type_node,
8307 tree_cons (NULL_TREE, pfloat_type_node,
8308 endlink));
8309 tree v4sf_ftype_float
8310 = build_function_type (V4SF_type_node,
8311 tree_cons (NULL_TREE, float_type_node,
8312 endlink));
8313 tree v4sf_ftype_float_float_float_float
8314 = build_function_type (V4SF_type_node,
8315 tree_cons (NULL_TREE, float_type_node,
8316 tree_cons (NULL_TREE, float_type_node,
8317 tree_cons (NULL_TREE,
8318 float_type_node,
8319 tree_cons (NULL_TREE,
8320 float_type_node,
8321 endlink)))));
8322 /* @@@ the type is bogus */
8323 tree v4sf_ftype_v4sf_pv2si
8324 = build_function_type (V4SF_type_node,
8325 tree_cons (NULL_TREE, V4SF_type_node,
8326 tree_cons (NULL_TREE, pv2si_type_node,
8327 endlink)));
8328 tree v4sf_ftype_pv2si_v4sf
8329 = build_function_type (V4SF_type_node,
8330 tree_cons (NULL_TREE, V4SF_type_node,
8331 tree_cons (NULL_TREE, pv2si_type_node,
8332 endlink)));
8333 tree void_ftype_pfloat_v4sf
8334 = build_function_type (void_type_node,
8335 tree_cons (NULL_TREE, pfloat_type_node,
8336 tree_cons (NULL_TREE, V4SF_type_node,
8337 endlink)));
8338 tree void_ftype_pdi_di
8339 = build_function_type (void_type_node,
8340 tree_cons (NULL_TREE, pdi_type_node,
8341 tree_cons (NULL_TREE,
8342 long_long_unsigned_type_node,
8343 endlink)));
8344 /* Normal vector unops. */
8345 tree v4sf_ftype_v4sf
8346 = build_function_type (V4SF_type_node,
8347 tree_cons (NULL_TREE, V4SF_type_node,
8348 endlink));
0f290768 8349
bd793c65
BS
8350 /* Normal vector binops. */
8351 tree v4sf_ftype_v4sf_v4sf
8352 = build_function_type (V4SF_type_node,
8353 tree_cons (NULL_TREE, V4SF_type_node,
8354 tree_cons (NULL_TREE, V4SF_type_node,
8355 endlink)));
8356 tree v8qi_ftype_v8qi_v8qi
8357 = build_function_type (V8QI_type_node,
8358 tree_cons (NULL_TREE, V8QI_type_node,
8359 tree_cons (NULL_TREE, V8QI_type_node,
8360 endlink)));
8361 tree v4hi_ftype_v4hi_v4hi
8362 = build_function_type (V4HI_type_node,
8363 tree_cons (NULL_TREE, V4HI_type_node,
8364 tree_cons (NULL_TREE, V4HI_type_node,
8365 endlink)));
8366 tree v2si_ftype_v2si_v2si
8367 = build_function_type (V2SI_type_node,
8368 tree_cons (NULL_TREE, V2SI_type_node,
8369 tree_cons (NULL_TREE, V2SI_type_node,
8370 endlink)));
8371 tree ti_ftype_ti_ti
8372 = build_function_type (intTI_type_node,
8373 tree_cons (NULL_TREE, intTI_type_node,
8374 tree_cons (NULL_TREE, intTI_type_node,
8375 endlink)));
8376 tree di_ftype_di_di
8377 = build_function_type (long_long_unsigned_type_node,
8378 tree_cons (NULL_TREE, long_long_unsigned_type_node,
8379 tree_cons (NULL_TREE,
8380 long_long_unsigned_type_node,
8381 endlink)));
8382
8383 /* Add all builtins that are more or less simple operations on two
8384 operands. */
8385 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
8386 {
8387 /* Use one of the operands; the target can have a different mode for
8388 mask-generating compares. */
8389 enum machine_mode mode;
8390 tree type;
8391
8392 if (d->name == 0)
8393 continue;
8394 mode = insn_data[d->icode].operand[1].mode;
8395
8396 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
8397 continue;
8398
8399 switch (mode)
8400 {
8401 case V4SFmode:
8402 type = v4sf_ftype_v4sf_v4sf;
8403 break;
8404 case V8QImode:
8405 type = v8qi_ftype_v8qi_v8qi;
8406 break;
8407 case V4HImode:
8408 type = v4hi_ftype_v4hi_v4hi;
8409 break;
8410 case V2SImode:
8411 type = v2si_ftype_v2si_v2si;
8412 break;
8413 case TImode:
8414 type = ti_ftype_ti_ti;
8415 break;
8416 case DImode:
8417 type = di_ftype_di_di;
8418 break;
8419
8420 default:
8421 abort ();
8422 }
0f290768 8423
bd793c65
BS
8424 /* Override for comparisons. */
8425 if (d->icode == CODE_FOR_maskcmpv4sf3
8426 || d->icode == CODE_FOR_maskncmpv4sf3
8427 || d->icode == CODE_FOR_vmmaskcmpv4sf3
8428 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
8429 type = v4si_ftype_v4sf_v4sf;
8430
8431 def_builtin (d->name, type, d->code);
8432 }
8433
8434 /* Add the remaining MMX insns with somewhat more complicated types. */
8435 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
8436 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
8437 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
8438 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
8439 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
8440 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
8441 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
8442 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
8443 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
8444
8445 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
8446 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
8447 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
8448
8449 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
8450 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
8451
8452 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
8453 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
8454
8455 /* Everything beyond this point is SSE only. */
8456 if (! TARGET_SSE)
8457 return;
0f290768 8458
bd793c65
BS
8459 /* comi/ucomi insns. */
8460 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
8461 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
8462
8463 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
8464 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
8465 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
8466
8467 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
8468 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
8469 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
8470 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
8471 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
8472 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
8473
8474 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
8475 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
8476
8477 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
8478
8479 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
8480 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
8481 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
8482 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
8483 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
8484 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
8485
8486 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
8487 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
8488 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
8489 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
8490
8491 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
8492 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
8493 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
8494 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
8495
8496 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
8497 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
8498
8499 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
8500
8501 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
8502 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
8503 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
8504 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
8505 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
8506 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
8507
8508 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
8509
8510 /* Composite intrinsics. */
8511 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
8512 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
8513 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
8514 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
8515 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
8516 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
8517 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
8518}
8519
8520/* Errors in the source file can cause expand_expr to return const0_rtx
8521 where we expect a vector. To avoid crashing, use one of the vector
8522 clear instructions. */
8523static rtx
8524safe_vector_operand (x, mode)
8525 rtx x;
8526 enum machine_mode mode;
8527{
8528 if (x != const0_rtx)
8529 return x;
8530 x = gen_reg_rtx (mode);
8531
8532 if (VALID_MMX_REG_MODE (mode))
8533 emit_insn (gen_mmx_clrdi (mode == DImode ? x
8534 : gen_rtx_SUBREG (DImode, x, 0)));
8535 else
8536 emit_insn (gen_sse_clrti (mode == TImode ? x
8537 : gen_rtx_SUBREG (TImode, x, 0)));
8538 return x;
8539}
8540
8541/* Subroutine of ix86_expand_builtin to take care of binop insns. */
8542
8543static rtx
8544ix86_expand_binop_builtin (icode, arglist, target)
8545 enum insn_code icode;
8546 tree arglist;
8547 rtx target;
8548{
8549 rtx pat;
8550 tree arg0 = TREE_VALUE (arglist);
8551 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8552 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8553 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8554 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8555 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8556 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
8557
8558 if (VECTOR_MODE_P (mode0))
8559 op0 = safe_vector_operand (op0, mode0);
8560 if (VECTOR_MODE_P (mode1))
8561 op1 = safe_vector_operand (op1, mode1);
8562
8563 if (! target
8564 || GET_MODE (target) != tmode
8565 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8566 target = gen_reg_rtx (tmode);
8567
8568 /* In case the insn wants input operands in modes different from
8569 the result, abort. */
8570 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
8571 abort ();
8572
8573 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8574 op0 = copy_to_mode_reg (mode0, op0);
8575 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8576 op1 = copy_to_mode_reg (mode1, op1);
8577
8578 pat = GEN_FCN (icode) (target, op0, op1);
8579 if (! pat)
8580 return 0;
8581 emit_insn (pat);
8582 return target;
8583}
8584
8585/* Subroutine of ix86_expand_builtin to take care of stores. */
8586
8587static rtx
8588ix86_expand_store_builtin (icode, arglist, shuffle)
8589 enum insn_code icode;
8590 tree arglist;
8591 int shuffle;
8592{
8593 rtx pat;
8594 tree arg0 = TREE_VALUE (arglist);
8595 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8596 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8597 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8598 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
8599 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
8600
8601 if (VECTOR_MODE_P (mode1))
8602 op1 = safe_vector_operand (op1, mode1);
8603
8604 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8605 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8606 op1 = copy_to_mode_reg (mode1, op1);
8607 if (shuffle >= 0)
8608 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
8609 pat = GEN_FCN (icode) (op0, op1);
8610 if (pat)
8611 emit_insn (pat);
8612 return 0;
8613}
8614
8615/* Subroutine of ix86_expand_builtin to take care of unop insns. */
8616
8617static rtx
8618ix86_expand_unop_builtin (icode, arglist, target, do_load)
8619 enum insn_code icode;
8620 tree arglist;
8621 rtx target;
8622 int do_load;
8623{
8624 rtx pat;
8625 tree arg0 = TREE_VALUE (arglist);
8626 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8627 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8628 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8629
8630 if (! target
8631 || GET_MODE (target) != tmode
8632 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8633 target = gen_reg_rtx (tmode);
8634 if (do_load)
8635 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8636 else
8637 {
8638 if (VECTOR_MODE_P (mode0))
8639 op0 = safe_vector_operand (op0, mode0);
8640
8641 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8642 op0 = copy_to_mode_reg (mode0, op0);
8643 }
8644
8645 pat = GEN_FCN (icode) (target, op0);
8646 if (! pat)
8647 return 0;
8648 emit_insn (pat);
8649 return target;
8650}
8651
8652/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
8653 sqrtss, rsqrtss, rcpss. */
8654
8655static rtx
8656ix86_expand_unop1_builtin (icode, arglist, target)
8657 enum insn_code icode;
8658 tree arglist;
8659 rtx target;
8660{
8661 rtx pat;
8662 tree arg0 = TREE_VALUE (arglist);
8663 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8664 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8665 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8666
8667 if (! target
8668 || GET_MODE (target) != tmode
8669 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8670 target = gen_reg_rtx (tmode);
8671
8672 if (VECTOR_MODE_P (mode0))
8673 op0 = safe_vector_operand (op0, mode0);
8674
8675 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8676 op0 = copy_to_mode_reg (mode0, op0);
8677
8678 pat = GEN_FCN (icode) (target, op0, op0);
8679 if (! pat)
8680 return 0;
8681 emit_insn (pat);
8682 return target;
8683}
8684
/* Subroutine of ix86_expand_builtin to take care of comparison insns.
   D describes the builtin (icode, comparison code, and the swap flag),
   ARGLIST holds the two operands, TARGET is the suggested destination.
   Returns the result register, or 0 if the named pattern failed.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      /* Copy op1 into a fresh target first, since the insn overwrites
	 its first input and we must not clobber the user's operand.  */
      target = gen_reg_rtx (tmode);
      emit_move_insn (target, op1);
      op1 = op0;
      op0 = target;
      comparison = swap_condition (comparison);
    }
  else if (! target
	   || GET_MODE (target) != tmode
	   || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* NOTE(review): the comparison rtx is built in MODE0 (a vector mode
     for these patterns) — confirm this matches what the maskcmp
     patterns expect as their operand 3.  */
  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
8736
/* Subroutine of ix86_expand_builtin to take care of comi insns.
   The comparison result is materialized as a QImode setcc on the flags
   produced by the comi insn; the value is returned zero-extended in the
   low byte of a fresh SImode pseudo.  */

static rtx
ix86_expand_sse_comi (d, arglist, target)
     struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
      comparison = swap_condition (comparison);
    }

  /* Zero the whole SImode pseudo first so the bits above the setcc'd
     low byte are well defined, then write the result through a QImode
     subreg.  The incoming TARGET suggestion is ignored here.  */
  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* NOTE(review): the comparison rtx is built in MODE0 (a vector mode)
     and reused both inside the comi pattern and as the setcc condition —
     confirm against the sse comi patterns in i386.md.  */
  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_setcc_2 (target, op2));

  return target;
}
8788
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.

   Builtins needing special handling are expanded in the switch below;
   everything else is dispatched through the bdesc_2arg / bdesc_1arg /
   bdesc_comi tables at the end.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2, arg3;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    /* No-result builtins: emit the insn and return 0.  */
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    /* Move an int into the low half of an MMX (DImode) pseudo.  */
    case IX86_BUILTIN_M_FROM_INT:
      target = gen_reg_rtx (DImode);
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
      return target;

    /* Extract the low half of an MMX (DImode) value as an int.  */
    case IX86_BUILTIN_M_TO_INT:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      op0 = copy_to_mode_reg (DImode, op0);
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
      return target;

    case IX86_BUILTIN_PEXTRW:
      icode = CODE_FOR_mmx_pextrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      /* The selector must satisfy the pattern's immediate predicate;
	 it cannot be forced into a register.  */
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
      icode = CODE_FOR_mmx_pinsrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
      icode = CODE_FOR_mmx_maskmovq;
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      /* NOTE(review): op0 is checked with operand[1].predicate, unlike
	 every sibling case which pairs operand index and predicate —
	 confirm this is intentional and not a typo for operand[0].  */
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    /* Scalar unops that take their operand twice.  */
    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    /* Loads: do_load == 1 dereferences the pointer argument.  */
    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
      icode = (fcode == IX86_BUILTIN_LOADHPS
	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      /* Second argument is a pointer; load through it.  */
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS
	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      /* First argument is a pointer; store through it.  */
      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      /* The mem appears both as destination and as the pass-through
	 source half of the movhps/movlps pattern.  */
      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);

    /* ldmxcsr/stmxcsr go through a stack temporary.  */
    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_PREFETCH:
      icode = CODE_FOR_prefetch;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}

      op0 = copy_to_mode_reg (Pmode, op0);
      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_SHUFPS:
      icode = CODE_FOR_sse_shufps;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
      icode = CODE_FOR_mmx_pshufw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      /* op0/op1 feed pattern operands 2 and 3 (operand 1 is TARGET).  */
      mode0 = insn_data[icode].operand[2].mode;
      mode1 = insn_data[icode].operand[3].mode;

      /* NOTE(review): op0 feeds operand 2 but is checked with
	 operand[1].predicate — confirm against the mmx_pshufw pattern.  */
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    /* Composite intrinsics.  */
    case IX86_BUILTIN_SETPS1:
      /* Store the scalar to a stack slot, load it into element 0 and
	 broadcast it with shufps(0).  */
      target = assign_386_stack_local (SFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (change_address (target, SFmode, XEXP (target, 0)),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_loadss (op0, change_address (target, V4SFmode,
						      XEXP (target, 0))));
      emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
      return op0;

    case IX86_BUILTIN_SETPS:
      /* Build the vector in a stack slot, one SFmode element at a time,
	 then load it with movaps.  */
      target = assign_386_stack_local (V4SFmode, 0);
      op0 = change_address (target, SFmode, XEXP (target, 0));
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
      emit_move_insn (op0,
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adj_offsettable_operand (op0, 4),
		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adj_offsettable_operand (op0, 8),
		      expand_expr (arg2, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adj_offsettable_operand (op0, 12),
		      expand_expr (arg3, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_movaps (op0, target));
      return op0;

    case IX86_BUILTIN_CLRPS:
      target = gen_reg_rtx (TImode);
      emit_insn (gen_sse_clrti (target));
      return target;

    case IX86_BUILTIN_LOADRPS:
      /* Load, then reverse the elements with shufps(0x1b).  */
      target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
					 gen_reg_rtx (V4SFmode), 1);
      emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
      return target;

    case IX86_BUILTIN_LOADPS1:
      /* Load element 0, then broadcast it with shufps(0).  */
      target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
					 gen_reg_rtx (V4SFmode), 1);
      emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPS1:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
    case IX86_BUILTIN_STORERPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    default:
      break;
    }

  /* Table-driven builtins: binary ops (with compares special-cased),
     then unary ops, then comi ops.  */
  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}
4211a8fb
JH
9179
9180/* Store OPERAND to the memory after reload is completed. This means
9181 that we can't easilly use assign_stack_local. */
9182rtx
9183ix86_force_to_memory (mode, operand)
9184 enum machine_mode mode;
9185 rtx operand;
9186{
9187 if (!reload_completed)
9188 abort ();
9189 switch (mode)
9190 {
9191 case DImode:
9192 {
9193 rtx operands[2];
9194 split_di (&operand, 1, operands, operands+1);
9195 emit_insn (
9196 gen_rtx_SET (VOIDmode,
9197 gen_rtx_MEM (SImode,
9198 gen_rtx_PRE_DEC (Pmode,
9199 stack_pointer_rtx)),
9200 operands[1]));
9201 emit_insn (
9202 gen_rtx_SET (VOIDmode,
9203 gen_rtx_MEM (SImode,
9204 gen_rtx_PRE_DEC (Pmode,
9205 stack_pointer_rtx)),
9206 operands[0]));
9207 }
9208 break;
9209 case HImode:
9210 /* It is better to store HImodes as SImodes. */
9211 if (!TARGET_PARTIAL_REG_STALL)
9212 operand = gen_lowpart (SImode, operand);
9213 /* FALLTHRU */
9214 case SImode:
9215 emit_insn (
9216 gen_rtx_SET (VOIDmode,
9217 gen_rtx_MEM (GET_MODE (operand),
9218 gen_rtx_PRE_DEC (SImode,
9219 stack_pointer_rtx)),
9220 operand));
9221 break;
9222 default:
9223 abort();
9224 }
9225 return gen_rtx_MEM (mode, stack_pointer_rtx);
9226}
9227
9228/* Free operand from the memory. */
9229void
9230ix86_free_from_memory (mode)
9231 enum machine_mode mode;
9232{
9233 /* Use LEA to deallocate stack space. In peephole2 it will be converted
9234 to pop or add instruction if registers are available. */
9235 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9236 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9237 GEN_INT (mode == DImode
9238 ? 8
9239 : mode == HImode && TARGET_PARTIAL_REG_STALL
9240 ? 2
9241 : 4))));
9242}
a946dd00 9243
f84aa48a
JH
9244/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
9245 QImode must go into class Q_REGS.
9246 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
9247 movdf to do mem-to-mem moves through integer regs. */
9248enum reg_class
9249ix86_preferred_reload_class (x, class)
9250 rtx x;
9251 enum reg_class class;
9252{
9253 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
9254 {
9255 /* SSE can't load any constant directly yet. */
9256 if (SSE_CLASS_P (class))
9257 return NO_REGS;
9258 /* Floats can load 0 and 1. */
9259 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
9260 {
9261 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
9262 if (MAYBE_SSE_CLASS_P (class))
9263 return (reg_class_subset_p (class, GENERAL_REGS)
9264 ? GENERAL_REGS : FLOAT_REGS);
9265 else
9266 return class;
9267 }
9268 /* General regs can load everything. */
9269 if (reg_class_subset_p (class, GENERAL_REGS))
9270 return GENERAL_REGS;
9271 /* In case we haven't resolved FLOAT or SSE yet, give up. */
9272 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
9273 return NO_REGS;
9274 }
9275 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
9276 return NO_REGS;
9277 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
9278 return Q_REGS;
9279 return class;
9280}
9281
9282/* If we are copying between general and FP registers, we need a memory
9283 location. The same is true for SSE and MMX registers.
9284
9285 The macro can't work reliably when one of the CLASSES is class containing
9286 registers from multiple units (SSE, MMX, integer). We avoid this by never
9287 combining those units in single alternative in the machine description.
9288 Ensure that this constraint holds to avoid unexpected surprises.
9289
9290 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
9291 enforce these sanity checks. */
9292int
9293ix86_secondary_memory_needed (class1, class2, mode, strict)
9294 enum reg_class class1, class2;
9295 enum machine_mode mode;
9296 int strict;
9297{
9298 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
9299 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
9300 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
9301 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
9302 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
9303 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
9304 {
9305 if (strict)
9306 abort ();
9307 else
9308 return 1;
9309 }
9310 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
9311 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
9312 && (mode) != SImode)
9313 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
9314 && (mode) != SImode));
9315}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */
int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In case of copying from general_purpose_register we may emit
     multiple stores followed by single load causing memory size mismatch
     stall.  Count this as arbitarily high cost of 20.  */
  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      /* NOTE(review): the comment above says cost 20, but the code
	 returns 10 — confirm which is intended.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	return 10;
      return (MEMORY_MOVE_COST (mode, class1, 0)
	      + MEMORY_MOVE_COST (mode, class2, 1));
    }
  /* Moves between SSE/MMX and integer unit are expensive.
     ??? We should make this cost CPU specific.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  /* Within a single unit, use the per-unit move cost.  */
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
9351
a946dd00
JH
/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  /* No other register can hold condition codes or the unsupported
     RANDOM/PARTIAL_INT mode classes.  */
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  /* Each special-purpose register file accepts only its own modes.  */
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return VALID_SSE_REG_MODE (mode);
  if (MMX_REGNO_P (regno))
    return VALID_MMX_REG_MODE (mode);
  /* We handle both integer and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care for QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  Registers 0..3 (the Q_REGS)
     and 64-bit mode are always fine.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  /* Otherwise allow it only when the stall cannot matter (during/after
     reload) or the target doesn't suffer partial register stalls.  */
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}
fa79946e
JH
9381
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.
 */
int
ix86_memory_move_cost (mode, class, in)
     enum machine_mode mode;
     enum reg_class class;
     int in;
{
  if (FLOAT_CLASS_P (class))
    {
      /* x87 cost tables: index 0 = SFmode, 1 = DFmode, 2 = XF/TFmode.  */
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	case TFmode:
	  index = 2;
	  break;
	default:
	  /* Non-FP mode in an FP class: punish heavily.  */
	  return 100;
	}
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      /* SSE cost tables are indexed by size: 4, 8 or 16 bytes.  */
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      /* MMX cost tables are indexed by size: 4 or 8 bytes.  */
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  /* Integer registers: int_load/int_store index 0 = byte, 1 = word,
     2 = dword.  */
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      /* Byte loads into non-Q classes are modeled as movzbl; byte
	 stores from non-Q classes carry a flat penalty of 4.  */
      if (in)
	return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
		: ix86_cost->movzbl_load);
      else
	return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
		: ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
	      * (int) GET_MODE_SIZE (mode) / 4);
    }
}
This page took 2.208234 seconds and 5 git commands to generate.