]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
* Makefile.in (concat.o): Depend on config.h.
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
e075ae69 1/* Subroutines used for code generation on IA-32.
8752c357 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
4592bdcb 3 Free Software Foundation, Inc.
2a2ab3f9
JVA
4
5This file is part of GNU CC.
6
7GNU CC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU CC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU CC; see the file COPYING. If not, write to
97aadbb9 19the Free Software Foundation, 59 Temple Place - Suite 330,
0f290768 20Boston, MA 02111-1307, USA. */
2a2ab3f9 21
2a2ab3f9 22#include "config.h"
1fba7553 23#include <setjmp.h>
bb5177ac 24#include "system.h"
2a2ab3f9 25#include "rtl.h"
6baf1cc8
BS
26#include "tree.h"
27#include "tm_p.h"
2a2ab3f9
JVA
28#include "regs.h"
29#include "hard-reg-set.h"
30#include "real.h"
31#include "insn-config.h"
32#include "conditions.h"
2a2ab3f9
JVA
33#include "output.h"
34#include "insn-attr.h"
2a2ab3f9 35#include "flags.h"
a8ffcc81 36#include "except.h"
ecbc4695 37#include "function.h"
00c79232 38#include "recog.h"
ced8dd8c 39#include "expr.h"
f103890b 40#include "toplev.h"
e075ae69 41#include "basic-block.h"
1526a060 42#include "ggc.h"
672a6f42
NB
43#include "target.h"
44#include "target-def.h"
2a2ab3f9 45
8dfe5673
RK
46#ifndef CHECK_STACK_LIMIT
47#define CHECK_STACK_LIMIT -1
48#endif
49
32b5b1aa
SC
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  6,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  23,					/* cost of a divide/mod */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   (paired with the fp load entry
					   above, like the int/MMX/SSE
					   load/store pairs below) */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
};
82
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  12,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  40,					/* cost of a divide/mod */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   (pairs with the fp load entry) */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3					/* MMX or SSE register to integer */
};
114
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  11,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  25,					/* cost of a divide/mod */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   (pairs with the fp load entry) */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3					/* MMX or SSE register to integer */
};
146
856b07a1
SC
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  4,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  17,					/* cost of a divide/mod */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   (pairs with the fp load entry) */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3					/* MMX or SSE register to integer */
};
178
a269a03c
JC
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  18,					/* cost of a divide/mod */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   (pairs with the fp load entry) */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6					/* MMX or SSE register to integer */
};
210
309ada50
JH
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  5,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  42,					/* cost of a divide/mod */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 20},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 16},				/* cost of storing fp registers
					   (pairs with the fp load entry) */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6					/* MMX or SSE register to integer */
};
242
b4e89e2d
JH
struct processor_costs pentium4_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  8,					/* variable shift costs */
  8,					/* constant shift costs */
  30,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  112,					/* cost of a divide/mod */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   (pairs with the fp load entry) */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
};
274
32b5b1aa
SC
/* Cost table currently in effect; override_options points this at the
   table matching the -mcpu= selection.  Pentium is the default.  */
struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  Each x86_* constant below
   is a mask of the processors (one bit per PROCESSOR_* value) for
   which the named feature or optimization should be used; tested
   against the active CPU's bit elsewhere in the port.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
a269a03c 321
564d80f4 322#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
2a2ab3f9 323
e075ae69
RH
324const char * const hi_reg_name[] = HI_REGISTER_NAMES;
325const char * const qi_reg_name[] = QI_REGISTER_NAMES;
326const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
4c0d89b5
RS
327
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* extended integer registers (64bit mode) */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* extended SSE registers (64bit mode) */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
c572e5ba 353
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
366
0f7fa3d0
JH
367/* The "default" register map used in 64bit mode. */
368int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
369{
370 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
371 33, 34, 35, 36, 37, 38, 39, 40 /* fp regs */
372 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
373 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
374 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
375 8,9,10,11,12,13,14,15, /* extended integer registers */
376 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
377};
378
83774849
RH
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
443
c572e5ba
JVA
444/* Test and compare insns in i386.md store the information needed to
445 generate branch and scc insns here. */
446
e075ae69
RH
447struct rtx_def *ix86_compare_op0 = NULL_RTX;
448struct rtx_def *ix86_compare_op1 = NULL_RTX;
f5316dfe 449
#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  /* Per-mode scratch stack slots, lazily created (at most
     MAX_386_STACK_LOCALS per machine mode).  */
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  /* Nonzero if the varargs register save area must be set up.
     NOTE: "varrargs" misspelling is kept -- the accessor macro below
     and code elsewhere use this exact name.  */
  int save_varrargs_registers;
  /* NOTE(review): presumably nonzero when the function accesses the
     previous stack frame; set outside this chunk -- confirm at the
     point of use.  */
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
36edd3cc 464
4dd2ac2c
JH
465/* Structure describing stack frame layout.
466 Stack grows downward:
467
468 [arguments]
469 <- ARG_POINTER
470 saved pc
471
472 saved frame pointer if frame_pointer_needed
473 <- HARD_FRAME_POINTER
474 [saved regs]
475
476 [padding1] \
477 )
478 [va_arg registers] (
479 > to_allocate <- FRAME_POINTER
480 [frame] (
481 )
482 [padding2] /
483 */
484struct ix86_frame
485{
486 int nregs;
487 int padding1;
8362f420 488 int va_arg_size;
4dd2ac2c
JH
489 HOST_WIDE_INT frame;
490 int padding2;
491 int outgoing_arguments_size;
8362f420 492 int red_zone_size;
4dd2ac2c
JH
493
494 HOST_WIDE_INT to_allocate;
495 /* The offsets relative to ARG_POINTER. */
496 HOST_WIDE_INT frame_pointer_offset;
497 HOST_WIDE_INT hard_frame_pointer_offset;
498 HOST_WIDE_INT stack_pointer_offset;
499};
500
6189a572
JH
501/* Code model option as passed by user. */
502const char *ix86_cmodel_string;
503/* Parsed value. */
504enum cmodel ix86_cmodel;
505
c8c5cb99 506/* which cpu are we scheduling for */
e42ea7f9 507enum processor_type ix86_cpu;
c8c5cb99
SC
508
509/* which instruction set architecture to use. */
c942177e 510int ix86_arch;
c8c5cb99
SC
511
512/* Strings to hold which cpu and instruction set architecture to use. */
9c23aa47
ZW
513const char *ix86_cpu_string; /* for -mcpu=<xxx> */
514const char *ix86_arch_string; /* for -march=<xxx> */
c8c5cb99 515
0f290768 516/* # of registers to use to pass arguments. */
e075ae69 517const char *ix86_regparm_string;
e9a25f70 518
e075ae69
RH
519/* ix86_regparm_string as a number */
520int ix86_regparm;
e9a25f70
JL
521
522/* Alignment to use for loops and jumps: */
523
0f290768 524/* Power of two alignment for loops. */
e075ae69 525const char *ix86_align_loops_string;
e9a25f70 526
0f290768 527/* Power of two alignment for non-loop jumps. */
e075ae69 528const char *ix86_align_jumps_string;
e9a25f70 529
3af4bd89 530/* Power of two alignment for stack boundary in bytes. */
e075ae69 531const char *ix86_preferred_stack_boundary_string;
3af4bd89
JH
532
533/* Preferred alignment for stack boundary in bits. */
e075ae69 534int ix86_preferred_stack_boundary;
3af4bd89 535
e9a25f70 536/* Values 1-5: see jump.c */
e075ae69
RH
537int ix86_branch_cost;
538const char *ix86_branch_cost_string;
e9a25f70 539
0f290768 540/* Power of two alignment for functions. */
e075ae69 541const char *ix86_align_funcs_string;
e075ae69 542\f
f6da8bc3
KG
543static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
544static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
e075ae69 545 int, int, FILE *));
f6da8bc3 546static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
3a3677ff
RH
547static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
548 rtx *, rtx *));
f6da8bc3
KG
549static rtx gen_push PARAMS ((rtx));
550static int memory_address_length PARAMS ((rtx addr));
551static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
552static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
553static int ix86_safe_length PARAMS ((rtx));
554static enum attr_memory ix86_safe_memory PARAMS ((rtx));
555static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
556static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
557static void ix86_dump_ppro_packet PARAMS ((FILE *));
558static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
559static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
e075ae69 560 rtx));
f6da8bc3
KG
561static void ix86_init_machine_status PARAMS ((struct function *));
562static void ix86_mark_machine_status PARAMS ((struct function *));
37b15744 563static void ix86_free_machine_status PARAMS ((struct function *));
2b589241 564static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
f6da8bc3 565static int ix86_safe_length_prefix PARAMS ((rtx));
0903fcab
JH
566static int ix86_nsaved_regs PARAMS((void));
567static void ix86_emit_save_regs PARAMS((void));
c6036a37 568static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
37a58036 569static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
0e4970d7 570static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
c6991660
KG
571static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
572static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
55efb413 573static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
0945b39d 574static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
0945b39d
JH
575static rtx ix86_expand_aligntest PARAMS ((rtx, int));
576static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
0ecf09f9 577static void ix86_output_main_function_alignment_hack PARAMS ((FILE *f, int));
e075ae69
RH
578
579struct ix86_address
580{
581 rtx base, index, disp;
582 HOST_WIDE_INT scale;
583};
b08de47e 584
e075ae69 585static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
bd793c65
BS
586
587struct builtin_description;
588static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
589 rtx));
590static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
591 rtx));
592static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
593static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
594static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
595static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
596static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
c0c102a9
JH
597static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
598static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
599 enum rtx_code *,
600 enum rtx_code *,
601 enum rtx_code *));
9e7adcb3
JH
602static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
603 rtx *, rtx *));
604static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
605static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
606static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
607static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
37a58036 608static int ix86_save_reg PARAMS ((int, int));
4dd2ac2c 609static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
8d8e52be 610static int ix86_comp_type_attributes PARAMS ((tree, tree));
672a6f42
NB
611\f
612/* Initialize the GCC target structure. */
f5f4be42 613#undef TARGET_VALID_TYPE_ATTRIBUTE
672a6f42 614#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
f5f4be42
NB
615# define TARGET_VALID_TYPE_ATTRIBUTE i386_pe_valid_type_attribute_p
616# undef TARGET_VALID_DECL_ATTRIBUTE
617# define TARGET_VALID_DECL_ATTRIBUTE i386_pe_valid_decl_attribute_p
618# undef TARGET_MERGE_DECL_ATTRIBUTES
619# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
620#else
621# define TARGET_VALID_TYPE_ATTRIBUTE ix86_valid_type_attribute_p
672a6f42
NB
622#endif
623
8d8e52be
JM
624#undef TARGET_COMP_TYPE_ATTRIBUTES
625#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
626
f6155fda
SS
627#undef TARGET_INIT_BUILTINS
628#define TARGET_INIT_BUILTINS ix86_init_builtins
629
630#undef TARGET_EXPAND_BUILTIN
631#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
632
08c148a8
NB
633#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
634 static void ix86_osf_output_function_prologue PARAMS ((FILE *,
635 HOST_WIDE_INT));
636# undef TARGET_ASM_FUNCTION_PROLOGUE
637# define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
0ecf09f9
JH
638#else
639# undef TARGET_ASM_FUNCTION_PROLOGUE
640# define TARGET_ASM_FUNCTION_PROLOGUE \
641 ix86_output_main_function_alignment_hack
08c148a8
NB
642#endif
643
17b53c33
NB
644#undef TARGET_ASM_OPEN_PAREN
645#define TARGET_ASM_OPEN_PAREN ""
646#undef TARGET_ASM_CLOSE_PAREN
647#define TARGET_ASM_CLOSE_PAREN ""
648
f6897b10 649struct gcc_target targetm = TARGET_INITIALIZER;
e075ae69 650\f
f5316dfe
MM
651/* Sometimes certain combinations of command options do not make
652 sense on a particular target machine. You can define a macro
653 `OVERRIDE_OPTIONS' to take account of this. This macro, if
654 defined, is executed once just after all the command options have
655 been parsed.
656
657 Don't use this macro to turn on various extra optimizations for
658 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
659
660void
661override_options ()
662{
400500c4 663 int i;
e075ae69
RH
664 /* Comes from final.c -- no real reason to change it. */
665#define MAX_CODE_ALIGN 16
f5316dfe 666
c8c5cb99
SC
667 static struct ptt
668 {
e075ae69
RH
669 struct processor_costs *cost; /* Processor costs */
670 int target_enable; /* Target flags to enable. */
671 int target_disable; /* Target flags to disable. */
672 int align_loop; /* Default alignments. */
673 int align_jump;
674 int align_func;
675 int branch_cost;
676 }
0f290768 677 const processor_target_table[PROCESSOR_max] =
e075ae69
RH
678 {
679 {&i386_cost, 0, 0, 2, 2, 2, 1},
680 {&i486_cost, 0, 0, 4, 4, 4, 1},
681 {&pentium_cost, 0, 0, -4, -4, -4, 1},
682 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
309ada50 683 {&k6_cost, 0, 0, -5, -5, 4, 1},
b4e89e2d
JH
684 {&athlon_cost, 0, 0, 4, -4, 4, 1},
685 {&pentium4_cost, 0, 0, 2, 2, 2, 1}
e075ae69
RH
686 };
687
688 static struct pta
689 {
0f290768 690 const char *name; /* processor name or nickname. */
e075ae69
RH
691 enum processor_type processor;
692 }
0f290768 693 const processor_alias_table[] =
e075ae69
RH
694 {
695 {"i386", PROCESSOR_I386},
696 {"i486", PROCESSOR_I486},
697 {"i586", PROCESSOR_PENTIUM},
698 {"pentium", PROCESSOR_PENTIUM},
699 {"i686", PROCESSOR_PENTIUMPRO},
700 {"pentiumpro", PROCESSOR_PENTIUMPRO},
e075ae69 701 {"k6", PROCESSOR_K6},
309ada50 702 {"athlon", PROCESSOR_ATHLON},
b4e89e2d 703 {"pentium4", PROCESSOR_PENTIUM4},
3af4bd89 704 };
c8c5cb99 705
0f290768 706 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
c8c5cb99 707
f5316dfe
MM
708#ifdef SUBTARGET_OVERRIDE_OPTIONS
709 SUBTARGET_OVERRIDE_OPTIONS;
710#endif
711
5a6ee819 712 ix86_arch = PROCESSOR_I386;
e075ae69
RH
713 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
714
6189a572
JH
715 if (ix86_cmodel_string != 0)
716 {
717 if (!strcmp (ix86_cmodel_string, "small"))
718 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
719 else if (flag_pic)
720 sorry ("Code model %s not supported in PIC mode", ix86_cmodel_string);
721 else if (!strcmp (ix86_cmodel_string, "32"))
722 ix86_cmodel = CM_32;
723 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
724 ix86_cmodel = CM_KERNEL;
725 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
726 ix86_cmodel = CM_MEDIUM;
727 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
728 ix86_cmodel = CM_LARGE;
729 else
730 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
731 }
732 else
733 {
734 ix86_cmodel = CM_32;
735 if (TARGET_64BIT)
736 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
737 }
738 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
739 error ("Code model `%s' not supported in the %s bit mode.",
740 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
741 if (ix86_cmodel == CM_LARGE)
742 sorry ("Code model `large' not supported yet.");
0c2dc519
JH
743 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
744 sorry ("%i-bit mode not compiled in.",
745 (target_flags & MASK_64BIT) ? 64 : 32);
6189a572 746
e075ae69
RH
747 if (ix86_arch_string != 0)
748 {
e075ae69
RH
749 for (i = 0; i < pta_size; i++)
750 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
751 {
752 ix86_arch = processor_alias_table[i].processor;
753 /* Default cpu tuning to the architecture. */
754 ix86_cpu = ix86_arch;
755 break;
756 }
400500c4 757
e075ae69
RH
758 if (i == pta_size)
759 error ("bad value (%s) for -march= switch", ix86_arch_string);
760 }
761
762 if (ix86_cpu_string != 0)
763 {
e075ae69
RH
764 for (i = 0; i < pta_size; i++)
765 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
766 {
767 ix86_cpu = processor_alias_table[i].processor;
768 break;
769 }
770 if (i == pta_size)
771 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
772 }
773
774 ix86_cost = processor_target_table[ix86_cpu].cost;
775 target_flags |= processor_target_table[ix86_cpu].target_enable;
776 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
777
36edd3cc
BS
778 /* Arrange to set up i386_stack_locals for all functions. */
779 init_machine_status = ix86_init_machine_status;
1526a060 780 mark_machine_status = ix86_mark_machine_status;
37b15744 781 free_machine_status = ix86_free_machine_status;
36edd3cc 782
0f290768 783 /* Validate -mregparm= value. */
e075ae69 784 if (ix86_regparm_string)
b08de47e 785 {
400500c4
RK
786 i = atoi (ix86_regparm_string);
787 if (i < 0 || i > REGPARM_MAX)
788 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
789 else
790 ix86_regparm = i;
b08de47e 791 }
0d7d98ee
JH
792 else
793 if (TARGET_64BIT)
794 ix86_regparm = REGPARM_MAX;
b08de47e 795
3e18fdf6 796 /* If the user has provided any of the -malign-* options,
a4f31c00 797 warn and use that value only if -falign-* is not set.
3e18fdf6 798 Remove this code in GCC 3.2 or later. */
e075ae69 799 if (ix86_align_loops_string)
b08de47e 800 {
3e18fdf6
GK
801 warning ("-malign-loops is obsolete, use -falign-loops");
802 if (align_loops == 0)
803 {
804 i = atoi (ix86_align_loops_string);
805 if (i < 0 || i > MAX_CODE_ALIGN)
806 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
807 else
808 align_loops = 1 << i;
809 }
b08de47e 810 }
3af4bd89 811
e075ae69 812 if (ix86_align_jumps_string)
b08de47e 813 {
3e18fdf6
GK
814 warning ("-malign-jumps is obsolete, use -falign-jumps");
815 if (align_jumps == 0)
816 {
817 i = atoi (ix86_align_jumps_string);
818 if (i < 0 || i > MAX_CODE_ALIGN)
819 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
820 else
821 align_jumps = 1 << i;
822 }
b08de47e 823 }
b08de47e 824
e075ae69 825 if (ix86_align_funcs_string)
b08de47e 826 {
3e18fdf6
GK
827 warning ("-malign-functions is obsolete, use -falign-functions");
828 if (align_functions == 0)
829 {
830 i = atoi (ix86_align_funcs_string);
831 if (i < 0 || i > MAX_CODE_ALIGN)
832 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
833 else
834 align_functions = 1 << i;
835 }
b08de47e 836 }
3af4bd89 837
3e18fdf6
GK
838 /* Default align_* from the processor table. */
839#define abs(n) (n < 0 ? -n : n)
840 if (align_loops == 0)
841 align_loops = 1 << abs (processor_target_table[ix86_cpu].align_loop);
842 if (align_jumps == 0)
843 align_jumps = 1 << abs (processor_target_table[ix86_cpu].align_jump);
844 if (align_functions == 0)
845 align_functions = 1 << abs (processor_target_table[ix86_cpu].align_func);
846
e4c0478d 847 /* Validate -mpreferred-stack-boundary= value, or provide default.
3af4bd89 848 The default of 128 bits is for Pentium III's SSE __m128. */
e075ae69
RH
849 ix86_preferred_stack_boundary = 128;
850 if (ix86_preferred_stack_boundary_string)
3af4bd89 851 {
400500c4 852 i = atoi (ix86_preferred_stack_boundary_string);
0d7d98ee
JH
853 if (i < (TARGET_64BIT ? 3 : 2) || i > 31)
854 error ("-mpreferred-stack-boundary=%d is not between %d and 31", i,
855 TARGET_64BIT ? 3 : 2);
400500c4
RK
856 else
857 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 858 }
77a989d1 859
0f290768 860 /* Validate -mbranch-cost= value, or provide default. */
e075ae69
RH
861 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
862 if (ix86_branch_cost_string)
804a8ee0 863 {
400500c4
RK
864 i = atoi (ix86_branch_cost_string);
865 if (i < 0 || i > 5)
866 error ("-mbranch-cost=%d is not between 0 and 5", i);
867 else
868 ix86_branch_cost = i;
804a8ee0 869 }
804a8ee0 870
e9a25f70
JL
871 /* Keep nonleaf frame pointers. */
872 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 873 flag_omit_frame_pointer = 1;
e075ae69
RH
874
875 /* If we're doing fast math, we don't care about comparison order
876 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 877 if (flag_unsafe_math_optimizations)
e075ae69
RH
878 target_flags &= ~MASK_IEEE_FP;
879
a7180f70
BS
880 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
881 on by -msse. */
882 if (TARGET_SSE)
883 target_flags |= MASK_MMX;
c6036a37
JH
884
885 if ((x86_accumulate_outgoing_args & CPUMASK)
886 && !(target_flags & MASK_NO_ACCUMULATE_OUTGOING_ARGS)
887 && !optimize_size)
888 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
f5316dfe
MM
889}
890\f
32b5b1aa 891void
c6aded7c 892optimization_options (level, size)
32b5b1aa 893 int level;
bb5177ac 894 int size ATTRIBUTE_UNUSED;
32b5b1aa 895{
e9a25f70
JL
896 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
897 make the problem with not enough registers even worse. */
32b5b1aa
SC
898#ifdef INSN_SCHEDULING
899 if (level > 1)
900 flag_schedule_insns = 0;
901#endif
902}
b08de47e 903\f
b08de47e
MM
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for TYPE.  The attributes in ATTRIBUTES have previously been
   assigned to TYPE.  */

int
ix86_valid_type_attribute_p (type, attributes, identifier, args)
     tree type;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier;
     tree args;
{
  /* These attributes only make sense on functions and the few decl
     kinds listed; reject everything else up front.  */
  if (TREE_CODE (type) != FUNCTION_TYPE
      && TREE_CODE (type) != METHOD_TYPE
      && TREE_CODE (type) != FIELD_DECL
      && TREE_CODE (type) != TYPE_DECL)
    return 0;

  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  Ignored in 64-bit mode.  */
  if (is_attribute_p ("stdcall", identifier)
      && !TARGET_64BIT)
    return (args == NULL_TREE);

  /* Cdecl attribute says the callee is a normal C declaration.
     Ignored in 64-bit mode.  */
  if (is_attribute_p ("cdecl", identifier)
      && !TARGET_64BIT)
    return (args == NULL_TREE);

  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  It must carry exactly one INTEGER_CST
     argument no larger than REGPARM_MAX.  */
  if (is_attribute_p ("regparm", identifier))
    {
      tree cst;

      /* Exactly one argument in the TREE_LIST, and it must be present.  */
      if (! args || TREE_CODE (args) != TREE_LIST
	  || TREE_CHAIN (args) != NULL_TREE
	  || TREE_VALUE (args) == NULL_TREE)
	return 0;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	return 0;

      if (compare_tree_int (cst, REGPARM_MAX) > 0)
	return 0;

      return 1;
    }

  return 0;
}
955
08c148a8
NB
#if defined (OSF_OS) || defined (TARGET_OSF1ELF)

/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int: how many units of
   temporary storage to allocate.

   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.

   We override it here to allow for the new profiling code to go before
   the prologue and the old mcount code to go after the prologue (and
   after %ebx has been set up for ELF shared library support).  */

static void
ix86_osf_output_function_prologue (file, size)
     FILE *file;
     HOST_WIDE_INT size;
{
  char *prefix = "";
  char *lprefix = LPREFIX;
  int labelno = profile_label_no;

#ifdef OSF_OS

  if (TARGET_UNDERSCORES)
    prefix = "_";

  if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      /* Static (non-PIC, non-half-PIC) case: call through the
	 _mcount_ptr variable directly.  */
      if (!flag_pic && !HALF_PIC_P ())
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else if (HALF_PIC_P ())
	{
	  rtx symref;

	  HALF_PIC_EXTERNAL ("_mcount_ptr");
	  symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
						     "_mcount_ptr"));

	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
		   XSTR (symref, 0));
	  fprintf (file, "\tcall *(%%eax)\n");
	}

      else
	{
	  /* Full PIC: materialize the GOT pointer in %eax via the
	     call/pop idiom, then fetch _mcount_ptr through the GOT.
	     call_no numbers the local Pc labels uniquely per call.  */
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }

#else /* !OSF_OS */

  /* TARGET_OSF1ELF variant: same as above minus the half-PIC and
     underscore-prefix handling.  */
  if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic)
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }
#endif /* !OSF_OS */

  /* Emit the normal prologue after the profiling preamble.  */
  function_prologue (file, size);
}

#endif /* OSF_OS || TARGET_OSF1ELF */
1054
b08de47e
MM
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  Under -mrtd
     the default convention flips, so the attribute naming the "other"
     convention flips as well.  */
  const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  /* Only function types carry a calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched calling conventions (cdecl vs stdcall); the
     `!' on each side normalizes attribute presence to a boolean before
     comparing.  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}
b08de47e
MM
1076\f
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  /* -mrtd applies to real functions only, not library calls (which are
     represented by a bare IDENTIFIER_NODE).  */
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall functions will pop the stack if not variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    /* Callee pops everything only for a fixed argument list: either no
       prototype at all, or a list terminated by void_type_node.  */
    if (rtd
	&& (TYPE_ARG_TYPES (funtype) == NULL_TREE
	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		== void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument.  */
  if (aggregate_value_p (TREE_TYPE (funtype))
      && !TARGET_64BIT)
    return GET_MODE_SIZE (Pmode);

  return 0;
}
b08de47e
MM
1123\f
/* Argument support functions.  */

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  /* Start from an all-zero state.  */
  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  A
     regparm attribute on the function type overrides the global
     -mregparm setting.  */
  cum->nregs = ix86_regparm;
  if (fntype)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_mode' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  /* Last entry not void_type_node => varargs => no registers.  */
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    cum->nregs = 0;
	}
    }

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
1186
/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* whether or not the argument was named */
{
  /* Size of this argument rounded up to whole words.  */
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
  if (TARGET_SSE && mode == TImode)
    {
      /* TImode arguments consume one SSE register each (see the TImode
	 case in function_arg).  */
      cum->sse_words += words;
      cum->sse_nregs -= 1;
      cum->sse_regno += 1;
      /* Clamp at zero once the SSE registers run out.  */
      if (cum->sse_nregs <= 0)
	{
	  cum->sse_nregs = 0;
	  cum->sse_regno = 0;
	}
    }
  else
    {
      /* Integer-class arguments consume one regparm register per word.  */
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;

      /* Clamp at zero once the integer registers run out.  */
      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cum->regno = 0;
	}
    }
  return;
}
1231
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */

struct rtx_def *
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  /* Size of this argument rounded up to whole words.  */
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* A VOIDmode "argument" marks the end of the argument list.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  switch (mode)
    {
      /* For now, pass fp/complex values on the stack.  */
    default:
      break;

    case BLKmode:
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      /* Use a register only if the whole argument still fits in the
	 remaining regparm registers.  */
      if (words <= cum->nregs)
	ret = gen_rtx_REG (mode, cum->regno);
      break;
    case TImode:
      /* TImode arguments go in SSE registers while any remain.  */
      if (cum->sse_nregs)
	ret = gen_rtx_REG (mode, cum->sse_regno);
      break;
    }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
e075ae69 1296\f
8bad7136 1297
7dd4b4a3
JH
1298/* Return nonzero if OP is general operand representable on x86_64. */
1299
1300int
1301x86_64_general_operand (op, mode)
1302 rtx op;
1303 enum machine_mode mode;
1304{
1305 if (!TARGET_64BIT)
1306 return general_operand (op, mode);
1307 if (nonimmediate_operand (op, mode))
1308 return 1;
1309 return x86_64_sign_extended_value (op);
1310}
1311
1312/* Return nonzero if OP is general operand representable on x86_64
1313 as eighter sign extended or zero extended constant. */
1314
1315int
1316x86_64_szext_general_operand (op, mode)
1317 rtx op;
1318 enum machine_mode mode;
1319{
1320 if (!TARGET_64BIT)
1321 return general_operand (op, mode);
1322 if (nonimmediate_operand (op, mode))
1323 return 1;
1324 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
1325}
1326
1327/* Return nonzero if OP is nonmemory operand representable on x86_64. */
1328
1329int
1330x86_64_nonmemory_operand (op, mode)
1331 rtx op;
1332 enum machine_mode mode;
1333{
1334 if (!TARGET_64BIT)
1335 return nonmemory_operand (op, mode);
1336 if (register_operand (op, mode))
1337 return 1;
1338 return x86_64_sign_extended_value (op);
1339}
1340
1341/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
1342
1343int
1344x86_64_movabs_operand (op, mode)
1345 rtx op;
1346 enum machine_mode mode;
1347{
1348 if (!TARGET_64BIT || !flag_pic)
1349 return nonmemory_operand (op, mode);
1350 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
1351 return 1;
1352 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
1353 return 1;
1354 return 0;
1355}
1356
1357/* Return nonzero if OP is nonmemory operand representable on x86_64. */
1358
1359int
1360x86_64_szext_nonmemory_operand (op, mode)
1361 rtx op;
1362 enum machine_mode mode;
1363{
1364 if (!TARGET_64BIT)
1365 return nonmemory_operand (op, mode);
1366 if (register_operand (op, mode))
1367 return 1;
1368 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
1369}
1370
1371/* Return nonzero if OP is immediate operand representable on x86_64. */
1372
1373int
1374x86_64_immediate_operand (op, mode)
1375 rtx op;
1376 enum machine_mode mode;
1377{
1378 if (!TARGET_64BIT)
1379 return immediate_operand (op, mode);
1380 return x86_64_sign_extended_value (op);
1381}
1382
/* Return nonzero if OP is an immediate operand representable on x86_64
   as a zero extended constant.  */

int
x86_64_zext_immediate_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return x86_64_zero_extended_value (op);
}
1392
8bad7136
JL
1393/* Return nonzero if OP is (const_int 1), else return zero. */
1394
1395int
1396const_int_1_operand (op, mode)
1397 rtx op;
1398 enum machine_mode mode ATTRIBUTE_UNUSED;
1399{
1400 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
1401}
1402
e075ae69
RH
/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      /* Accept a wrapped symbol/label, or one of the pic unspecs.
	 NOTE(review): the magic numbers 6 and 7 appear to be the
	 @GOT/@GOTOFF unspec indices (7 is @GOTOFF per the check
	 below) — confirm against the unspec list in i386.md.  */
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && XINT (op, 1) >= 6
	      && XINT (op, 1) <= 7))
	return 1;
      /* Otherwise require the symbol-plus-constant-offset form.  */
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != 7)
	return 0;

      /* Look inside the unspec for the underlying symbol/label.  */
      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}
2a2ab3f9 1448
e075ae69 1449/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 1450
e075ae69
RH
1451int
1452pic_symbolic_operand (op, mode)
1453 register rtx op;
1454 enum machine_mode mode ATTRIBUTE_UNUSED;
1455{
1456 if (GET_CODE (op) == CONST)
2a2ab3f9 1457 {
e075ae69
RH
1458 op = XEXP (op, 0);
1459 if (GET_CODE (op) == UNSPEC)
1460 return 1;
1461 if (GET_CODE (op) != PLUS
1462 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1463 return 0;
1464 op = XEXP (op, 0);
1465 if (GET_CODE (op) == UNSPEC)
1466 return 1;
2a2ab3f9 1467 }
e075ae69 1468 return 0;
2a2ab3f9 1469}
2a2ab3f9 1470
28d52ffb
RH
/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Half-pic doesn't allow anything but registers and constants.
     We've just taken care of the latter.  */
  if (HALF_PIC_P ())
    return register_operand (op, Pmode);

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}
79325812 1506
e075ae69
RH
1507int
1508constant_call_address_operand (op, mode)
1509 rtx op;
1510 enum machine_mode mode ATTRIBUTE_UNUSED;
1511{
eaf19aba
JJ
1512 if (GET_CODE (op) == CONST
1513 && GET_CODE (XEXP (op, 0)) == PLUS
1514 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1515 op = XEXP (XEXP (op, 0), 0);
e1ff012c 1516 return GET_CODE (op) == SYMBOL_REF;
e075ae69 1517}
2a2ab3f9 1518
e075ae69 1519/* Match exactly zero and one. */
e9a25f70 1520
0f290768 1521int
e075ae69
RH
1522const0_operand (op, mode)
1523 register rtx op;
1524 enum machine_mode mode;
1525{
1526 return op == CONST0_RTX (mode);
1527}
e9a25f70 1528
0f290768 1529int
e075ae69
RH
1530const1_operand (op, mode)
1531 register rtx op;
1532 enum machine_mode mode ATTRIBUTE_UNUSED;
1533{
1534 return op == const1_rtx;
1535}
2a2ab3f9 1536
e075ae69 1537/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 1538
e075ae69
RH
1539int
1540const248_operand (op, mode)
1541 register rtx op;
1542 enum machine_mode mode ATTRIBUTE_UNUSED;
1543{
1544 return (GET_CODE (op) == CONST_INT
1545 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1546}
e9a25f70 1547
/* True if this is a constant appropriate for an increment or decrement.  */

int
incdec_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* On Pentium4, the inc and dec operations cause an extra dependency on
     flag registers, since carry flag is not set.  Prefer add/sub there
     unless optimizing for size.  */
  if (TARGET_PENTIUM4 && !optimize_size)
    return 0;
  return op == const1_rtx || op == constm1_rtx;
}
2a2ab3f9 1561
371bc54b
JH
1562/* Return nonzero if OP is acceptable as operand of DImode shift
1563 expander. */
1564
1565int
1566shiftdi_operand (op, mode)
1567 rtx op;
1568 enum machine_mode mode ATTRIBUTE_UNUSED;
1569{
1570 if (TARGET_64BIT)
1571 return nonimmediate_operand (op, mode);
1572 else
1573 return register_operand (op, mode);
1574}
1575
0f290768 1576/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
1577 register eliminable to the stack pointer. Otherwise, this is
1578 a register operand.
2a2ab3f9 1579
e075ae69
RH
1580 This is used to prevent esp from being used as an index reg.
1581 Which would only happen in pathological cases. */
5f1ec3e6 1582
e075ae69
RH
1583int
1584reg_no_sp_operand (op, mode)
1585 register rtx op;
1586 enum machine_mode mode;
1587{
1588 rtx t = op;
1589 if (GET_CODE (t) == SUBREG)
1590 t = SUBREG_REG (t);
564d80f4 1591 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 1592 return 0;
2a2ab3f9 1593
e075ae69 1594 return register_operand (op, mode);
2a2ab3f9 1595}
b840bfb0 1596
915119a5
BS
/* Return nonzero if OP satisfies MMX_REG_P, i.e. is an MMX register.
   MODE is ignored.  */

int
mmx_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return MMX_REG_P (op);
}
1604
2c5a510c
RH
/* Return false if this is any eliminable register.  Otherwise
   general_operand.  */

int
general_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  /* Look through a subreg to the register underneath.  */
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  /* Reject the specific registers that register elimination may
     rewrite into reg+const forms.  */
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;
  /* Also reject any other register in the virtual register range.  */
  if (REG_P (t)
      && REGNO (t) >= FIRST_VIRTUAL_REGISTER
      && REGNO (t) <= LAST_VIRTUAL_REGISTER)
    return 0;

  return general_operand (op, mode);
}
1627
/* Return false if this is any eliminable register.  Otherwise
   register_operand or const_int.  */

int
nonmemory_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  /* Look through a subreg to the register underneath.  */
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  /* NOTE(review): unlike general_no_elim_operand above, this does not
     also reject the FIRST_VIRTUAL_REGISTER..LAST_VIRTUAL_REGISTER
     range — confirm whether the asymmetry is intentional.  */
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
}
1646
e075ae69 1647/* Return true if op is a Q_REGS class register. */
b840bfb0 1648
e075ae69
RH
1649int
1650q_regs_operand (op, mode)
1651 register rtx op;
1652 enum machine_mode mode;
b840bfb0 1653{
e075ae69
RH
1654 if (mode != VOIDmode && GET_MODE (op) != mode)
1655 return 0;
1656 if (GET_CODE (op) == SUBREG)
1657 op = SUBREG_REG (op);
1658 return QI_REG_P (op);
0f290768 1659}
b840bfb0 1660
e075ae69 1661/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 1662
e075ae69
RH
1663int
1664non_q_regs_operand (op, mode)
1665 register rtx op;
1666 enum machine_mode mode;
1667{
1668 if (mode != VOIDmode && GET_MODE (op) != mode)
1669 return 0;
1670 if (GET_CODE (op) == SUBREG)
1671 op = SUBREG_REG (op);
1672 return NON_QI_REG_P (op);
0f290768 1673}
b840bfb0 1674
915119a5
BS
/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
   insns.  */
int
sse_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  enum rtx_code code = GET_CODE (op);
  switch (code)
    {
    /* Operations supported directly.  */
    case EQ:
    case LT:
    case LE:
    case UNORDERED:
    case NE:
    case UNGE:
    case UNGT:
    case ORDERED:
      return 1;
    /* These are equivalent to ones above in non-IEEE comparisons,
       so they are usable only when exact IEEE NaN ordering is not
       required.  */
    case UNEQ:
    case UNLT:
    case UNLE:
    case LTGT:
    case GE:
    case GT:
      return !TARGET_IEEE_FP;
    default:
      return 0;
    }
}
/* Return 1 if OP is a valid comparison operator in valid mode.  */
int
ix86_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  /* Must be an RTL comparison operator at all.  */
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  /* The mode of the compared operand determines which flags are live.  */
  inmode = GET_MODE (XEXP (op, 0));

  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      /* FP comparisons are valid only when they can be done with a
	 single jump — no bypass or secondary comparison needed.  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      return (bypass_code == NIL && second_code == NIL);
    }
  /* Integer comparisons: which codes are valid depends on which flag
     bits the producing instruction actually set (the CC* submodes).  */
  switch (code)
    {
    case EQ: case NE:
      return 1;
    case LT: case GE:
      if (inmode == CCmode || inmode == CCGCmode
	  || inmode == CCGOCmode || inmode == CCNOmode)
	return 1;
      return 0;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      /* These need the carry flag, so only full CCmode will do.  */
      if (inmode == CCmode)
	return 1;
      return 0;
    case GT: case LE:
      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}
1748
/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  /* Must be an RTL comparison operator at all.  */
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));
  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      /* FP comparisons qualify only when a single test suffices (no
	 bypass or secondary comparison); map the code to its integer
	 equivalent for the switch below.  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	return 0;
      code = ix86_fp_compare_code_to_integer (code);
    }
  /* i387 supports just limited amount of conditional codes.  */
  switch (code)
    {
    case LTU: case GTU: case LEU: case GEU:
      /* Unsigned codes need the carry flag.  */
      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
	return 1;
      return 0;
    case ORDERED: case UNORDERED:
    case EQ: case NE:
      return 1;
    default:
      return 0;
    }
}
b840bfb0 1785
e9e80858
JH
1786/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1787
1788int
1789promotable_binary_operator (op, mode)
1790 register rtx op;
1791 enum machine_mode mode ATTRIBUTE_UNUSED;
1792{
1793 switch (GET_CODE (op))
1794 {
1795 case MULT:
1796 /* Modern CPUs have same latency for HImode and SImode multiply,
1797 but 386 and 486 do HImode multiply faster. */
1798 return ix86_cpu > PROCESSOR_I486;
1799 case PLUS:
1800 case AND:
1801 case IOR:
1802 case XOR:
1803 case ASHIFT:
1804 return 1;
1805 default:
1806 return 0;
1807 }
1808}
1809
e075ae69
RH
1810/* Nearly general operand, but accept any const_double, since we wish
1811 to be able to drop them into memory rather than have them get pulled
1812 into registers. */
b840bfb0 1813
2a2ab3f9 1814int
e075ae69
RH
1815cmp_fp_expander_operand (op, mode)
1816 register rtx op;
1817 enum machine_mode mode;
2a2ab3f9 1818{
e075ae69 1819 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 1820 return 0;
e075ae69 1821 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 1822 return 1;
e075ae69 1823 return general_operand (op, mode);
2a2ab3f9
JVA
1824}
1825
e075ae69 1826/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
1827
1828int
e075ae69 1829ext_register_operand (op, mode)
2a2ab3f9 1830 register rtx op;
bb5177ac 1831 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1832{
3522082b 1833 int regno;
0d7d98ee
JH
1834 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
1835 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
e075ae69 1836 return 0;
3522082b
JH
1837
1838 if (!register_operand (op, VOIDmode))
1839 return 0;
1840
1841 /* Be curefull to accept only registers having upper parts. */
1842 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
1843 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
e075ae69
RH
1844}
1845
1846/* Return 1 if this is a valid binary floating-point operation.
0f290768 1847 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
1848
1849int
1850binary_fp_operator (op, mode)
1851 register rtx op;
1852 enum machine_mode mode;
1853{
1854 if (mode != VOIDmode && mode != GET_MODE (op))
1855 return 0;
1856
2a2ab3f9
JVA
1857 switch (GET_CODE (op))
1858 {
e075ae69
RH
1859 case PLUS:
1860 case MINUS:
1861 case MULT:
1862 case DIV:
1863 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 1864
2a2ab3f9
JVA
1865 default:
1866 return 0;
1867 }
1868}
fee2770d 1869
e075ae69
RH
1870int
1871mult_operator(op, mode)
1872 register rtx op;
1873 enum machine_mode mode ATTRIBUTE_UNUSED;
1874{
1875 return GET_CODE (op) == MULT;
1876}
1877
1878int
1879div_operator(op, mode)
1880 register rtx op;
1881 enum machine_mode mode ATTRIBUTE_UNUSED;
1882{
1883 return GET_CODE (op) == DIV;
1884}
0a726ef1
JL
1885
1886int
e075ae69
RH
1887arith_or_logical_operator (op, mode)
1888 rtx op;
1889 enum machine_mode mode;
0a726ef1 1890{
e075ae69
RH
1891 return ((mode == VOIDmode || GET_MODE (op) == mode)
1892 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1893 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
1894}
1895
e075ae69 1896/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
1897
1898int
e075ae69
RH
1899memory_displacement_operand (op, mode)
1900 register rtx op;
1901 enum machine_mode mode;
4f2c8ebb 1902{
e075ae69 1903 struct ix86_address parts;
e9a25f70 1904
e075ae69
RH
1905 if (! memory_operand (op, mode))
1906 return 0;
1907
1908 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1909 abort ();
1910
1911 return parts.disp != NULL_RTX;
4f2c8ebb
RS
1912}
1913
/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* Any register or memory operand is fine.  */
  if (nonimmediate_operand (op, mode))
    return 1;

  /* Also accept (and:SI (zero_extract:SI x (8) (8)) (const_int)),
     presumably the shape re-emitted for testqi_ext_ccno_0 -- see the
     comment above.  */
  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}
d784886d 1940
e075ae69
RH
1941/* Returns 1 if OP is memory operand that can not be represented by the
1942 modRM array. */
d784886d
RK
1943
1944int
e075ae69 1945long_memory_operand (op, mode)
d784886d
RK
1946 register rtx op;
1947 enum machine_mode mode;
1948{
e075ae69 1949 if (! memory_operand (op, mode))
d784886d
RK
1950 return 0;
1951
e075ae69 1952 return memory_address_length (op) != 0;
d784886d 1953}
2247f6ed
JH
1954
/* Return nonzero if the rtx is known aligned.

   "Aligned" here means 4-byte aligned: every address component --
   base, scaled index, and displacement -- must be known to keep the
   low two bits zero.  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      /* A scale of 4 or more keeps the index's contribution aligned
	 regardless of the register's own alignment.  */
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      /* The displacement must be a constant multiple of 4.  */
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
e075ae69
RH
2008\f
2009/* Return true if the constant is something that can be loaded with
2010 a special instruction. Only handle 0.0 and 1.0; others are less
2011 worthwhile. */
57dbca5e
BS
2012
2013int
e075ae69
RH
2014standard_80387_constant_p (x)
2015 rtx x;
57dbca5e 2016{
2b04e52b 2017 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 2018 return -1;
2b04e52b
JH
2019 /* Note that on the 80387, other constants, such as pi, that we should support
2020 too. On some machines, these are much slower to load as standard constant,
2021 than to load from doubles in memory. */
2022 if (x == CONST0_RTX (GET_MODE (x)))
2023 return 1;
2024 if (x == CONST1_RTX (GET_MODE (x)))
2025 return 2;
e075ae69 2026 return 0;
57dbca5e
BS
2027}
2028
2b04e52b
JH
2029/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
2030 */
2031int
2032standard_sse_constant_p (x)
2033 rtx x;
2034{
2035 if (GET_CODE (x) != CONST_DOUBLE)
2036 return -1;
2037 return (x == CONST0_RTX (GET_MODE (x)));
2038}
2039
2a2ab3f9
JVA
2040/* Returns 1 if OP contains a symbol reference */
2041
2042int
2043symbolic_reference_mentioned_p (op)
2044 rtx op;
2045{
6f7d635c 2046 register const char *fmt;
2a2ab3f9
JVA
2047 register int i;
2048
2049 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
2050 return 1;
2051
2052 fmt = GET_RTX_FORMAT (GET_CODE (op));
2053 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2054 {
2055 if (fmt[i] == 'E')
2056 {
2057 register int j;
2058
2059 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2060 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2061 return 1;
2062 }
e9a25f70 2063
2a2ab3f9
JVA
2064 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
2065 return 1;
2066 }
2067
2068 return 0;
2069}
e075ae69
RH
2070
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  struct ix86_frame frame;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif
#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  /* Block profiling requires epilogue code of its own.  */
  if (profile_block_flag == 2)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32 pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  /* A bare `ret' works only when nothing is left to deallocate.  */
  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}
6189a572
JH
2109\f
/* Return 1 if VALUE can be stored in the sign extended immediate field,
   i.e. represents a value that sign-extends from 32 bits.  */
int
x86_64_sign_extended_value (value)
     rtx value;
{
  switch (GET_CODE (value))
    {
      /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
	 to be at least 32 and thus all acceptable constants are
	 represented as CONST_INT.  */
      case CONST_INT:
	if (HOST_BITS_PER_WIDE_INT == 32)
	  return 1;
	else
	  {
	    /* Value fits iff truncating it to 32 bits and sign-extending
	       back reproduces it.  */
	    HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
	    return trunc_int_for_mode (val, SImode) == val;
	  }
	break;

      /* For certain code models, the symbolic references are known to fit.  */
      case SYMBOL_REF:
	return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;

      /* For certain code models, the code is near as well.  */
      case LABEL_REF:
	return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;

      /* We also may accept the offsetted memory references in certain special
	 cases.  */
      case CONST:
	/* Unspec 15 is always acceptable.  */
	if (GET_CODE (XEXP (value, 0)) == UNSPEC
	    && XVECLEN (XEXP (value, 0), 0) == 1
	    && XINT (XEXP (value, 0), 1) ==  15)
	  return 1;
	else if (GET_CODE (XEXP (value, 0)) == PLUS)
	  {
	    rtx op1 = XEXP (XEXP (value, 0), 0);
	    rtx op2 = XEXP (XEXP (value, 0), 1);
	    HOST_WIDE_INT offset;

	    if (ix86_cmodel == CM_LARGE)
	      return 0;
	    if (GET_CODE (op2) != CONST_INT)
	      return 0;
	    offset = trunc_int_for_mode (INTVAL (op2), DImode);
	    switch (GET_CODE (op1))
	      {
		case SYMBOL_REF:
		  /* For CM_SMALL assume that latest object is 1MB before
		     end of 31bits boundary.  We may also accept pretty
		     large negative constants knowing that all objects are
		     in the positive half of address space.  */
		  if (ix86_cmodel == CM_SMALL
		      && offset < 1024*1024*1024
		      && trunc_int_for_mode (offset, SImode) == offset)
		    return 1;
		  /* For CM_KERNEL we know that all objects reside in the
		     negative half of 32bits address space.  We may not
		     accept negative offsets, since they may be just off
		     and we may accept pretty large positive ones.  */
		  if (ix86_cmodel == CM_KERNEL
		      && offset > 0
		      && trunc_int_for_mode (offset, SImode) == offset)
		    return 1;
		  break;
		case LABEL_REF:
		  /* These conditions are similar to SYMBOL_REF ones, just the
		     constraints for code models differ.  */
		  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		      && offset < 1024*1024*1024
		      && trunc_int_for_mode (offset, SImode) == offset)
		    return 1;
		  if (ix86_cmodel == CM_KERNEL
		      && offset > 0
		      && trunc_int_for_mode (offset, SImode) == offset)
		    return 1;
		  break;
		default:
		  return 0;
	      }
	  }
	return 0;
      default:
	return 0;
    }
}
2197
2198/* Return 1 if VALUE can be stored in the zero extended immediate field. */
2199int
2200x86_64_zero_extended_value (value)
2201 rtx value;
2202{
2203 switch (GET_CODE (value))
2204 {
2205 case CONST_DOUBLE:
2206 if (HOST_BITS_PER_WIDE_INT == 32)
2207 return (GET_MODE (value) == VOIDmode
2208 && !CONST_DOUBLE_HIGH (value));
2209 else
2210 return 0;
2211 case CONST_INT:
2212 if (HOST_BITS_PER_WIDE_INT == 32)
2213 return INTVAL (value) >= 0;
2214 else
2215 return !(INTVAL (value) & ~(HOST_WIDE_INT)0xffffffff);
2216 break;
2217
2218 /* For certain code models, the symbolic references are known to fit. */
2219 case SYMBOL_REF:
2220 return ix86_cmodel == CM_SMALL;
2221
2222 /* For certain code models, the code is near as well. */
2223 case LABEL_REF:
2224 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
2225
2226 /* We also may accept the offsetted memory references in certain special
2227 cases. */
2228 case CONST:
2229 if (GET_CODE (XEXP (value, 0)) == PLUS)
2230 {
2231 rtx op1 = XEXP (XEXP (value, 0), 0);
2232 rtx op2 = XEXP (XEXP (value, 0), 1);
2233
2234 if (ix86_cmodel == CM_LARGE)
2235 return 0;
2236 switch (GET_CODE (op1))
2237 {
2238 case SYMBOL_REF:
2239 return 0;
2240 /* For small code model we may accept pretty large possitive
2241 offsets, since one bit is available for free. Negative
2242 offsets are limited by the size of NULL pointer area
2243 specified by the ABI. */
2244 if (ix86_cmodel == CM_SMALL
2245 && GET_CODE (op2) == CONST_INT
2246 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
2247 && (trunc_int_for_mode (INTVAL (op2), SImode)
2248 == INTVAL (op2)))
2249 return 1;
2250 /* ??? For the kernel, we may accept adjustment of
2251 -0x10000000, since we know that it will just convert
2252 negative address space to possitive, but perhaps this
2253 is not worthwhile. */
2254 break;
2255 case LABEL_REF:
2256 /* These conditions are similar to SYMBOL_REF ones, just the
2257 constraints for code models differ. */
2258 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
2259 && GET_CODE (op2) == CONST_INT
2260 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
2261 && (trunc_int_for_mode (INTVAL (op2), SImode)
2262 == INTVAL (op2)))
2263 return 1;
2264 break;
2265 default:
2266 return 0;
2267 }
2268 }
2269 return 0;
2270 default:
2271 return 0;
2272 }
2273}
6fca22eb
RH
2274
2275/* Value should be nonzero if functions must have frame pointers.
2276 Zero means the frame pointer need not be set up (and parms may
2277 be accessed via the stack pointer) in functions that seem suitable. */
2278
2279int
2280ix86_frame_pointer_required ()
2281{
2282 /* If we accessed previous frames, then the generated code expects
2283 to be able to access the saved ebp value in our frame. */
2284 if (cfun->machine->accesses_prev_frame)
2285 return 1;
a4f31c00 2286
6fca22eb
RH
2287 /* Several x86 os'es need a frame pointer for other reasons,
2288 usually pertaining to setjmp. */
2289 if (SUBTARGET_FRAME_POINTER_REQUIRED)
2290 return 1;
2291
2292 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
2293 the frame pointer by default. Turn it back on now if we've not
2294 got a leaf function. */
2295 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
2296 return 1;
2297
2298 return 0;
2299}
2300
/* Record that the current function accesses previous call frames.
   This forces ix86_frame_pointer_required to keep the frame pointer.  */

void
ix86_setup_frame_addresses ()
{
  cfun->machine->accesses_prev_frame = 1;
}
e075ae69 2308\f
/* Label for the deep-branch-prediction PIC thunk; generated lazily by
   load_pic_register and emitted at end of file by ix86_asm_file_end.  */
static char pic_label_name[32];
e9a25f70 2310
e075ae69
RH
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  Emitted once,
   at the end of the assembly file, and only when the thunk label was
   actually generated by load_pic_register.  */

void
ix86_asm_file_end (file)
     FILE *file;
{
  rtx xops[2];

  if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
    return;

  /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
     to updating relocations to a section being discarded such that this
     doesn't work.  Ought to detect this at configure time.  */
#if 0 && defined (ASM_OUTPUT_SECTION_NAME)
  /* The trick here is to create a linkonce section containing the
     pic label thunk, but to refer to it with an internal label.
     Because the label is internal, we don't have inter-dso name
     binding issues on hosts that don't support ".hidden".

     In order to use these macros, however, we must create a fake
     function decl.  */
  {
    tree decl = build_decl (FUNCTION_DECL,
			    get_identifier ("i686.get_pc_thunk"),
			    error_mark_node);
    DECL_ONE_ONLY (decl) = 1;
    UNIQUE_SECTION (decl, 0);
    named_section (decl, NULL, 0);
  }
#else
  text_section ();
#endif

  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
     internal (non-global) label that's being emitted, it didn't make
     sense to have .type information for local labels.  This caused
     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
     me debug info for a label that you're declaring non-global?) this
     was changed to call ASM_OUTPUT_LABEL() instead.  */

  ASM_OUTPUT_LABEL (file, pic_label_name);

  /* Load the word at the stack pointer -- the return address pushed by
     the caller -- into the PIC register, then return.  */
  xops[0] = pic_offset_table_rtx;
  xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
  output_asm_insn ("ret", xops);
}
32b5b1aa 2360
/* Emit insns that load the PIC register with the address of the GOT.
   Used for 32-bit -fpic only; with deep branch prediction the PC is
   fetched through the thunk emitted by ix86_asm_file_end, otherwise
   through a call/pop sequence.  */
void
load_pic_register ()
{
  rtx gotsym, pclab;

  /* 64-bit code uses RIP-relative addressing instead.  */
  if (TARGET_64BIT)
    abort ();

  gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");

  if (TARGET_DEEP_BRANCH_PREDICTION)
    {
      /* Generate the thunk label on first use; ix86_asm_file_end keys
	 off it to decide whether to emit the thunk body.  */
      if (! pic_label_name[0])
	ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
    }
  else
    {
      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
    }

  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));

  /* Without the thunk, the call above leaves the PC on the stack.  */
  if (! TARGET_DEEP_BRANCH_PREDICTION)
    emit_insn (gen_popsi1 (pic_offset_table_rtx));

  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
}
8dfe5673 2389
0d7d98ee 2390/* Generate an "push" pattern for input ARG. */
e9a25f70 2391
e075ae69
RH
2392static rtx
2393gen_push (arg)
2394 rtx arg;
e9a25f70 2395{
c5c76735 2396 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
2397 gen_rtx_MEM (Pmode,
2398 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
2399 stack_pointer_rtx)),
2400 arg);
e9a25f70
JL
2401}
2402
/* Return 1 if we need to save REGNO.  MAYBE_EH_RETURN nonzero means
   also count the eh_return data registers as needing a save.  */
static int
ix86_save_reg (regno, maybe_eh_return)
     int regno;
     int maybe_eh_return;
{
  /* The PIC register must be preserved whenever the function uses the
     GOT/constant pool, or when eh_return may clobber it.  */
  if (flag_pic
      && ! TARGET_64BIT
      && regno == PIC_OFFSET_TABLE_REGNUM
      && (current_function_uses_pic_offset_table
	  || current_function_uses_const_pool
	  || current_function_calls_eh_return))
    return 1;

  if (current_function_calls_eh_return && maybe_eh_return)
    {
      /* Walk the target's eh_return data register list, terminated by
	 INVALID_REGNUM.  */
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO(i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == (unsigned) regno)
	    return 1;
	}
    }

  /* Otherwise: call-saved, actually used, not fixed, and not the frame
     pointer when one is being set up by the prologue.  */
  return (regs_ever_live[regno]
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
2435
0903fcab
JH
2436/* Return number of registers to be saved on the stack. */
2437
2438static int
2439ix86_nsaved_regs ()
2440{
2441 int nregs = 0;
0903fcab
JH
2442 int regno;
2443
4dd2ac2c 2444 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 2445 if (ix86_save_reg (regno, true))
4dd2ac2c 2446 nregs++;
0903fcab
JH
2447 return nregs;
2448}
2449
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  Offsets are derived from
   the frame layout computed by ix86_compute_frame_layout.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;
     int to;
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      /* Only eliminations to the stack pointer remain.  */
      if (to != STACK_POINTER_REGNUM)
	abort ();
      else if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;
      else if (from != FRAME_POINTER_REGNUM)
	abort ();
      else
	return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
2478
/* Fill structure ix86_frame about frame of currently computed function.

   Layout, from the entry stack pointer downward: return address,
   saved frame pointer (if needed), register save area, va-arg register
   area, padding1 (to the local-variable alignment), locals, outgoing
   argument area, padding2 (to the preferred stack boundary).  On
   x86-64 leaf functions, trailing space may be carved out of the
   allocation as the red zone.  */

static void
ix86_compute_frame_layout (frame)
     struct ix86_frame *frame;
{
  /* NOTE(review): total_size is assigned below but never used.  */
  HOST_WIDE_INT total_size;
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  int offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  total_size = size;

  /* Skip return value and save base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since i386 port is the only one using those
     features that may break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  /* Va-arg area */
  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
		     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  */
  if (ACCUMULATE_OUTGOING_ARGS)
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  */
  frame->padding2 = ((offset + preferred_alignment - 1)
		     & -preferred_alignment) - offset;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  /* Leaf x86-64 functions may use the red zone below the stack pointer
     instead of explicitly allocating that much.  */
  if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  /* Debugging dump of the computed layout.  */
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
	   frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
#endif
}
2587
0903fcab
JH
2588/* Emit code to save registers in the prologue. */
2589
2590static void
2591ix86_emit_save_regs ()
2592{
2593 register int regno;
0903fcab 2594 rtx insn;
0903fcab 2595
4dd2ac2c 2596 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 2597 if (ix86_save_reg (regno, true))
0903fcab 2598 {
0d7d98ee 2599 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
2600 RTX_FRAME_RELATED_P (insn) = 1;
2601 }
2602}
2603
c6036a37
JH
2604/* Emit code to save registers using MOV insns. First register
2605 is restored from POINTER + OFFSET. */
2606static void
2607ix86_emit_save_regs_using_mov (pointer, offset)
b72f00af
RK
2608 rtx pointer;
2609 HOST_WIDE_INT offset;
c6036a37
JH
2610{
2611 int regno;
2612 rtx insn;
2613
2614 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2615 if (ix86_save_reg (regno, true))
2616 {
b72f00af
RK
2617 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
2618 Pmode, offset),
c6036a37
JH
2619 gen_rtx_REG (Pmode, regno));
2620 RTX_FRAME_RELATED_P (insn) = 1;
2621 offset += UNITS_PER_WORD;
2622 }
2623}
2624
/* Expand the prologue into a bunch of separate insns.

   Order: push/setup the frame pointer (if needed), save registers
   (either via pushes, or via moves after allocation), allocate the
   frame (directly or through _alloca when stack probing is required),
   run any subtarget prologue, and finally load the PIC register.  */

void
ix86_expand_prologue ()
{
  rtx insn;
  int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
				   || current_function_uses_const_pool)
		      && !TARGET_64BIT);
  struct ix86_frame frame;
  /* Prefer mov-based register saves when profitable and not
     optimizing for size.  */
  int use_mov = (TARGET_PROLOGUE_USING_MOVE && !optimize_size);
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;
  /* In case we are dealing only with single register and empty frame,
     push is equivalent of the mov+add sequence.  */
  if (allocate == 0 && frame.nregs <= 1)
    use_mov = 0;

  if (!use_mov)
    ix86_emit_save_regs ();
  else
    /* The register save area is part of the block allocated below.  */
    allocate += frame.nregs * UNITS_PER_WORD;

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    {
      insn = emit_insn (gen_pro_epilogue_adjust_stack
			(stack_pointer_rtx, stack_pointer_rtx,
			 GEN_INT (-allocate)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* Large allocations with stack probing go through _alloca.
	 ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      if (TARGET_64BIT)
	abort();

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (allocate));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));

      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }
  if (use_mov)
    {
      /* Address the save area from whichever pointer gives known
	 small offsets.  */
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    load_pic_register ();

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  However, if -fpic, the above call will have
     done that.  */
  if ((profile_flag || profile_block_flag) && ! pic_reg_used)
    emit_insn (gen_blockage ());
}
2714
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  MAYBE_EH_RETURN is passed through
   to ix86_save_reg to select which registers were saved.  */
static void
ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
     rtx pointer;
     int offset;
     int maybe_eh_return;
{
  int regno;

  /* Must walk registers in the same (ascending) order used when they
     were saved, advancing OFFSET a word at a time.  */
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
	emit_move_insn (gen_rtx_REG (Pmode, regno),
			adjust_address (gen_rtx_MEM (Pmode, pointer),
					Pmode, offset));
	offset += UNITS_PER_WORD;
      }
}
2734
0f290768 2735/* Restore function stack, frame, and registers. */
e9a25f70 2736
2a2ab3f9 2737void
1020a5ab
RH
2738ix86_expand_epilogue (style)
2739 int style;
2a2ab3f9 2740{
1c71e60e 2741 int regno;
fdb8a883 2742 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4dd2ac2c 2743 struct ix86_frame frame;
65954bd8 2744 HOST_WIDE_INT offset;
4dd2ac2c
JH
2745
2746 ix86_compute_frame_layout (&frame);
2a2ab3f9 2747
a4f31c00 2748 /* Calculate start of saved registers relative to ebp. Special care
84e306b4
RH
2749 must be taken for the normal return case of a function using
2750 eh_return: the eax and edx registers are marked as saved, but not
2751 restored along this path. */
2752 offset = frame.nregs;
2753 if (current_function_calls_eh_return && style != 2)
2754 offset -= 2;
2755 offset *= -UNITS_PER_WORD;
2a2ab3f9 2756
1c71e60e
JH
2757#ifdef FUNCTION_BLOCK_PROFILER_EXIT
2758 if (profile_block_flag == 2)
564d80f4 2759 {
1c71e60e 2760 FUNCTION_BLOCK_PROFILER_EXIT;
564d80f4 2761 }
1c71e60e 2762#endif
564d80f4 2763
fdb8a883
JW
2764 /* If we're only restoring one register and sp is not valid then
2765 using a move instruction to restore the register since it's
0f290768 2766 less work than reloading sp and popping the register.
da2d1d3a
JH
2767
2768 The default code result in stack adjustment using add/lea instruction,
2769 while this code results in LEAVE instruction (or discrete equivalent),
2770 so it is profitable in some other cases as well. Especially when there
2771 are no registers to restore. We also use this code when TARGET_USE_LEAVE
2772 and there is exactly one register to pop. This heruistic may need some
2773 tuning in future. */
4dd2ac2c 2774 if ((!sp_valid && frame.nregs <= 1)
c6036a37
JH
2775 || (TARGET_EPILOGUE_USING_MOVE && !optimize_size
2776 && (frame.nregs > 1 || frame.to_allocate))
4dd2ac2c 2777 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
da2d1d3a 2778 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
1020a5ab
RH
2779 && frame.nregs == 1)
2780 || style == 2)
2a2ab3f9 2781 {
da2d1d3a
JH
2782 /* Restore registers. We can use ebp or esp to address the memory
2783 locations. If both are available, default to ebp, since offsets
2784 are known to be small. Only exception is esp pointing directly to the
2785 end of block of saved registers, where we may simplify addressing
2786 mode. */
2787
4dd2ac2c 2788 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
1020a5ab
RH
2789 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
2790 frame.to_allocate, style == 2);
da2d1d3a 2791 else
1020a5ab
RH
2792 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
2793 offset, style == 2);
2794
2795 /* eh_return epilogues need %ecx added to the stack pointer. */
2796 if (style == 2)
2797 {
2798 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2a2ab3f9 2799
1020a5ab
RH
2800 if (frame_pointer_needed)
2801 {
2802 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
2803 tmp = plus_constant (tmp, UNITS_PER_WORD);
2804 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
2805
2806 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
2807 emit_move_insn (hard_frame_pointer_rtx, tmp);
2808
2809 emit_insn (gen_pro_epilogue_adjust_stack
f2042df3 2810 (stack_pointer_rtx, sa, const0_rtx));
1020a5ab
RH
2811 }
2812 else
2813 {
2814 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
2815 tmp = plus_constant (tmp, (frame.to_allocate
2816 + frame.nregs * UNITS_PER_WORD));
2817 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
2818 }
2819 }
2820 else if (!frame_pointer_needed)
f2042df3
RH
2821 emit_insn (gen_pro_epilogue_adjust_stack
2822 (stack_pointer_rtx, stack_pointer_rtx,
2823 GEN_INT (frame.to_allocate
2824 + frame.nregs * UNITS_PER_WORD)));
0f290768 2825 /* If not an i386, mov & pop is faster than "leave". */
da2d1d3a 2826 else if (TARGET_USE_LEAVE || optimize_size)
8362f420 2827 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
c8c5cb99 2828 else
2a2ab3f9 2829 {
1c71e60e
JH
2830 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2831 hard_frame_pointer_rtx,
f2042df3 2832 const0_rtx));
8362f420
JH
2833 if (TARGET_64BIT)
2834 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
2835 else
2836 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
e9a25f70
JL
2837 }
2838 }
1c71e60e 2839 else
68f654ec 2840 {
1c71e60e
JH
2841 /* First step is to deallocate the stack frame so that we can
2842 pop the registers. */
2843 if (!sp_valid)
2844 {
2845 if (!frame_pointer_needed)
2846 abort ();
2847 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2848 hard_frame_pointer_rtx,
f2042df3 2849 GEN_INT (offset)));
1c71e60e 2850 }
4dd2ac2c 2851 else if (frame.to_allocate)
f2042df3
RH
2852 emit_insn (gen_pro_epilogue_adjust_stack
2853 (stack_pointer_rtx, stack_pointer_rtx,
2854 GEN_INT (frame.to_allocate)));
1c71e60e 2855
4dd2ac2c 2856 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 2857 if (ix86_save_reg (regno, false))
8362f420
JH
2858 {
2859 if (TARGET_64BIT)
2860 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
2861 else
2862 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
2863 }
4dd2ac2c 2864 if (frame_pointer_needed)
8362f420
JH
2865 {
2866 if (TARGET_64BIT)
2867 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
2868 else
2869 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2870 }
68f654ec 2871 }
68f654ec 2872
cbbf65e0 2873 /* Sibcall epilogues don't want a return instruction. */
1020a5ab 2874 if (style == 0)
cbbf65e0
RH
2875 return;
2876
2a2ab3f9
JVA
2877 if (current_function_pops_args && current_function_args_size)
2878 {
e075ae69 2879 rtx popc = GEN_INT (current_function_pops_args);
2a2ab3f9 2880
b8c752c8
UD
2881 /* i386 can only pop 64K bytes. If asked to pop more, pop
2882 return address, do explicit add, and jump indirectly to the
0f290768 2883 caller. */
2a2ab3f9 2884
b8c752c8 2885 if (current_function_pops_args >= 65536)
2a2ab3f9 2886 {
e075ae69 2887 rtx ecx = gen_rtx_REG (SImode, 2);
e9a25f70 2888
8362f420
JH
2889 /* There are is no "pascal" calling convention in 64bit ABI. */
2890 if (TARGET_64BIT)
2891 abort();
2892
e075ae69
RH
2893 emit_insn (gen_popsi1 (ecx));
2894 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 2895 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 2896 }
79325812 2897 else
e075ae69
RH
2898 emit_jump_insn (gen_return_pop_internal (popc));
2899 }
2900 else
2901 emit_jump_insn (gen_return_internal ());
2902}
2903\f
2904/* Extract the parts of an RTL expression that is a valid memory address
2905 for an instruction. Return false if the structure of the address is
2906 grossly off. */
2907
2908static int
2909ix86_decompose_address (addr, out)
2910 register rtx addr;
2911 struct ix86_address *out;
2912{
2913 rtx base = NULL_RTX;
2914 rtx index = NULL_RTX;
2915 rtx disp = NULL_RTX;
2916 HOST_WIDE_INT scale = 1;
2917 rtx scale_rtx = NULL_RTX;
2918
2919 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2920 base = addr;
2921 else if (GET_CODE (addr) == PLUS)
2922 {
2923 rtx op0 = XEXP (addr, 0);
2924 rtx op1 = XEXP (addr, 1);
2925 enum rtx_code code0 = GET_CODE (op0);
2926 enum rtx_code code1 = GET_CODE (op1);
2927
2928 if (code0 == REG || code0 == SUBREG)
2929 {
2930 if (code1 == REG || code1 == SUBREG)
2931 index = op0, base = op1; /* index + base */
2932 else
2933 base = op0, disp = op1; /* base + displacement */
2934 }
2935 else if (code0 == MULT)
e9a25f70 2936 {
e075ae69
RH
2937 index = XEXP (op0, 0);
2938 scale_rtx = XEXP (op0, 1);
2939 if (code1 == REG || code1 == SUBREG)
2940 base = op1; /* index*scale + base */
e9a25f70 2941 else
e075ae69
RH
2942 disp = op1; /* index*scale + disp */
2943 }
2944 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2945 {
2946 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2947 scale_rtx = XEXP (XEXP (op0, 0), 1);
2948 base = XEXP (op0, 1);
2949 disp = op1;
2a2ab3f9 2950 }
e075ae69
RH
2951 else if (code0 == PLUS)
2952 {
2953 index = XEXP (op0, 0); /* index + base + disp */
2954 base = XEXP (op0, 1);
2955 disp = op1;
2956 }
2957 else
2958 return FALSE;
2959 }
2960 else if (GET_CODE (addr) == MULT)
2961 {
2962 index = XEXP (addr, 0); /* index*scale */
2963 scale_rtx = XEXP (addr, 1);
2964 }
2965 else if (GET_CODE (addr) == ASHIFT)
2966 {
2967 rtx tmp;
2968
2969 /* We're called for lea too, which implements ashift on occasion. */
2970 index = XEXP (addr, 0);
2971 tmp = XEXP (addr, 1);
2972 if (GET_CODE (tmp) != CONST_INT)
2973 return FALSE;
2974 scale = INTVAL (tmp);
2975 if ((unsigned HOST_WIDE_INT) scale > 3)
2976 return FALSE;
2977 scale = 1 << scale;
2a2ab3f9 2978 }
2a2ab3f9 2979 else
e075ae69
RH
2980 disp = addr; /* displacement */
2981
2982 /* Extract the integral value of scale. */
2983 if (scale_rtx)
e9a25f70 2984 {
e075ae69
RH
2985 if (GET_CODE (scale_rtx) != CONST_INT)
2986 return FALSE;
2987 scale = INTVAL (scale_rtx);
e9a25f70 2988 }
3b3c6a3f 2989
e075ae69
RH
2990 /* Allow arg pointer and stack pointer as index if there is not scaling */
2991 if (base && index && scale == 1
564d80f4
JH
2992 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2993 || index == stack_pointer_rtx))
e075ae69
RH
2994 {
2995 rtx tmp = base;
2996 base = index;
2997 index = tmp;
2998 }
2999
3000 /* Special case: %ebp cannot be encoded as a base without a displacement. */
564d80f4
JH
3001 if ((base == hard_frame_pointer_rtx
3002 || base == frame_pointer_rtx
3003 || base == arg_pointer_rtx) && !disp)
e075ae69
RH
3004 disp = const0_rtx;
3005
3006 /* Special case: on K6, [%esi] makes the instruction vector decoded.
3007 Avoid this by transforming to [%esi+0]. */
3008 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
3009 && base && !index && !disp
329e1d01 3010 && REG_P (base)
e075ae69
RH
3011 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
3012 disp = const0_rtx;
3013
3014 /* Special case: encode reg+reg instead of reg*2. */
3015 if (!base && index && scale && scale == 2)
3016 base = index, scale = 1;
0f290768 3017
e075ae69
RH
3018 /* Special case: scaling cannot be encoded without base or displacement. */
3019 if (!base && !disp && index && scale != 1)
3020 disp = const0_rtx;
3021
3022 out->base = base;
3023 out->index = index;
3024 out->disp = disp;
3025 out->scale = scale;
3b3c6a3f 3026
e075ae69
RH
3027 return TRUE;
3028}
01329426
JH
3029\f
3030/* Return cost of the memory address x.
3031 For i386, it is better to use a complex address than let gcc copy
3032 the address into a reg and make a new pseudo. But not if the address
3033 requires to two regs - that would mean more pseudos with longer
3034 lifetimes. */
3035int
3036ix86_address_cost (x)
3037 rtx x;
3038{
3039 struct ix86_address parts;
3040 int cost = 1;
3b3c6a3f 3041
01329426
JH
3042 if (!ix86_decompose_address (x, &parts))
3043 abort ();
3044
3045 /* More complex memory references are better. */
3046 if (parts.disp && parts.disp != const0_rtx)
3047 cost--;
3048
3049 /* Attempt to minimize number of registers in the address. */
3050 if ((parts.base
3051 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
3052 || (parts.index
3053 && (!REG_P (parts.index)
3054 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
3055 cost++;
3056
3057 if (parts.base
3058 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
3059 && parts.index
3060 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
3061 && parts.base != parts.index)
3062 cost++;
3063
3064 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
3065 since it's predecode logic can't detect the length of instructions
3066 and it degenerates to vector decoded. Increase cost of such
3067 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 3068 to split such addresses or even refuse such addresses at all.
01329426
JH
3069
3070 Following addressing modes are affected:
3071 [base+scale*index]
3072 [scale*index+disp]
3073 [base+index]
0f290768 3074
01329426
JH
3075 The first and last case may be avoidable by explicitly coding the zero in
3076 memory address, but I don't have AMD-K6 machine handy to check this
3077 theory. */
3078
3079 if (TARGET_K6
3080 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
3081 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
3082 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
3083 cost += 10;
0f290768 3084
01329426
JH
3085 return cost;
3086}
3087\f
b949ea8b
JW
3088/* If X is a machine specific address (i.e. a symbol or label being
3089 referenced as a displacement from the GOT implemented using an
3090 UNSPEC), then return the base term. Otherwise return X. */
3091
3092rtx
3093ix86_find_base_term (x)
3094 rtx x;
3095{
3096 rtx term;
3097
3098 if (GET_CODE (x) != PLUS
3099 || XEXP (x, 0) != pic_offset_table_rtx
3100 || GET_CODE (XEXP (x, 1)) != CONST)
3101 return x;
3102
3103 term = XEXP (XEXP (x, 1), 0);
3104
3105 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
3106 term = XEXP (term, 0);
3107
3108 if (GET_CODE (term) != UNSPEC
3109 || XVECLEN (term, 0) != 1
3110 || XINT (term, 1) != 7)
3111 return x;
3112
3113 term = XVECEXP (term, 0, 0);
3114
3115 if (GET_CODE (term) != SYMBOL_REF
3116 && GET_CODE (term) != LABEL_REF)
3117 return x;
3118
3119 return term;
3120}
3121\f
e075ae69
RH
3122/* Determine if a given CONST RTX is a valid memory displacement
3123 in PIC mode. */
0f290768 3124
59be65f6 3125int
91bb873f
RH
3126legitimate_pic_address_disp_p (disp)
3127 register rtx disp;
3128{
3129 if (GET_CODE (disp) != CONST)
3130 return 0;
3131 disp = XEXP (disp, 0);
3132
3133 if (GET_CODE (disp) == PLUS)
3134 {
3135 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
3136 return 0;
3137 disp = XEXP (disp, 0);
3138 }
3139
3140 if (GET_CODE (disp) != UNSPEC
3141 || XVECLEN (disp, 0) != 1)
3142 return 0;
3143
3144 /* Must be @GOT or @GOTOFF. */
3145 if (XINT (disp, 1) != 6
3146 && XINT (disp, 1) != 7)
3147 return 0;
3148
3149 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3150 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
3151 return 0;
3152
3153 return 1;
3154}
3155
e075ae69
RH
3156/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
3157 memory address for an instruction. The MODE argument is the machine mode
3158 for the MEM expression that wants to use this address.
3159
3160 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
3161 convert common non-canonical forms to canonical form so that they will
3162 be recognized. */
3163
3b3c6a3f
MM
3164int
3165legitimate_address_p (mode, addr, strict)
3166 enum machine_mode mode;
3167 register rtx addr;
3168 int strict;
3169{
e075ae69
RH
3170 struct ix86_address parts;
3171 rtx base, index, disp;
3172 HOST_WIDE_INT scale;
3173 const char *reason = NULL;
3174 rtx reason_rtx = NULL_RTX;
3b3c6a3f
MM
3175
3176 if (TARGET_DEBUG_ADDR)
3177 {
3178 fprintf (stderr,
e9a25f70 3179 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
3b3c6a3f 3180 GET_MODE_NAME (mode), strict);
3b3c6a3f
MM
3181 debug_rtx (addr);
3182 }
3183
e075ae69 3184 if (! ix86_decompose_address (addr, &parts))
3b3c6a3f 3185 {
e075ae69 3186 reason = "decomposition failed";
50e60bc3 3187 goto report_error;
3b3c6a3f
MM
3188 }
3189
e075ae69
RH
3190 base = parts.base;
3191 index = parts.index;
3192 disp = parts.disp;
3193 scale = parts.scale;
91f0226f 3194
e075ae69 3195 /* Validate base register.
e9a25f70
JL
3196
3197 Don't allow SUBREG's here, it can lead to spill failures when the base
3d771dfd
MM
3198 is one word out of a two word structure, which is represented internally
3199 as a DImode int. */
e9a25f70 3200
3b3c6a3f
MM
3201 if (base)
3202 {
e075ae69
RH
3203 reason_rtx = base;
3204
3d771dfd 3205 if (GET_CODE (base) != REG)
3b3c6a3f 3206 {
e075ae69 3207 reason = "base is not a register";
50e60bc3 3208 goto report_error;
3b3c6a3f
MM
3209 }
3210
c954bd01
RH
3211 if (GET_MODE (base) != Pmode)
3212 {
e075ae69 3213 reason = "base is not in Pmode";
50e60bc3 3214 goto report_error;
c954bd01
RH
3215 }
3216
e9a25f70
JL
3217 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
3218 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
3b3c6a3f 3219 {
e075ae69 3220 reason = "base is not valid";
50e60bc3 3221 goto report_error;
3b3c6a3f
MM
3222 }
3223 }
3224
e075ae69 3225 /* Validate index register.
e9a25f70
JL
3226
3227 Don't allow SUBREG's here, it can lead to spill failures when the index
3d771dfd
MM
3228 is one word out of a two word structure, which is represented internally
3229 as a DImode int. */
e075ae69
RH
3230
3231 if (index)
3b3c6a3f 3232 {
e075ae69
RH
3233 reason_rtx = index;
3234
3235 if (GET_CODE (index) != REG)
3b3c6a3f 3236 {
e075ae69 3237 reason = "index is not a register";
50e60bc3 3238 goto report_error;
3b3c6a3f
MM
3239 }
3240
e075ae69 3241 if (GET_MODE (index) != Pmode)
c954bd01 3242 {
e075ae69 3243 reason = "index is not in Pmode";
50e60bc3 3244 goto report_error;
c954bd01
RH
3245 }
3246
e075ae69
RH
3247 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
3248 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
3b3c6a3f 3249 {
e075ae69 3250 reason = "index is not valid";
50e60bc3 3251 goto report_error;
3b3c6a3f
MM
3252 }
3253 }
3b3c6a3f 3254
e075ae69
RH
3255 /* Validate scale factor. */
3256 if (scale != 1)
3b3c6a3f 3257 {
e075ae69
RH
3258 reason_rtx = GEN_INT (scale);
3259 if (!index)
3b3c6a3f 3260 {
e075ae69 3261 reason = "scale without index";
50e60bc3 3262 goto report_error;
3b3c6a3f
MM
3263 }
3264
e075ae69 3265 if (scale != 2 && scale != 4 && scale != 8)
3b3c6a3f 3266 {
e075ae69 3267 reason = "scale is not a valid multiplier";
50e60bc3 3268 goto report_error;
3b3c6a3f
MM
3269 }
3270 }
3271
91bb873f 3272 /* Validate displacement. */
3b3c6a3f
MM
3273 if (disp)
3274 {
e075ae69
RH
3275 reason_rtx = disp;
3276
91bb873f 3277 if (!CONSTANT_ADDRESS_P (disp))
3b3c6a3f 3278 {
e075ae69 3279 reason = "displacement is not constant";
50e60bc3 3280 goto report_error;
3b3c6a3f
MM
3281 }
3282
0d7d98ee 3283 if (TARGET_64BIT)
3b3c6a3f 3284 {
0d7d98ee
JH
3285 if (!x86_64_sign_extended_value (disp))
3286 {
3287 reason = "displacement is out of range";
3288 goto report_error;
3289 }
3290 }
3291 else
3292 {
3293 if (GET_CODE (disp) == CONST_DOUBLE)
3294 {
3295 reason = "displacement is a const_double";
3296 goto report_error;
3297 }
3b3c6a3f
MM
3298 }
3299
91bb873f 3300 if (flag_pic && SYMBOLIC_CONST (disp))
3b3c6a3f 3301 {
0d7d98ee
JH
3302 if (TARGET_64BIT && (index || base))
3303 {
3304 reason = "non-constant pic memory reference";
3305 goto report_error;
3306 }
91bb873f
RH
3307 if (! legitimate_pic_address_disp_p (disp))
3308 {
e075ae69 3309 reason = "displacement is an invalid pic construct";
50e60bc3 3310 goto report_error;
91bb873f
RH
3311 }
3312
4e9efe54 3313 /* This code used to verify that a symbolic pic displacement
0f290768
KH
3314 includes the pic_offset_table_rtx register.
3315
4e9efe54
JH
3316 While this is good idea, unfortunately these constructs may
3317 be created by "adds using lea" optimization for incorrect
3318 code like:
3319
3320 int a;
3321 int foo(int i)
3322 {
3323 return *(&a+i);
3324 }
3325
50e60bc3 3326 This code is nonsensical, but results in addressing
4e9efe54
JH
3327 GOT table with pic_offset_table_rtx base. We can't
3328 just refuse it easilly, since it gets matched by
3329 "addsi3" pattern, that later gets split to lea in the
3330 case output register differs from input. While this
3331 can be handled by separate addsi pattern for this case
3332 that never results in lea, this seems to be easier and
3333 correct fix for crash to disable this test. */
3b3c6a3f 3334 }
91bb873f 3335 else if (HALF_PIC_P ())
3b3c6a3f 3336 {
91bb873f 3337 if (! HALF_PIC_ADDRESS_P (disp)
e075ae69 3338 || (base != NULL_RTX || index != NULL_RTX))
91bb873f 3339 {
e075ae69 3340 reason = "displacement is an invalid half-pic reference";
50e60bc3 3341 goto report_error;
91bb873f 3342 }
3b3c6a3f
MM
3343 }
3344 }
3345
e075ae69 3346 /* Everything looks valid. */
3b3c6a3f 3347 if (TARGET_DEBUG_ADDR)
e075ae69 3348 fprintf (stderr, "Success.\n");
3b3c6a3f 3349 return TRUE;
e075ae69 3350
50e60bc3 3351report_error:
e075ae69
RH
3352 if (TARGET_DEBUG_ADDR)
3353 {
3354 fprintf (stderr, "Error: %s\n", reason);
3355 debug_rtx (reason_rtx);
3356 }
3357 return FALSE;
3b3c6a3f 3358}
3b3c6a3f 3359\f
55efb413
JW
3360/* Return an unique alias set for the GOT. */
3361
0f290768 3362static HOST_WIDE_INT
55efb413
JW
3363ix86_GOT_alias_set ()
3364{
3365 static HOST_WIDE_INT set = -1;
3366 if (set == -1)
3367 set = new_alias_set ();
3368 return set;
0f290768 3369}
55efb413 3370
3b3c6a3f
MM
3371/* Return a legitimate reference for ORIG (an address) using the
3372 register REG. If REG is 0, a new pseudo is generated.
3373
91bb873f 3374 There are two types of references that must be handled:
3b3c6a3f
MM
3375
3376 1. Global data references must load the address from the GOT, via
3377 the PIC reg. An insn is emitted to do this load, and the reg is
3378 returned.
3379
91bb873f
RH
3380 2. Static data references, constant pool addresses, and code labels
3381 compute the address as an offset from the GOT, whose base is in
3382 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
3383 differentiate them from global data objects. The returned
3384 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
3385
3386 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 3387 reg also appears in the address. */
3b3c6a3f
MM
3388
3389rtx
3390legitimize_pic_address (orig, reg)
3391 rtx orig;
3392 rtx reg;
3393{
3394 rtx addr = orig;
3395 rtx new = orig;
91bb873f 3396 rtx base;
3b3c6a3f 3397
91bb873f
RH
3398 if (GET_CODE (addr) == LABEL_REF
3399 || (GET_CODE (addr) == SYMBOL_REF
3400 && (CONSTANT_POOL_ADDRESS_P (addr)
3401 || SYMBOL_REF_FLAG (addr))))
3b3c6a3f 3402 {
91bb873f
RH
3403 /* This symbol may be referenced via a displacement from the PIC
3404 base address (@GOTOFF). */
3b3c6a3f 3405
91bb873f 3406 current_function_uses_pic_offset_table = 1;
4859dd36
RH
3407 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
3408 new = gen_rtx_CONST (Pmode, new);
91bb873f 3409 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3b3c6a3f 3410
91bb873f
RH
3411 if (reg != 0)
3412 {
3b3c6a3f 3413 emit_move_insn (reg, new);
91bb873f 3414 new = reg;
3b3c6a3f 3415 }
3b3c6a3f 3416 }
91bb873f 3417 else if (GET_CODE (addr) == SYMBOL_REF)
3b3c6a3f 3418 {
91bb873f 3419 /* This symbol must be referenced via a load from the
0f290768 3420 Global Offset Table (@GOT). */
3b3c6a3f 3421
91bb873f 3422 current_function_uses_pic_offset_table = 1;
4859dd36
RH
3423 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
3424 new = gen_rtx_CONST (Pmode, new);
91bb873f
RH
3425 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3426 new = gen_rtx_MEM (Pmode, new);
3427 RTX_UNCHANGING_P (new) = 1;
ba4828e0 3428 set_mem_alias_set (new, ix86_GOT_alias_set ());
3b3c6a3f
MM
3429
3430 if (reg == 0)
3431 reg = gen_reg_rtx (Pmode);
91bb873f
RH
3432 emit_move_insn (reg, new);
3433 new = reg;
0f290768 3434 }
91bb873f
RH
3435 else
3436 {
3437 if (GET_CODE (addr) == CONST)
3b3c6a3f 3438 {
91bb873f
RH
3439 addr = XEXP (addr, 0);
3440 if (GET_CODE (addr) == UNSPEC)
3441 {
3442 /* Check that the unspec is one of the ones we generate? */
3443 }
3444 else if (GET_CODE (addr) != PLUS)
564d80f4 3445 abort ();
3b3c6a3f 3446 }
91bb873f
RH
3447 if (GET_CODE (addr) == PLUS)
3448 {
3449 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 3450
91bb873f
RH
3451 /* Check first to see if this is a constant offset from a @GOTOFF
3452 symbol reference. */
3453 if ((GET_CODE (op0) == LABEL_REF
3454 || (GET_CODE (op0) == SYMBOL_REF
3455 && (CONSTANT_POOL_ADDRESS_P (op0)
3456 || SYMBOL_REF_FLAG (op0))))
3457 && GET_CODE (op1) == CONST_INT)
3458 {
3459 current_function_uses_pic_offset_table = 1;
4859dd36
RH
3460 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
3461 new = gen_rtx_PLUS (Pmode, new, op1);
3462 new = gen_rtx_CONST (Pmode, new);
91bb873f
RH
3463 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3464
3465 if (reg != 0)
3466 {
3467 emit_move_insn (reg, new);
3468 new = reg;
3469 }
3470 }
3471 else
3472 {
3473 base = legitimize_pic_address (XEXP (addr, 0), reg);
3474 new = legitimize_pic_address (XEXP (addr, 1),
3475 base == reg ? NULL_RTX : reg);
3476
3477 if (GET_CODE (new) == CONST_INT)
3478 new = plus_constant (base, INTVAL (new));
3479 else
3480 {
3481 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
3482 {
3483 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
3484 new = XEXP (new, 1);
3485 }
3486 new = gen_rtx_PLUS (Pmode, base, new);
3487 }
3488 }
3489 }
3b3c6a3f
MM
3490 }
3491 return new;
3492}
3493\f
3b3c6a3f
MM
3494/* Try machine-dependent ways of modifying an illegitimate address
3495 to be legitimate. If we find one, return the new, valid address.
3496 This macro is used in only one place: `memory_address' in explow.c.
3497
3498 OLDX is the address as it was before break_out_memory_refs was called.
3499 In some cases it is useful to look at this to decide what needs to be done.
3500
3501 MODE and WIN are passed so that this macro can use
3502 GO_IF_LEGITIMATE_ADDRESS.
3503
3504 It is always safe for this macro to do nothing. It exists to recognize
3505 opportunities to optimize the output.
3506
3507 For the 80386, we handle X+REG by loading X into a register R and
3508 using R+REG. R will go in a general reg and indexing will be used.
3509 However, if REG is a broken-out memory address or multiplication,
3510 nothing needs to be done because REG can certainly go in a general reg.
3511
3512 When -fpic is used, special handling is needed for symbolic references.
3513 See comments by legitimize_pic_address in i386.c for details. */
3514
3515rtx
3516legitimize_address (x, oldx, mode)
3517 register rtx x;
bb5177ac 3518 register rtx oldx ATTRIBUTE_UNUSED;
3b3c6a3f
MM
3519 enum machine_mode mode;
3520{
3521 int changed = 0;
3522 unsigned log;
3523
3524 if (TARGET_DEBUG_ADDR)
3525 {
e9a25f70
JL
3526 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
3527 GET_MODE_NAME (mode));
3b3c6a3f
MM
3528 debug_rtx (x);
3529 }
3530
3531 if (flag_pic && SYMBOLIC_CONST (x))
3532 return legitimize_pic_address (x, 0);
3533
3534 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
3535 if (GET_CODE (x) == ASHIFT
3536 && GET_CODE (XEXP (x, 1)) == CONST_INT
3537 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
3538 {
3539 changed = 1;
a269a03c
JC
3540 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
3541 GEN_INT (1 << log));
3b3c6a3f
MM
3542 }
3543
3544 if (GET_CODE (x) == PLUS)
3545 {
0f290768 3546 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 3547
3b3c6a3f
MM
3548 if (GET_CODE (XEXP (x, 0)) == ASHIFT
3549 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3550 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
3551 {
3552 changed = 1;
c5c76735
JL
3553 XEXP (x, 0) = gen_rtx_MULT (Pmode,
3554 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
3555 GEN_INT (1 << log));
3b3c6a3f
MM
3556 }
3557
3558 if (GET_CODE (XEXP (x, 1)) == ASHIFT
3559 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
3560 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
3561 {
3562 changed = 1;
c5c76735
JL
3563 XEXP (x, 1) = gen_rtx_MULT (Pmode,
3564 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
3565 GEN_INT (1 << log));
3b3c6a3f
MM
3566 }
3567
0f290768 3568 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
3569 if (GET_CODE (XEXP (x, 1)) == MULT)
3570 {
3571 rtx tmp = XEXP (x, 0);
3572 XEXP (x, 0) = XEXP (x, 1);
3573 XEXP (x, 1) = tmp;
3574 changed = 1;
3575 }
3576
3577 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
3578 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
3579 created by virtual register instantiation, register elimination, and
3580 similar optimizations. */
3581 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
3582 {
3583 changed = 1;
c5c76735
JL
3584 x = gen_rtx_PLUS (Pmode,
3585 gen_rtx_PLUS (Pmode, XEXP (x, 0),
3586 XEXP (XEXP (x, 1), 0)),
3587 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
3588 }
3589
e9a25f70
JL
3590 /* Canonicalize
3591 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
3592 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
3593 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
3594 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
3595 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
3596 && CONSTANT_P (XEXP (x, 1)))
3597 {
00c79232
ML
3598 rtx constant;
3599 rtx other = NULL_RTX;
3b3c6a3f
MM
3600
3601 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3602 {
3603 constant = XEXP (x, 1);
3604 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
3605 }
3606 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
3607 {
3608 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
3609 other = XEXP (x, 1);
3610 }
3611 else
3612 constant = 0;
3613
3614 if (constant)
3615 {
3616 changed = 1;
c5c76735
JL
3617 x = gen_rtx_PLUS (Pmode,
3618 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
3619 XEXP (XEXP (XEXP (x, 0), 1), 0)),
3620 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
3621 }
3622 }
3623
3624 if (changed && legitimate_address_p (mode, x, FALSE))
3625 return x;
3626
3627 if (GET_CODE (XEXP (x, 0)) == MULT)
3628 {
3629 changed = 1;
3630 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
3631 }
3632
3633 if (GET_CODE (XEXP (x, 1)) == MULT)
3634 {
3635 changed = 1;
3636 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
3637 }
3638
3639 if (changed
3640 && GET_CODE (XEXP (x, 1)) == REG
3641 && GET_CODE (XEXP (x, 0)) == REG)
3642 return x;
3643
3644 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
3645 {
3646 changed = 1;
3647 x = legitimize_pic_address (x, 0);
3648 }
3649
3650 if (changed && legitimate_address_p (mode, x, FALSE))
3651 return x;
3652
3653 if (GET_CODE (XEXP (x, 0)) == REG)
3654 {
3655 register rtx temp = gen_reg_rtx (Pmode);
3656 register rtx val = force_operand (XEXP (x, 1), temp);
3657 if (val != temp)
3658 emit_move_insn (temp, val);
3659
3660 XEXP (x, 1) = temp;
3661 return x;
3662 }
3663
3664 else if (GET_CODE (XEXP (x, 1)) == REG)
3665 {
3666 register rtx temp = gen_reg_rtx (Pmode);
3667 register rtx val = force_operand (XEXP (x, 0), temp);
3668 if (val != temp)
3669 emit_move_insn (temp, val);
3670
3671 XEXP (x, 0) = temp;
3672 return x;
3673 }
3674 }
3675
3676 return x;
3677}
2a2ab3f9
JVA
3678\f
3679/* Print an integer constant expression in assembler syntax. Addition
3680 and subtraction are the only arithmetic that may appear in these
3681 expressions. FILE is the stdio stream to write to, X is the rtx, and
3682 CODE is the operand print code from the output string. */
3683
static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      /* "."  is the current assembly location; only meaningful in PIC.  */
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      /* 'P' requests a @PLT suffix for symbols not known to be local.  */
      if (code == 'P' && ! SYMBOL_REF_FLAG (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* VOIDmode CONST_DOUBLE is a wide integer constant.
	     We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      /* Parenthesization differs between the two assembler dialects.  */
      putc (ASSEMBLER_DIALECT ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      /* The UNSPEC number selects the PIC relocation suffix.  */
      switch (XINT (x, 1))
	{
	case 6:
	  fputs ("@GOT", file);
	  break;
	case 7:
	  fputs ("@GOTOFF", file);
	  break;
	case 8:
	  fputs ("@PLT", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
1865dbb5 3793
0f290768 3794/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
3795 We need to handle our special PIC relocations. */
3796
0f290768 3797void
1865dbb5
JM
3798i386_dwarf_output_addr_const (file, x)
3799 FILE *file;
3800 rtx x;
3801{
f0ca81d2 3802 fprintf (file, "%s", INT_ASM_OP);
1865dbb5
JM
3803 if (flag_pic)
3804 output_pic_addr_const (file, x, '\0');
3805 else
3806 output_addr_const (file, x);
3807 fputc ('\n', file);
3808}
3809
/* In the name of slightly smaller debug output, and to cater to
   general assembler losage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.  */

rtx
i386_simplify_dwarf_addr (orig_x)
     rtx orig_x;
{
  rtx x = orig_x;

  /* Only (plus (reg) (const ...)) — i.e. pic_reg + constant — is
     a candidate; anything else is returned untouched.  */
  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 0)) != REG
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  x = XEXP (XEXP (x, 1), 0);
  /* UNSPEC numbers 6 and 7 are @GOT and @GOTOFF relocations
     (see output_pic_addr_const); strip them down to the bare symbol.  */
  if (GET_CODE (x) == UNSPEC
      && (XINT (x, 1) == 6
	  || XINT (x, 1) == 7))
    return XVECEXP (x, 0, 0);

  /* Likewise for symbol+offset: (plus (unspec [sym]) (const_int)).  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (XINT (XEXP (x, 0), 1) == 6
	  || XINT (XEXP (x, 0), 1) == 7))
    return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));

  return orig_x;
}
2a2ab3f9 3840\f
/* Emit to FILE the condition-code suffix (e.g. "e", "g", "be") for
   comparison CODE in condition-code mode MODE.  REVERSE means emit the
   inverse condition.  FP means the suffix will follow an fcmov-style
   mnemonic, which spells some conditions differently (see GTU below).
   The mode checks enforce which flags each suffix may legally read.  */

static void
put_condition_code (code, mode, reverse, fp, file)
     enum rtx_code code;
     enum machine_mode mode;
     int reverse, fp;
     FILE *file;
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      /* An FP comparison must be expressible as a single integer
	 condition here; splits needing a second or bypass jump are
	 handled before final output.  */
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	abort ();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
	 Those same assemblers have the same but opposite losage on cmov.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      /* In the no-overflow modes only the sign flag is valid.  */
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case LTU:
      if (mode != CCmode)
	abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
	abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
3930
e075ae69
RH
3931void
3932print_reg (x, code, file)
3933 rtx x;
3934 int code;
3935 FILE *file;
e5cb57e8 3936{
e075ae69 3937 if (REGNO (x) == ARG_POINTER_REGNUM
564d80f4 3938 || REGNO (x) == FRAME_POINTER_REGNUM
e075ae69
RH
3939 || REGNO (x) == FLAGS_REG
3940 || REGNO (x) == FPSR_REG)
3941 abort ();
e9a25f70 3942
e075ae69
RH
3943 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3944 putc ('%', file);
3945
ef6257cd 3946 if (code == 'w' || MMX_REG_P (x))
e075ae69
RH
3947 code = 2;
3948 else if (code == 'b')
3949 code = 1;
3950 else if (code == 'k')
3951 code = 4;
3f3f2124
JH
3952 else if (code == 'q')
3953 code = 8;
e075ae69
RH
3954 else if (code == 'y')
3955 code = 3;
3956 else if (code == 'h')
3957 code = 0;
3958 else
3959 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 3960
3f3f2124
JH
3961 /* Irritatingly, AMD extended registers use different naming convention
3962 from the normal registers. */
3963 if (REX_INT_REG_P (x))
3964 {
885a70fd
JH
3965 if (!TARGET_64BIT)
3966 abort ();
3f3f2124
JH
3967 switch (code)
3968 {
ef6257cd 3969 case 0:
3f3f2124
JH
3970 error ("Extended registers have no high halves\n");
3971 break;
3972 case 1:
3973 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
3974 break;
3975 case 2:
3976 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
3977 break;
3978 case 4:
3979 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
3980 break;
3981 case 8:
3982 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
3983 break;
3984 default:
3985 error ("Unsupported operand size for extended register.\n");
3986 break;
3987 }
3988 return;
3989 }
e075ae69
RH
3990 switch (code)
3991 {
3992 case 3:
3993 if (STACK_TOP_P (x))
3994 {
3995 fputs ("st(0)", file);
3996 break;
3997 }
3998 /* FALLTHRU */
e075ae69 3999 case 8:
3f3f2124 4000 case 4:
e075ae69 4001 case 12:
446988df 4002 if (! ANY_FP_REG_P (x))
885a70fd 4003 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
e075ae69 4004 /* FALLTHRU */
a7180f70 4005 case 16:
e075ae69
RH
4006 case 2:
4007 fputs (hi_reg_name[REGNO (x)], file);
4008 break;
4009 case 1:
4010 fputs (qi_reg_name[REGNO (x)], file);
4011 break;
4012 case 0:
4013 fputs (qi_high_reg_name[REGNO (x)], file);
4014 break;
4015 default:
4016 abort ();
fe25fea3 4017 }
e5cb57e8
SC
4018}
4019
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register (also accepted as
	"print the operand as if it were HImode").
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   + -- print a branch hint prefix (ds/cs) when the predicted direction
	disagrees with the CPU's static prediction.
 */

void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  if (code)
    {
      switch (code)
	{
	case '*':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('*', file);
	  return;

	case 'A':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('*', file);
	  else if (ASSEMBLER_DIALECT == 1)
	    {
	      /* Intel syntax. For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (GET_CODE (x) != REG)
		{
		  putc ('[', file);
		  PRINT_OPERAND (file, x, 0);
		  putc (']', file);
		  return;
		}
	    }

	  PRINT_OPERAND (file, x, 0);
	  return;

	/* Explicit size suffixes; Intel syntax encodes the size in the
	   "PTR" operand instead, so emit nothing there.  */
	case 'L':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == 0)
	    putc ('t', file);
	  return;

	case 'z':
	  /* 387 opcodes don't get size suffixes if the operands are
	     registers.  */
	  if (STACK_REG_P (x))
	    return;

	  /* this is the size of op from size of operand */
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 2:
#ifdef HAVE_GAS_FILDS_FISTS
	      putc ('s', file);
#endif
	      return;

	    case 4:
	      if (GET_MODE (x) == SFmode)
		{
		  putc ('s', file);
		  return;
		}
	      else
		putc ('l', file);
	      return;

	    case 12:
	    case 16:
	      putc ('t', file);
	      return;

	    case 8:
	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
		{
#ifdef GAS_MNEMONICS
		  putc ('q', file);
#else
		  /* Older gas spells the 64-bit integer suffix "ll".  */
		  putc ('l', file);
		  putc ('l', file);
#endif
		}
	      else
		putc ('l', file);
	      return;

	    default:
	      abort ();
	    }

	/* Size overrides handled below, after the operand itself
	   is classified.  */
	case 'b':
	case 'w':
	case 'k':
	case 'q':
	case 'h':
	case 'y':
	case 'X':
	case 'P':
	  break;

	case 's':
	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      PRINT_OPERAND (file, x, 0);
	      putc (',', file);
	    }
	  return;

	case 'D':
	  /* Little bit of braindamage here. The SSE compare instructions
	     does use completely different names for the comparisons that the
	     fp conditional moves.  */
	  switch (GET_CODE (x))
	    {
	    case EQ:
	    case UNEQ:
	      fputs ("eq", file);
	      break;
	    case LT:
	    case UNLT:
	      fputs ("lt", file);
	      break;
	    case LE:
	    case UNLE:
	      fputs ("le", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case NE:
	    case LTGT:
	      fputs ("neq", file);
	      break;
	    case UNGE:
	    case GE:
	      fputs ("nlt", file);
	      break;
	    case UNGT:
	    case GT:
	      fputs ("nle", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    default:
	      abort ();
	      break;
	    }
	  return;
	case 'C':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
	  return;
	case 'F':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
	  return;

	  /* Like above, but reverse condition */
	case 'c':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
	  return;
	case 'f':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
	  return;
	case '+':
	  {
	    rtx x;

	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
	      return;

	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	    if (x)
	      {
		int pred_val = INTVAL (XEXP (x, 0));

		/* Only hint when the profile is decisive (outside the
		   45%-55% band around even odds).  */
		if (pred_val < REG_BR_PROB_BASE * 45 / 100
		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  {
		    int taken = pred_val > REG_BR_PROB_BASE / 2;
		    int cputaken = final_forward_branch_p (current_output_insn) == 0;

		    /* Emit hints only in the case default branch prediction
		       heruistics would fail.  */
		    if (taken != cputaken)
		      {
			/* We use 3e (DS) prefix for taken branches and
			   2e (CS) prefix for not taken branches.  */
			if (taken)
			  fputs ("ds ; ", file);
			else
			  fputs ("cs ; ", file);
		      }
		  }
	      }
	    return;
	  }
	default:
	  {
	    char str[50];

	    sprintf (str, "invalid operand code `%c'", code);
	    output_operand_lossage (str);
	  }
	}
    }

  if (GET_CODE (x) == REG)
    {
      PRINT_REG (x, code, file);
    }

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
	{
	  const char * size;

	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "XWORD"; break;
	    case 16: size = "XMMWORD"; break;
	    default:
	      abort ();
	    }

	  /* Check for explicit size override (codes 'b', 'w' and 'k') */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";

	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      if (flag_pic && CONSTANT_ADDRESS_P (x))
	output_pic_addr_const (file, x, code);
      /* Avoid (%rip) for call operands.  */
      else if (CONSTANT_ADDRESS_P (x) && code == 'P'
	       && GET_CODE (x) != CONST_INT)
	output_addr_const (file, x);
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      /* Single-precision immediates are emitted as their raw bit
	 pattern in hex.  */
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == 0)
	putc ('$', file);
      fprintf (file, "0x%lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
	   && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }
  else
    {
      /* Remaining cases are constant addresses / immediates.  */
      if (code != 'P')
	{
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
4379\f
/* Print a memory operand whose address is ADDR.  Handles both the
   AT&T syntax "disp(base,index,scale)" and the Intel syntax
   "[base+disp+index*scale]".  */

void
print_operand_address (file, addr)
     FILE *file;
     register rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
	{
	  /* Intel syntax needs an explicit segment for a bare
	     absolute address.  */
	  if (ASSEMBLER_DIALECT != 0)
	    {
	      if (USER_LABEL_PREFIX[0] == 0)
		putc ('%', file);
	      fputs ("ds:", file);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
	}
      else if (flag_pic)
	output_pic_addr_const (file, addr, 0);
      else
	output_addr_const (file, addr);

      /* Use one byte shorter RIP relative addressing for 64bit mode.  */
      if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
	fputs ("(%rip)", file);
    }
  else
    {
      if (ASSEMBLER_DIALECT == 0)
	{
	  /* AT&T syntax: disp(base,index,scale).  */
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    PRINT_REG (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  /* Intel syntax: sym[base+offset+index*scale].  OFFSET holds
	     any numeric part to be printed inside the brackets.  */
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      PRINT_REG (base, 0, file);
	      if (offset)
		{
		  /* Negative offsets carry their own '-' sign.  */
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
4501\f
4502/* Split one or more DImode RTL references into pairs of SImode
4503 references. The RTL can be REG, offsettable MEM, integer constant, or
4504 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
4505 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 4506 that parallel "operands". */
2a2ab3f9
JVA
4507
4508void
4509split_di (operands, num, lo_half, hi_half)
4510 rtx operands[];
4511 int num;
4512 rtx lo_half[], hi_half[];
4513{
4514 while (num--)
4515 {
57dbca5e 4516 rtx op = operands[num];
e075ae69
RH
4517 if (CONSTANT_P (op))
4518 split_double (op, &lo_half[num], &hi_half[num]);
4519 else if (! reload_completed)
a269a03c
JC
4520 {
4521 lo_half[num] = gen_lowpart (SImode, op);
4522 hi_half[num] = gen_highpart (SImode, op);
4523 }
4524 else if (GET_CODE (op) == REG)
2a2ab3f9 4525 {
0d7d98ee
JH
4526 if (TARGET_64BIT)
4527 abort();
57dbca5e
BS
4528 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
4529 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
2a2ab3f9 4530 }
57dbca5e 4531 else if (offsettable_memref_p (op))
2a2ab3f9 4532 {
f4ef873c 4533 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 4534 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
4535 }
4536 else
564d80f4 4537 abort ();
2a2ab3f9
JVA
4538 }
4539}
4540\f
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (insn, operands)
     rtx insn;
     rtx *operands;
{
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else if (!is_sse)
    abort ();
#endif

  /* First pick the base mnemonic: fi* forms when one operand is an
     integer memory operand, and the SSE spelling in SSEP.  */
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "div";
      break;

    default:
      abort ();
    }

  if (is_sse)
    {
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
	strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
	strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
    }
  strcpy (buf, p);

  /* Now choose the operand template.  The comments name the x87 stack
     slots involved; "pop" marks the popping instruction forms.  */
  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative: canonicalize so the destination register is
	 operands[1].  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative: memory operand position selects the reversed
	 (r) instruction form.  */
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}
e075ae69 4764
a4f31c00 4765/* Output code to initialize control word copies used by
7a2e09f4
JH
4766 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
4767 is set to control word rounding downwards. */
4768void
4769emit_i387_cw_initialization (normal, round_down)
4770 rtx normal, round_down;
4771{
4772 rtx reg = gen_reg_rtx (HImode);
4773
4774 emit_insn (gen_x86_fnstcw_1 (normal));
4775 emit_move_insn (reg, normal);
4776 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
4777 && !TARGET_64BIT)
4778 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
4779 else
4780 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
4781 emit_move_insn (round_down, reg);
4782}
4783
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  operands[2]/operands[3] are the saved
   and round-down control words set up by
   emit_i387_cw_initialization.  */

const char *
output_fix_trunc (insn, operands)
     rtx insn;
     rtx *operands;
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (! STACK_TOP_P (operands[1]))
    abort ();

  if (GET_CODE (operands[0]) != MEM)
    abort ();

  /* Switch to round-toward-zero, store, then restore the control
     word.  */
  output_asm_insn ("fldcw\t%3", operands);
  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z0\t%0", operands);
  else
    output_asm_insn ("fist%z0\t%0", operands);
  output_asm_insn ("fldcw\t%2", operands);

  return "";
}
cda749b1 4817
e075ae69
RH
4818/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
4819 should be used and 2 when fnstsw should be used. UNORDERED_P is true
4820 when fucom should be used. */
4821
69ddee61 4822const char *
e075ae69 4823output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
4824 rtx insn;
4825 rtx *operands;
e075ae69 4826 int eflags_p, unordered_p;
cda749b1 4827{
e075ae69
RH
4828 int stack_top_dies;
4829 rtx cmp_op0 = operands[0];
4830 rtx cmp_op1 = operands[1];
0644b628 4831 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
e075ae69
RH
4832
4833 if (eflags_p == 2)
4834 {
4835 cmp_op0 = cmp_op1;
4836 cmp_op1 = operands[2];
4837 }
0644b628
JH
4838 if (is_sse)
4839 {
4840 if (GET_MODE (operands[0]) == SFmode)
4841 if (unordered_p)
4842 return "ucomiss\t{%1, %0|%0, %1}";
4843 else
4844 return "comiss\t{%1, %0|%0, %y}";
4845 else
4846 if (unordered_p)
4847 return "ucomisd\t{%1, %0|%0, %1}";
4848 else
4849 return "comisd\t{%1, %0|%0, %y}";
4850 }
cda749b1 4851
e075ae69 4852 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
4853 abort ();
4854
e075ae69 4855 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 4856
e075ae69
RH
4857 if (STACK_REG_P (cmp_op1)
4858 && stack_top_dies
4859 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
4860 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 4861 {
e075ae69
RH
4862 /* If both the top of the 387 stack dies, and the other operand
4863 is also a stack register that dies, then this must be a
4864 `fcompp' float compare */
4865
4866 if (eflags_p == 1)
4867 {
4868 /* There is no double popping fcomi variant. Fortunately,
4869 eflags is immune from the fstp's cc clobbering. */
4870 if (unordered_p)
4871 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
4872 else
4873 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
4874 return "fstp\t%y0";
4875 }
4876 else
cda749b1 4877 {
e075ae69
RH
4878 if (eflags_p == 2)
4879 {
4880 if (unordered_p)
4881 return "fucompp\n\tfnstsw\t%0";
4882 else
4883 return "fcompp\n\tfnstsw\t%0";
4884 }
cda749b1
JW
4885 else
4886 {
e075ae69
RH
4887 if (unordered_p)
4888 return "fucompp";
4889 else
4890 return "fcompp";
cda749b1
JW
4891 }
4892 }
cda749b1
JW
4893 }
4894 else
4895 {
e075ae69 4896 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 4897
0f290768 4898 static const char * const alt[24] =
e075ae69
RH
4899 {
4900 "fcom%z1\t%y1",
4901 "fcomp%z1\t%y1",
4902 "fucom%z1\t%y1",
4903 "fucomp%z1\t%y1",
0f290768 4904
e075ae69
RH
4905 "ficom%z1\t%y1",
4906 "ficomp%z1\t%y1",
4907 NULL,
4908 NULL,
4909
4910 "fcomi\t{%y1, %0|%0, %y1}",
4911 "fcomip\t{%y1, %0|%0, %y1}",
4912 "fucomi\t{%y1, %0|%0, %y1}",
4913 "fucomip\t{%y1, %0|%0, %y1}",
4914
4915 NULL,
4916 NULL,
4917 NULL,
4918 NULL,
4919
4920 "fcom%z2\t%y2\n\tfnstsw\t%0",
4921 "fcomp%z2\t%y2\n\tfnstsw\t%0",
4922 "fucom%z2\t%y2\n\tfnstsw\t%0",
4923 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 4924
e075ae69
RH
4925 "ficom%z2\t%y2\n\tfnstsw\t%0",
4926 "ficomp%z2\t%y2\n\tfnstsw\t%0",
4927 NULL,
4928 NULL
4929 };
4930
4931 int mask;
69ddee61 4932 const char *ret;
e075ae69
RH
4933
4934 mask = eflags_p << 3;
4935 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4936 mask |= unordered_p << 1;
4937 mask |= stack_top_dies;
4938
4939 if (mask >= 24)
4940 abort ();
4941 ret = alt[mask];
4942 if (ret == NULL)
4943 abort ();
cda749b1 4944
e075ae69 4945 return ret;
cda749b1
JW
4946 }
4947}
2a2ab3f9 4948
e075ae69 4949/* Output assembler code to FILE to initialize basic-block profiling.
2a2ab3f9 4950
e075ae69 4951 If profile_block_flag == 2
2a2ab3f9 4952
e075ae69
RH
4953 Output code to call the subroutine `__bb_init_trace_func'
4954 and pass two parameters to it. The first parameter is
4955 the address of a block allocated in the object module.
4956 The second parameter is the number of the first basic block
4957 of the function.
2a2ab3f9 4958
e075ae69 4959 The name of the block is a local symbol made with this statement:
0f290768 4960
e075ae69 4961 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
2a2ab3f9 4962
e075ae69
RH
4963 Of course, since you are writing the definition of
4964 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4965 can take a short cut in the definition of this macro and use the
4966 name that you know will result.
2a2ab3f9 4967
e075ae69
RH
4968 The number of the first basic block of the function is
4969 passed to the macro in BLOCK_OR_LABEL.
2a2ab3f9 4970
e075ae69
RH
4971 If described in a virtual assembler language the code to be
4972 output looks like:
2a2ab3f9 4973
e075ae69
RH
4974 parameter1 <- LPBX0
4975 parameter2 <- BLOCK_OR_LABEL
4976 call __bb_init_trace_func
2a2ab3f9 4977
e075ae69 4978 else if profile_block_flag != 0
e74389ff 4979
e075ae69
RH
4980 Output code to call the subroutine `__bb_init_func'
4981 and pass one single parameter to it, which is the same
4982 as the first parameter to `__bb_init_trace_func'.
e74389ff 4983
e075ae69
RH
4984 The first word of this parameter is a flag which will be nonzero if
4985 the object module has already been initialized. So test this word
4986 first, and do not call `__bb_init_func' if the flag is nonzero.
4987 Note: When profile_block_flag == 2 the test need not be done
4988 but `__bb_init_trace_func' *must* be called.
e74389ff 4989
e075ae69
RH
4990 BLOCK_OR_LABEL may be used to generate a label number as a
4991 branch destination in case `__bb_init_func' will not be called.
e74389ff 4992
e075ae69
RH
4993 If described in a virtual assembler language the code to be
4994 output looks like:
2a2ab3f9 4995
e075ae69
RH
4996 cmp (LPBX0),0
4997 jne local_label
4998 parameter1 <- LPBX0
4999 call __bb_init_func
5000 local_label:
5001*/
c572e5ba 5002
e075ae69
RH
5003void
5004ix86_output_function_block_profiler (file, block_or_label)
5005 FILE *file;
5006 int block_or_label;
c572e5ba 5007{
e075ae69
RH
5008 static int num_func = 0;
5009 rtx xops[8];
5010 char block_table[80], false_label[80];
c572e5ba 5011
e075ae69 5012 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
e9a25f70 5013
e075ae69
RH
5014 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
5015 xops[5] = stack_pointer_rtx;
5016 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
2a2ab3f9 5017
e075ae69 5018 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
c572e5ba 5019
e075ae69 5020 switch (profile_block_flag)
c572e5ba 5021 {
e075ae69
RH
5022 case 2:
5023 xops[2] = GEN_INT (block_or_label);
5024 xops[3] = gen_rtx_MEM (Pmode,
5025 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
5026 xops[6] = GEN_INT (8);
e9a25f70 5027
e075ae69
RH
5028 output_asm_insn ("push{l}\t%2", xops);
5029 if (!flag_pic)
5030 output_asm_insn ("push{l}\t%1", xops);
e9a25f70 5031 else
870a0c2c 5032 {
e075ae69
RH
5033 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
5034 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 5035 }
e075ae69
RH
5036 output_asm_insn ("call\t%P3", xops);
5037 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
5038 break;
c572e5ba 5039
e075ae69
RH
5040 default:
5041 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
c572e5ba 5042
e075ae69
RH
5043 xops[0] = const0_rtx;
5044 xops[2] = gen_rtx_MEM (Pmode,
5045 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
5046 xops[3] = gen_rtx_MEM (Pmode,
5047 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
5048 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
5049 xops[6] = GEN_INT (4);
a14003ee 5050
e075ae69 5051 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
446ba526 5052
e075ae69
RH
5053 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
5054 output_asm_insn ("jne\t%2", xops);
870a0c2c 5055
e075ae69
RH
5056 if (!flag_pic)
5057 output_asm_insn ("push{l}\t%1", xops);
5058 else
5059 {
5060 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
5061 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 5062 }
e075ae69
RH
5063 output_asm_insn ("call\t%P3", xops);
5064 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
5065 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
5066 num_func++;
5067 break;
c572e5ba 5068 }
2a2ab3f9 5069}
305f097e 5070
/* Output assembler code to FILE to increment a counter associated
   with basic block number BLOCKNO.

   If profile_block_flag == 2

	Output code to initialize the global structure `__bb' and
	call the function `__bb_trace_func' which will increment the
	counter.

	`__bb' consists of two words.  In the first word the number
	of the basic block has to be stored.  In the second word
	the address of a block allocated in the object module
	has to be stored.  The basic block number is given by BLOCKNO;
	the address of the block is given by the label created with

	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);

	by FUNCTION_BLOCK_PROFILER.  Since you are writing the
	definition of `ASM_GENERATE_INTERNAL_LABEL' as well as that of
	this macro, you can take a short cut and use the name that you
	know will result.

	In a virtual assembler language the output looks like:

		move BLOCKNO -> (__bb)
		move LPBX0 -> (__bb+4)
		call __bb_trace_func

	Note that function `__bb_trace_func' must not change the
	machine state, especially the flag register.  To grant this,
	you must output code to save and restore registers either in
	this macro or in the macros MACHINE_STATE_SAVE and
	MACHINE_STATE_RESTORE.  The last two macros will be used in the
	function `__bb_trace_func', so you must make sure that the
	function prologue does not change any register prior to saving
	it with MACHINE_STATE_SAVE.

   else if profile_block_flag != 0

	Output code to increment the counter directly.  Basic blocks
	are numbered separately from zero within each compiled object
	module.  The count associated with block number BLOCKNO is at
	index BLOCKNO in an array of words; the name of this array is
	a local symbol made with this statement:

	    ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);

	In a virtual assembler language the output looks like:

		inc (LPBX2+4*BLOCKNO)
*/

void
ix86_output_block_profiler (file, blockno)
     FILE *file ATTRIBUTE_UNUSED;
     int blockno;
{
  rtx xops[8], cnt_rtx;
  char counts[80];
  char *block_table = counts;

  switch (profile_block_flag)
    {
    case 2:
      ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);

      xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
      xops[2] = GEN_INT (blockno);
      xops[3] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
      xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
      xops[5] = plus_constant (xops[4], 4);
      xops[0] = gen_rtx_MEM (SImode, xops[4]);	/* first word of __bb */
      xops[6] = gen_rtx_MEM (SImode, xops[5]);	/* second word of __bb */

      CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;

      /* Preserve the flags around the call, per the comment above.  */
      output_asm_insn ("pushf", xops);
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      if (flag_pic)
	{
	  /* The label address needs a scratch register under PIC;
	     borrow eax and restore it afterwards.  */
	  xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
	  output_asm_insn ("push{l}\t%7", xops);
	  output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
	  output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
	  output_asm_insn ("pop{l}\t%7", xops);
	}
      else
	output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
      output_asm_insn ("call\t%P3", xops);
      output_asm_insn ("popf", xops);

      break;

    default:
      /* Increment the BLOCKNO'th word of the LPBX2 counter array.  */
      ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
      cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
      SYMBOL_REF_FLAG (cnt_rtx) = TRUE;

      if (blockno)
	cnt_rtx = plus_constant (cnt_rtx, blockno*4);

      if (flag_pic)
	cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);

      xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
      output_asm_insn ("inc{l}\t%0", xops);

      break;
    }
}
32b5b1aa 5194\f
/* Expand a move of MODE from operands[1] to operands[0]: massage the
   operands into a shape the mov patterns accept and emit the SET.  */

void
ix86_expand_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  /* Once reload has started we may not create new pseudos or force
     constants into the pool.  */
  int strict = (reload_in_progress || reload_completed);
  rtx insn;

  if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
    {
      /* Emit insns to move operands[1] into operands[0].  */

      if (GET_CODE (operands[0]) == MEM)
	operands[1] = force_reg (Pmode, operands[1]);
      else
	{
	  /* Legitimize the PIC address, reusing operands[0] as scratch
	     when it is a register.  legitimize_pic_address may have
	     stored the result there directly, in which case the move
	     is already done.  */
	  rtx temp = operands[0];
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (operands[1], temp);
	  if (temp == operands[0])
	    return;
	  operands[1] = temp;
	}
    }
  else
    {
      /* mem <- mem moves must go through a register, except when the
	 destination is a wider-than-QImode push (which can take a
	 memory source directly).  */
      if (GET_CODE (operands[0]) == MEM
	  && (GET_MODE (operands[0]) == QImode
	      || !push_operand (operands[0], mode))
	  && GET_CODE (operands[1]) == MEM)
	operands[1] = force_reg (mode, operands[1]);

      /* Copy sources that fail general_no_elim_operand to a register
	 before pushing -- presumably to avoid eliminable-register
	 references being rewritten after the push moved sp (TODO:
	 confirm against the predicate's definition).  */
      if (push_operand (operands[0], mode)
	  && ! general_no_elim_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (operands[1]) == CONST_DOUBLE
		   && register_operand (operands[0], mode))
	    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
	}
    }

  insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);

  emit_insn (insn);
}
e9a25f70 5250
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators:
     canonicalize so the operand matching the destination (or an
     immediate) ends up as src2.  */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  Keep whichever one
     matches the destination.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE.  */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      /* All other arithmetic patterns clobber EFLAGS.  */
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
5340
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  Mirrors the fixups ix86_expand_binary_operator
   performs, so insn patterns can reject operand combinations the expander
   would never produce.  */

int
ix86_binary_operator_ok (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[3];
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutable, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
	    || (GET_RTX_CLASS (code) == 'c'
		&& rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutable and source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != 'c'
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;
  return 1;
}
5370
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE.  */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  NOT is the one unary operation that does
     not clobber EFLAGS, so it needs no flags clobber in its pattern.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
5432
5433/* Return TRUE or FALSE depending on whether the unary operator meets the
5434 appropriate constraints. */
5435
5436int
5437ix86_unary_operator_ok (code, mode, operands)
5438 enum rtx_code code ATTRIBUTE_UNUSED;
5439 enum machine_mode mode ATTRIBUTE_UNUSED;
5440 rtx operands[2] ATTRIBUTE_UNUSED;
5441{
06a964de
JH
5442 /* If one of operands is memory, source and destination must match. */
5443 if ((GET_CODE (operands[0]) == MEM
5444 || GET_CODE (operands[1]) == MEM)
5445 && ! rtx_equal_p (operands[0], operands[1]))
5446 return FALSE;
e075ae69
RH
5447 return TRUE;
5448}
5449
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (insn, req_mode)
     rtx insn;
     enum machine_mode req_mode;
{
  rtx set;
  enum machine_mode set_mode;

  /* Dig the COMPARE out of the (possibly PARALLEL-wrapped) pattern.  */
  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  /* Each case below rejects REQ_MODEs that demand flag behavior this
     SET's mode does not provide; the fall-throughs accumulate the
     rejections for progressively less constrained modes.  */
  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      /* CCNOmode also satisfies a CCmode request, but only for a
	 compare against zero.  */
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  /* Source and destination CC modes must agree.  */
  return (GET_MODE (SET_SRC (set)) == set_mode);
}
5500
/* Generate insn patterns to do an integer compare of OPERANDS.
   Emits the flag-setting COMPARE and returns the comparison rtx the
   flag consumer should test.  */

static rtx
ix86_expand_int_compare (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
5523
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (code)
     enum rtx_code code ATTRIBUTE_UNUSED;
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make
     inequality comparisons trapping again, since that results in better
     code when using FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
5538
/* Return the CC mode that describes exactly which EFLAGS bits a
   comparison of OP0 and OP1 with code CODE needs to be valid.  */

enum machine_mode
ix86_cc_mode (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  /* Floating point comparisons have their own mode selection.  */
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss the jump instruction for it
	 so we need to use relational tests against overflow,
	 which thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
    default:
      abort ();
    }
}
5581
/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (code)
     enum rtx_code code ATTRIBUTE_UNUSED;
{
  enum rtx_code swapped_code = swap_condition (code);
  /* Use fcomi whenever it ties the cheapest overall strategy for
     either orientation of the comparison.  */
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
	  || (ix86_fp_comparison_cost (swapped_code)
	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
}
5593
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (code, pop0, pop1)
     enum rtx_code code;
     rtx *pop0, *pop1;
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || op_mode == XFmode
	  || op_mode == TFmode
	  || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  /* Standard 387 constants go into a register directly;
	     anything else must live in the constant pool.  */
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op0) == REG || !reload_completed))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
5665
c0c102a9
JH
5666/* Convert comparison codes we use to represent FP comparison to integer
5667 code that will result in proper branch. Return UNKNOWN if no such code
5668 is available. */
5669static enum rtx_code
5670ix86_fp_compare_code_to_integer (code)
5671 enum rtx_code code;
5672{
5673 switch (code)
5674 {
5675 case GT:
5676 return GTU;
5677 case GE:
5678 return GEU;
5679 case ORDERED:
5680 case UNORDERED:
5681 return code;
5682 break;
5683 case UNEQ:
5684 return EQ;
5685 break;
5686 case UNLT:
5687 return LTU;
5688 break;
5689 case UNLE:
5690 return LEU;
5691 break;
5692 case LTGT:
5693 return NE;
5694 break;
5695 default:
5696 return UNKNOWN;
5697 }
5698}
5699
/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for the branch that
   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
   is not required, its code is set to NIL.
   We never require more than two branches.  */
static void
ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
     enum rtx_code code, *bypass_code, *first_code, *second_code;
{
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF PF CF
     >      0  0  0
     <      0  0  1
     =      1  0  0
     un     1  1  1 */

  switch (code)
    {
      /* These map directly onto a single flag test.  */
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;
      /* These would give the wrong answer for unordered inputs, so an
	 extra branch on UNORDERED is needed: either a bypass branch
	 taken around the real test, or a second branch also leading
	 to the target.  */
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  /* Without IEEE conformance the unordered fixups are dropped.  */
  if (!TARGET_IEEE_FP)
    {
      *second_code = NIL;
      *bypass_code = NIL;
    }
}
5765
9e7adcb3
JH
5766/* Return cost of comparison done fcom + arithmetics operations on AX.
5767 All following functions do use number of instructions as an cost metrics.
5768 In future this should be tweaked to compute bytes for optimize_size and
5769 take into account performance of various instructions on various CPUs. */
5770static int
5771ix86_fp_comparison_arithmetics_cost (code)
5772 enum rtx_code code;
5773{
5774 if (!TARGET_IEEE_FP)
5775 return 4;
5776 /* The cost of code output by ix86_expand_fp_compare. */
5777 switch (code)
5778 {
5779 case UNLE:
5780 case UNLT:
5781 case LTGT:
5782 case GT:
5783 case GE:
5784 case UNORDERED:
5785 case ORDERED:
5786 case UNEQ:
5787 return 4;
5788 break;
5789 case LT:
5790 case NE:
5791 case EQ:
5792 case UNGE:
5793 return 5;
5794 break;
5795 case LE:
5796 case UNGT:
5797 return 6;
5798 break;
5799 default:
5800 abort ();
5801 }
5802}
5803
/* Return cost of comparison done using fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_fcomi_cost (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not supported - this
     prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  /* Base cost of 2, plus 1 when a bypass or second branch is needed.  */
  return (bypass_code != NIL || second_code != NIL) + 2;
}
5818
5819/* Return cost of comparison done using sahf operation.
5820 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5821static int
5822ix86_fp_comparison_sahf_cost (code)
5823 enum rtx_code code;
5824{
5825 enum rtx_code bypass_code, first_code, second_code;
5826 /* Return arbitarily high cost when instruction is not preferred - this
5827 avoids gcc from using it. */
5828 if (!TARGET_USE_SAHF && !optimize_size)
5829 return 1024;
5830 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5831 return (bypass_code != NIL || second_code != NIL) + 3;
5832}
5833
5834/* Compute cost of the comparison done using any method.
5835 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5836static int
5837ix86_fp_comparison_cost (code)
5838 enum rtx_code code;
5839{
5840 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
5841 int min;
5842
5843 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
5844 sahf_cost = ix86_fp_comparison_sahf_cost (code);
5845
5846 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
5847 if (min > sahf_cost)
5848 min = sahf_cost;
5849 if (min > fcomi_cost)
5850 min = fcomi_cost;
5851 return min;
5852}
c0c102a9 5853
3a3677ff
RH
5854/* Generate insn patterns to do a floating point compare of OPERANDS. */
5855
9e7adcb3
JH
5856static rtx
5857ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
3a3677ff
RH
5858 enum rtx_code code;
5859 rtx op0, op1, scratch;
9e7adcb3
JH
5860 rtx *second_test;
5861 rtx *bypass_test;
3a3677ff
RH
5862{
5863 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 5864 rtx tmp, tmp2;
9e7adcb3 5865 int cost = ix86_fp_comparison_cost (code);
c0c102a9 5866 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
5867
5868 fpcmp_mode = ix86_fp_compare_mode (code);
5869 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
5870
9e7adcb3
JH
5871 if (second_test)
5872 *second_test = NULL_RTX;
5873 if (bypass_test)
5874 *bypass_test = NULL_RTX;
5875
c0c102a9
JH
5876 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5877
9e7adcb3
JH
5878 /* Do fcomi/sahf based test when profitable. */
5879 if ((bypass_code == NIL || bypass_test)
5880 && (second_code == NIL || second_test)
5881 && ix86_fp_comparison_arithmetics_cost (code) > cost)
32b5b1aa 5882 {
c0c102a9
JH
5883 if (TARGET_CMOVE)
5884 {
5885 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5886 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
5887 tmp);
5888 emit_insn (tmp);
5889 }
5890 else
5891 {
5892 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5893 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
bf71a4f8
JH
5894 if (!scratch)
5895 scratch = gen_reg_rtx (HImode);
c0c102a9
JH
5896 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5897 emit_insn (gen_x86_sahf_1 (scratch));
5898 }
e075ae69
RH
5899
5900 /* The FP codes work out to act like unsigned. */
9a915772 5901 intcmp_mode = fpcmp_mode;
9e7adcb3
JH
5902 code = first_code;
5903 if (bypass_code != NIL)
5904 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
5905 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5906 const0_rtx);
5907 if (second_code != NIL)
5908 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
5909 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5910 const0_rtx);
e075ae69
RH
5911 }
5912 else
5913 {
5914 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69
RH
5915 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5916 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
bf71a4f8
JH
5917 if (!scratch)
5918 scratch = gen_reg_rtx (HImode);
3a3677ff 5919 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 5920
9a915772
JH
5921 /* In the unordered case, we have to check C2 for NaN's, which
5922 doesn't happen to work out to anything nice combination-wise.
5923 So do some bit twiddling on the value we've got in AH to come
5924 up with an appropriate set of condition codes. */
e075ae69 5925
9a915772
JH
5926 intcmp_mode = CCNOmode;
5927 switch (code)
32b5b1aa 5928 {
9a915772
JH
5929 case GT:
5930 case UNGT:
5931 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 5932 {
3a3677ff 5933 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 5934 code = EQ;
9a915772
JH
5935 }
5936 else
5937 {
5938 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5939 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5940 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
5941 intcmp_mode = CCmode;
5942 code = GEU;
5943 }
5944 break;
5945 case LT:
5946 case UNLT:
5947 if (code == LT && TARGET_IEEE_FP)
5948 {
3a3677ff
RH
5949 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5950 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
5951 intcmp_mode = CCmode;
5952 code = EQ;
9a915772
JH
5953 }
5954 else
5955 {
5956 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
5957 code = NE;
5958 }
5959 break;
5960 case GE:
5961 case UNGE:
5962 if (code == GE || !TARGET_IEEE_FP)
5963 {
3a3677ff 5964 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 5965 code = EQ;
9a915772
JH
5966 }
5967 else
5968 {
5969 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5970 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5971 GEN_INT (0x01)));
5972 code = NE;
5973 }
5974 break;
5975 case LE:
5976 case UNLE:
5977 if (code == LE && TARGET_IEEE_FP)
5978 {
3a3677ff
RH
5979 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5980 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5981 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
5982 intcmp_mode = CCmode;
5983 code = LTU;
9a915772
JH
5984 }
5985 else
5986 {
5987 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5988 code = NE;
5989 }
5990 break;
5991 case EQ:
5992 case UNEQ:
5993 if (code == EQ && TARGET_IEEE_FP)
5994 {
3a3677ff
RH
5995 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5996 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
5997 intcmp_mode = CCmode;
5998 code = EQ;
9a915772
JH
5999 }
6000 else
6001 {
3a3677ff
RH
6002 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
6003 code = NE;
6004 break;
9a915772
JH
6005 }
6006 break;
6007 case NE:
6008 case LTGT:
6009 if (code == NE && TARGET_IEEE_FP)
6010 {
3a3677ff 6011 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9a915772
JH
6012 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
6013 GEN_INT (0x40)));
3a3677ff 6014 code = NE;
9a915772
JH
6015 }
6016 else
6017 {
3a3677ff
RH
6018 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
6019 code = EQ;
32b5b1aa 6020 }
9a915772
JH
6021 break;
6022
6023 case UNORDERED:
6024 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
6025 code = NE;
6026 break;
6027 case ORDERED:
6028 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
6029 code = EQ;
6030 break;
6031
6032 default:
6033 abort ();
32b5b1aa 6034 }
32b5b1aa 6035 }
e075ae69
RH
6036
6037 /* Return the test that should be put into the flags user, i.e.
6038 the bcc, scc, or cmov instruction. */
6039 return gen_rtx_fmt_ee (code, VOIDmode,
6040 gen_rtx_REG (intcmp_mode, FLAGS_REG),
6041 const0_rtx);
6042}
6043
9e3e266c 6044rtx
a1b8572c 6045ix86_expand_compare (code, second_test, bypass_test)
e075ae69 6046 enum rtx_code code;
a1b8572c 6047 rtx *second_test, *bypass_test;
e075ae69
RH
6048{
6049 rtx op0, op1, ret;
6050 op0 = ix86_compare_op0;
6051 op1 = ix86_compare_op1;
6052
a1b8572c
JH
6053 if (second_test)
6054 *second_test = NULL_RTX;
6055 if (bypass_test)
6056 *bypass_test = NULL_RTX;
6057
e075ae69 6058 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 6059 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 6060 second_test, bypass_test);
32b5b1aa 6061 else
e075ae69
RH
6062 ret = ix86_expand_int_compare (code, op0, op1);
6063
6064 return ret;
6065}
6066
03598dea
JH
6067/* Return true if the CODE will result in nontrivial jump sequence. */
6068bool
6069ix86_fp_jump_nontrivial_p (code)
6070 enum rtx_code code;
6071{
6072 enum rtx_code bypass_code, first_code, second_code;
6073 if (!TARGET_CMOVE)
6074 return true;
6075 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
6076 return bypass_code != NIL || second_code != NIL;
6077}
6078
e075ae69 6079void
3a3677ff 6080ix86_expand_branch (code, label)
e075ae69 6081 enum rtx_code code;
e075ae69
RH
6082 rtx label;
6083{
3a3677ff 6084 rtx tmp;
e075ae69 6085
3a3677ff 6086 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 6087 {
3a3677ff
RH
6088 case QImode:
6089 case HImode:
6090 case SImode:
0d7d98ee 6091 simple:
a1b8572c 6092 tmp = ix86_expand_compare (code, NULL, NULL);
e075ae69
RH
6093 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6094 gen_rtx_LABEL_REF (VOIDmode, label),
6095 pc_rtx);
6096 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 6097 return;
e075ae69 6098
3a3677ff
RH
6099 case SFmode:
6100 case DFmode:
0f290768 6101 case XFmode:
2b589241 6102 case TFmode:
3a3677ff
RH
6103 {
6104 rtvec vec;
6105 int use_fcomi;
03598dea 6106 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
6107
6108 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
6109 &ix86_compare_op1);
03598dea
JH
6110
6111 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
6112
6113 /* Check whether we will use the natural sequence with one jump. If
6114 so, we can expand jump early. Otherwise delay expansion by
6115 creating compound insn to not confuse optimizers. */
6116 if (bypass_code == NIL && second_code == NIL
6117 && TARGET_CMOVE)
6118 {
6119 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
6120 gen_rtx_LABEL_REF (VOIDmode, label),
6121 pc_rtx, NULL_RTX);
6122 }
6123 else
6124 {
6125 tmp = gen_rtx_fmt_ee (code, VOIDmode,
6126 ix86_compare_op0, ix86_compare_op1);
6127 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6128 gen_rtx_LABEL_REF (VOIDmode, label),
6129 pc_rtx);
6130 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
6131
6132 use_fcomi = ix86_use_fcomi_compare (code);
6133 vec = rtvec_alloc (3 + !use_fcomi);
6134 RTVEC_ELT (vec, 0) = tmp;
6135 RTVEC_ELT (vec, 1)
6136 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
6137 RTVEC_ELT (vec, 2)
6138 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
6139 if (! use_fcomi)
6140 RTVEC_ELT (vec, 3)
6141 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
6142
6143 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
6144 }
3a3677ff
RH
6145 return;
6146 }
32b5b1aa 6147
3a3677ff 6148 case DImode:
0d7d98ee
JH
6149 if (TARGET_64BIT)
6150 goto simple;
3a3677ff
RH
6151 /* Expand DImode branch into multiple compare+branch. */
6152 {
6153 rtx lo[2], hi[2], label2;
6154 enum rtx_code code1, code2, code3;
32b5b1aa 6155
3a3677ff
RH
6156 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
6157 {
6158 tmp = ix86_compare_op0;
6159 ix86_compare_op0 = ix86_compare_op1;
6160 ix86_compare_op1 = tmp;
6161 code = swap_condition (code);
6162 }
6163 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
6164 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 6165
3a3677ff
RH
6166 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
6167 avoid two branches. This costs one extra insn, so disable when
6168 optimizing for size. */
32b5b1aa 6169
3a3677ff
RH
6170 if ((code == EQ || code == NE)
6171 && (!optimize_size
6172 || hi[1] == const0_rtx || lo[1] == const0_rtx))
6173 {
6174 rtx xor0, xor1;
32b5b1aa 6175
3a3677ff
RH
6176 xor1 = hi[0];
6177 if (hi[1] != const0_rtx)
6178 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
6179 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 6180
3a3677ff
RH
6181 xor0 = lo[0];
6182 if (lo[1] != const0_rtx)
6183 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
6184 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 6185
3a3677ff
RH
6186 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
6187 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 6188
3a3677ff
RH
6189 ix86_compare_op0 = tmp;
6190 ix86_compare_op1 = const0_rtx;
6191 ix86_expand_branch (code, label);
6192 return;
6193 }
e075ae69 6194
1f9124e4
JJ
6195 /* Otherwise, if we are doing less-than or greater-or-equal-than,
6196 op1 is a constant and the low word is zero, then we can just
6197 examine the high word. */
32b5b1aa 6198
1f9124e4
JJ
6199 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
6200 switch (code)
6201 {
6202 case LT: case LTU: case GE: case GEU:
6203 ix86_compare_op0 = hi[0];
6204 ix86_compare_op1 = hi[1];
6205 ix86_expand_branch (code, label);
6206 return;
6207 default:
6208 break;
6209 }
e075ae69 6210
3a3677ff 6211 /* Otherwise, we need two or three jumps. */
e075ae69 6212
3a3677ff 6213 label2 = gen_label_rtx ();
e075ae69 6214
3a3677ff
RH
6215 code1 = code;
6216 code2 = swap_condition (code);
6217 code3 = unsigned_condition (code);
e075ae69 6218
3a3677ff
RH
6219 switch (code)
6220 {
6221 case LT: case GT: case LTU: case GTU:
6222 break;
e075ae69 6223
3a3677ff
RH
6224 case LE: code1 = LT; code2 = GT; break;
6225 case GE: code1 = GT; code2 = LT; break;
6226 case LEU: code1 = LTU; code2 = GTU; break;
6227 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 6228
3a3677ff
RH
6229 case EQ: code1 = NIL; code2 = NE; break;
6230 case NE: code2 = NIL; break;
e075ae69 6231
3a3677ff
RH
6232 default:
6233 abort ();
6234 }
e075ae69 6235
3a3677ff
RH
6236 /*
6237 * a < b =>
6238 * if (hi(a) < hi(b)) goto true;
6239 * if (hi(a) > hi(b)) goto false;
6240 * if (lo(a) < lo(b)) goto true;
6241 * false:
6242 */
6243
6244 ix86_compare_op0 = hi[0];
6245 ix86_compare_op1 = hi[1];
6246
6247 if (code1 != NIL)
6248 ix86_expand_branch (code1, label);
6249 if (code2 != NIL)
6250 ix86_expand_branch (code2, label2);
6251
6252 ix86_compare_op0 = lo[0];
6253 ix86_compare_op1 = lo[1];
6254 ix86_expand_branch (code3, label);
6255
6256 if (code2 != NIL)
6257 emit_label (label2);
6258 return;
6259 }
e075ae69 6260
3a3677ff
RH
6261 default:
6262 abort ();
6263 }
32b5b1aa 6264}
e075ae69 6265
9e7adcb3
JH
6266/* Split branch based on floating point condition. */
6267void
03598dea
JH
6268ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
6269 enum rtx_code code;
6270 rtx op1, op2, target1, target2, tmp;
9e7adcb3
JH
6271{
6272 rtx second, bypass;
6273 rtx label = NULL_RTX;
03598dea 6274 rtx condition;
6b24c259
JH
6275 int bypass_probability = -1, second_probability = -1, probability = -1;
6276 rtx i;
9e7adcb3
JH
6277
6278 if (target2 != pc_rtx)
6279 {
6280 rtx tmp = target2;
6281 code = reverse_condition_maybe_unordered (code);
6282 target2 = target1;
6283 target1 = tmp;
6284 }
6285
6286 condition = ix86_expand_fp_compare (code, op1, op2,
6287 tmp, &second, &bypass);
6b24c259
JH
6288
6289 if (split_branch_probability >= 0)
6290 {
6291 /* Distribute the probabilities across the jumps.
6292 Assume the BYPASS and SECOND to be always test
6293 for UNORDERED. */
6294 probability = split_branch_probability;
6295
6296 /* Value of 1 is low enought to make no need for probability
6297 to be updated. Later we may run some experiments and see
6298 if unordered values are more frequent in practice. */
6299 if (bypass)
6300 bypass_probability = 1;
6301 if (second)
6302 second_probability = 1;
6303 }
9e7adcb3
JH
6304 if (bypass != NULL_RTX)
6305 {
6306 label = gen_label_rtx ();
6b24c259
JH
6307 i = emit_jump_insn (gen_rtx_SET
6308 (VOIDmode, pc_rtx,
6309 gen_rtx_IF_THEN_ELSE (VOIDmode,
6310 bypass,
6311 gen_rtx_LABEL_REF (VOIDmode,
6312 label),
6313 pc_rtx)));
6314 if (bypass_probability >= 0)
6315 REG_NOTES (i)
6316 = gen_rtx_EXPR_LIST (REG_BR_PROB,
6317 GEN_INT (bypass_probability),
6318 REG_NOTES (i));
6319 }
6320 i = emit_jump_insn (gen_rtx_SET
9e7adcb3
JH
6321 (VOIDmode, pc_rtx,
6322 gen_rtx_IF_THEN_ELSE (VOIDmode,
6b24c259
JH
6323 condition, target1, target2)));
6324 if (probability >= 0)
6325 REG_NOTES (i)
6326 = gen_rtx_EXPR_LIST (REG_BR_PROB,
6327 GEN_INT (probability),
6328 REG_NOTES (i));
6329 if (second != NULL_RTX)
9e7adcb3 6330 {
6b24c259
JH
6331 i = emit_jump_insn (gen_rtx_SET
6332 (VOIDmode, pc_rtx,
6333 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
6334 target2)));
6335 if (second_probability >= 0)
6336 REG_NOTES (i)
6337 = gen_rtx_EXPR_LIST (REG_BR_PROB,
6338 GEN_INT (second_probability),
6339 REG_NOTES (i));
9e7adcb3 6340 }
9e7adcb3
JH
6341 if (label != NULL_RTX)
6342 emit_label (label);
6343}
6344
32b5b1aa 6345int
3a3677ff 6346ix86_expand_setcc (code, dest)
e075ae69 6347 enum rtx_code code;
e075ae69 6348 rtx dest;
32b5b1aa 6349{
a1b8572c
JH
6350 rtx ret, tmp, tmpreg;
6351 rtx second_test, bypass_test;
e075ae69
RH
6352 int type;
6353
885a70fd
JH
6354 if (GET_MODE (ix86_compare_op0) == DImode
6355 && !TARGET_64BIT)
e075ae69
RH
6356 return 0; /* FAIL */
6357
6358 /* Three modes of generation:
6359 0 -- destination does not overlap compare sources:
6360 clear dest first, emit strict_low_part setcc.
6361 1 -- destination does overlap compare sources:
6362 emit subreg setcc, zero extend.
6363 2 -- destination is in QImode:
6364 emit setcc only.
e075ae69 6365
c50e5bc0
RH
6366 We don't use mode 0 early in compilation because it confuses CSE.
6367 There are peepholes to turn mode 1 into mode 0 if things work out
6368 nicely after reload. */
6369
6370 type = cse_not_expected ? 0 : 1;
e075ae69
RH
6371
6372 if (GET_MODE (dest) == QImode)
6373 type = 2;
6374 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
a500c31b 6375 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
e075ae69
RH
6376 type = 1;
6377
6378 if (type == 0)
6379 emit_move_insn (dest, const0_rtx);
6380
a1b8572c 6381 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
6382 PUT_MODE (ret, QImode);
6383
6384 tmp = dest;
a1b8572c 6385 tmpreg = dest;
e075ae69 6386 if (type == 0)
32b5b1aa 6387 {
e075ae69 6388 tmp = gen_lowpart (QImode, dest);
a1b8572c 6389 tmpreg = tmp;
e075ae69
RH
6390 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
6391 }
6392 else if (type == 1)
6393 {
6394 if (!cse_not_expected)
6395 tmp = gen_reg_rtx (QImode);
6396 else
6397 tmp = gen_lowpart (QImode, dest);
a1b8572c 6398 tmpreg = tmp;
e075ae69 6399 }
32b5b1aa 6400
e075ae69 6401 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
6402 if (bypass_test || second_test)
6403 {
6404 rtx test = second_test;
6405 int bypass = 0;
6406 rtx tmp2 = gen_reg_rtx (QImode);
6407 if (bypass_test)
6408 {
6409 if (second_test)
6410 abort();
6411 test = bypass_test;
6412 bypass = 1;
6413 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
6414 }
6415 PUT_MODE (test, QImode);
6416 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
6417
6418 if (bypass)
6419 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
6420 else
6421 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
6422 }
e075ae69
RH
6423
6424 if (type == 1)
6425 {
6426 rtx clob;
6427
6428 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
6429 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
6430 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6431 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6432 emit_insn (tmp);
32b5b1aa 6433 }
e075ae69
RH
6434
6435 return 1; /* DONE */
32b5b1aa 6436}
e075ae69 6437
32b5b1aa 6438int
e075ae69
RH
6439ix86_expand_int_movcc (operands)
6440 rtx operands[];
32b5b1aa 6441{
e075ae69
RH
6442 enum rtx_code code = GET_CODE (operands[1]), compare_code;
6443 rtx compare_seq, compare_op;
a1b8572c 6444 rtx second_test, bypass_test;
32b5b1aa 6445
36583fea
JH
6446 /* When the compare code is not LTU or GEU, we can not use sbbl case.
6447 In case comparsion is done with immediate, we can convert it to LTU or
6448 GEU by altering the integer. */
6449
6450 if ((code == LEU || code == GTU)
6451 && GET_CODE (ix86_compare_op1) == CONST_INT
6452 && GET_MODE (operands[0]) != HImode
6453 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
0f290768 6454 && GET_CODE (operands[2]) == CONST_INT
36583fea
JH
6455 && GET_CODE (operands[3]) == CONST_INT)
6456 {
6457 if (code == LEU)
6458 code = LTU;
6459 else
6460 code = GEU;
6461 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
6462 }
3a3677ff 6463
e075ae69 6464 start_sequence ();
a1b8572c 6465 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
6466 compare_seq = gen_sequence ();
6467 end_sequence ();
6468
6469 compare_code = GET_CODE (compare_op);
6470
6471 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
6472 HImode insns, we'd be swallowed in word prefix ops. */
6473
6474 if (GET_MODE (operands[0]) != HImode
885a70fd 6475 && GET_MODE (operands[0]) != DImode
0f290768 6476 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
6477 && GET_CODE (operands[3]) == CONST_INT)
6478 {
6479 rtx out = operands[0];
6480 HOST_WIDE_INT ct = INTVAL (operands[2]);
6481 HOST_WIDE_INT cf = INTVAL (operands[3]);
6482 HOST_WIDE_INT diff;
6483
a1b8572c
JH
6484 if ((compare_code == LTU || compare_code == GEU)
6485 && !second_test && !bypass_test)
e075ae69 6486 {
e075ae69
RH
6487
6488 /* Detect overlap between destination and compare sources. */
6489 rtx tmp = out;
6490
0f290768 6491 /* To simplify rest of code, restrict to the GEU case. */
36583fea
JH
6492 if (compare_code == LTU)
6493 {
6494 int tmp = ct;
6495 ct = cf;
6496 cf = tmp;
6497 compare_code = reverse_condition (compare_code);
6498 code = reverse_condition (code);
6499 }
6500 diff = ct - cf;
6501
e075ae69 6502 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
a500c31b 6503 || reg_overlap_mentioned_p (out, ix86_compare_op1))
e075ae69
RH
6504 tmp = gen_reg_rtx (SImode);
6505
6506 emit_insn (compare_seq);
6507 emit_insn (gen_x86_movsicc_0_m1 (tmp));
6508
36583fea
JH
6509 if (diff == 1)
6510 {
6511 /*
6512 * cmpl op0,op1
6513 * sbbl dest,dest
6514 * [addl dest, ct]
6515 *
6516 * Size 5 - 8.
6517 */
6518 if (ct)
e99af66b 6519 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
36583fea
JH
6520 }
6521 else if (cf == -1)
6522 {
6523 /*
6524 * cmpl op0,op1
6525 * sbbl dest,dest
6526 * orl $ct, dest
6527 *
6528 * Size 8.
6529 */
e99af66b 6530 emit_insn (gen_iorsi3 (tmp, tmp, GEN_INT (ct)));
36583fea
JH
6531 }
6532 else if (diff == -1 && ct)
6533 {
6534 /*
6535 * cmpl op0,op1
6536 * sbbl dest,dest
6537 * xorl $-1, dest
6538 * [addl dest, cf]
6539 *
6540 * Size 8 - 11.
6541 */
6542 emit_insn (gen_one_cmplsi2 (tmp, tmp));
6543 if (cf)
e99af66b 6544 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (cf)));
36583fea
JH
6545 }
6546 else
6547 {
6548 /*
6549 * cmpl op0,op1
6550 * sbbl dest,dest
6551 * andl cf - ct, dest
6552 * [addl dest, ct]
6553 *
6554 * Size 8 - 11.
6555 */
e99af66b 6556 emit_insn (gen_andsi3 (tmp, tmp, GEN_INT (trunc_int_for_mode
7471a1f0 6557 (cf - ct, SImode))));
36583fea 6558 if (ct)
e99af66b 6559 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
36583fea 6560 }
e075ae69
RH
6561
6562 if (tmp != out)
6563 emit_move_insn (out, tmp);
6564
6565 return 1; /* DONE */
6566 }
6567
6568 diff = ct - cf;
6569 if (diff < 0)
6570 {
6571 HOST_WIDE_INT tmp;
6572 tmp = ct, ct = cf, cf = tmp;
6573 diff = -diff;
734dba19
JH
6574 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
6575 {
6576 /* We may be reversing unordered compare to normal compare, that
6577 is not valid in general (we may convert non-trapping condition
6578 to trapping one), however on i386 we currently emit all
6579 comparisons unordered. */
6580 compare_code = reverse_condition_maybe_unordered (compare_code);
6581 code = reverse_condition_maybe_unordered (code);
6582 }
6583 else
6584 {
6585 compare_code = reverse_condition (compare_code);
6586 code = reverse_condition (code);
6587 }
e075ae69
RH
6588 }
6589 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
6590 || diff == 3 || diff == 5 || diff == 9)
6591 {
6592 /*
6593 * xorl dest,dest
6594 * cmpl op1,op2
6595 * setcc dest
6596 * lea cf(dest*(ct-cf)),dest
6597 *
6598 * Size 14.
6599 *
6600 * This also catches the degenerate setcc-only case.
6601 */
6602
6603 rtx tmp;
6604 int nops;
6605
6606 out = emit_store_flag (out, code, ix86_compare_op0,
6607 ix86_compare_op1, VOIDmode, 0, 1);
6608
6609 nops = 0;
885a70fd
JH
6610 /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics
6611 done in proper mode to match. */
e075ae69 6612 if (diff == 1)
885a70fd
JH
6613 {
6614 if (Pmode != SImode)
6615 tmp = gen_lowpart (Pmode, out);
6616 else
6617 tmp = out;
6618 }
e075ae69
RH
6619 else
6620 {
885a70fd
JH
6621 rtx out1;
6622 if (Pmode != SImode)
6623 out1 = gen_lowpart (Pmode, out);
6624 else
6625 out1 = out;
6626 tmp = gen_rtx_MULT (Pmode, out1, GEN_INT (diff & ~1));
e075ae69
RH
6627 nops++;
6628 if (diff & 1)
6629 {
885a70fd 6630 tmp = gen_rtx_PLUS (Pmode, tmp, out1);
e075ae69
RH
6631 nops++;
6632 }
6633 }
6634 if (cf != 0)
6635 {
885a70fd 6636 tmp = gen_rtx_PLUS (Pmode, tmp, GEN_INT (cf));
e075ae69
RH
6637 nops++;
6638 }
885a70fd
JH
6639 if (tmp != out
6640 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
e075ae69 6641 {
885a70fd
JH
6642 if (Pmode != SImode)
6643 tmp = gen_rtx_SUBREG (SImode, tmp, 0);
6644
6645 /* ??? We should to take care for outputing non-lea arithmetics
6646 for Pmode != SImode case too, but it is quite tricky and not
6647 too important, since all TARGET_64BIT machines support real
6648 conditional moves. */
6649 if (nops == 1 && Pmode == SImode)
e075ae69
RH
6650 {
6651 rtx clob;
6652
6653 clob = gen_rtx_REG (CCmode, FLAGS_REG);
6654 clob = gen_rtx_CLOBBER (VOIDmode, clob);
6655
6656 tmp = gen_rtx_SET (VOIDmode, out, tmp);
6657 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6658 emit_insn (tmp);
6659 }
6660 else
6661 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
6662 }
6663 if (out != operands[0])
6664 emit_move_insn (operands[0], out);
6665
6666 return 1; /* DONE */
6667 }
6668
6669 /*
6670 * General case: Jumpful:
6671 * xorl dest,dest cmpl op1, op2
6672 * cmpl op1, op2 movl ct, dest
6673 * setcc dest jcc 1f
6674 * decl dest movl cf, dest
6675 * andl (cf-ct),dest 1:
6676 * addl ct,dest
0f290768 6677 *
e075ae69
RH
6678 * Size 20. Size 14.
6679 *
6680 * This is reasonably steep, but branch mispredict costs are
6681 * high on modern cpus, so consider failing only if optimizing
6682 * for space.
6683 *
6684 * %%% Parameterize branch_cost on the tuning architecture, then
6685 * use that. The 80386 couldn't care less about mispredicts.
6686 */
6687
6688 if (!optimize_size && !TARGET_CMOVE)
6689 {
6690 if (ct == 0)
6691 {
6692 ct = cf;
6693 cf = 0;
734dba19
JH
6694 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
6695 {
6696 /* We may be reversing unordered compare to normal compare,
6697 that is not valid in general (we may convert non-trapping
6698 condition to trapping one), however on i386 we currently
6699 emit all comparisons unordered. */
6700 compare_code = reverse_condition_maybe_unordered (compare_code);
6701 code = reverse_condition_maybe_unordered (code);
6702 }
6703 else
6704 {
6705 compare_code = reverse_condition (compare_code);
6706 code = reverse_condition (code);
6707 }
e075ae69
RH
6708 }
6709
6710 out = emit_store_flag (out, code, ix86_compare_op0,
6711 ix86_compare_op1, VOIDmode, 0, 1);
6712
6713 emit_insn (gen_addsi3 (out, out, constm1_rtx));
7471a1f0
AO
6714 emit_insn (gen_andsi3 (out, out, GEN_INT (trunc_int_for_mode
6715 (cf - ct, SImode))));
e075ae69
RH
6716 if (ct != 0)
6717 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
6718 if (out != operands[0])
6719 emit_move_insn (operands[0], out);
6720
6721 return 1; /* DONE */
6722 }
6723 }
6724
6725 if (!TARGET_CMOVE)
6726 {
6727 /* Try a few things more with specific constants and a variable. */
6728
78a0d70c 6729 optab op;
e075ae69
RH
6730 rtx var, orig_out, out, tmp;
6731
6732 if (optimize_size)
6733 return 0; /* FAIL */
6734
0f290768 6735 /* If one of the two operands is an interesting constant, load a
e075ae69 6736 constant with the above and mask it in with a logical operation. */
0f290768 6737
e075ae69
RH
6738 if (GET_CODE (operands[2]) == CONST_INT)
6739 {
6740 var = operands[3];
6741 if (INTVAL (operands[2]) == 0)
6742 operands[3] = constm1_rtx, op = and_optab;
6743 else if (INTVAL (operands[2]) == -1)
6744 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
6745 else
6746 return 0; /* FAIL */
e075ae69
RH
6747 }
6748 else if (GET_CODE (operands[3]) == CONST_INT)
6749 {
6750 var = operands[2];
6751 if (INTVAL (operands[3]) == 0)
6752 operands[2] = constm1_rtx, op = and_optab;
6753 else if (INTVAL (operands[3]) == -1)
6754 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
6755 else
6756 return 0; /* FAIL */
e075ae69 6757 }
78a0d70c 6758 else
e075ae69
RH
6759 return 0; /* FAIL */
6760
6761 orig_out = operands[0];
6762 tmp = gen_reg_rtx (GET_MODE (orig_out));
6763 operands[0] = tmp;
6764
6765 /* Recurse to get the constant loaded. */
6766 if (ix86_expand_int_movcc (operands) == 0)
6767 return 0; /* FAIL */
6768
6769 /* Mask in the interesting variable. */
6770 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
6771 OPTAB_WIDEN);
6772 if (out != orig_out)
6773 emit_move_insn (orig_out, out);
6774
6775 return 1; /* DONE */
6776 }
6777
6778 /*
6779 * For comparison with above,
6780 *
6781 * movl cf,dest
6782 * movl ct,tmp
6783 * cmpl op1,op2
6784 * cmovcc tmp,dest
6785 *
6786 * Size 15.
6787 */
6788
6789 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
6790 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
6791 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
6792 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
6793
a1b8572c
JH
6794 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6795 {
6796 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6797 emit_move_insn (tmp, operands[3]);
6798 operands[3] = tmp;
6799 }
6800 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6801 {
6802 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6803 emit_move_insn (tmp, operands[2]);
6804 operands[2] = tmp;
6805 }
c9682caf
JH
6806 if (! register_operand (operands[2], VOIDmode)
6807 && ! register_operand (operands[3], VOIDmode))
6808 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
a1b8572c 6809
e075ae69
RH
6810 emit_insn (compare_seq);
6811 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6812 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6813 compare_op, operands[2],
6814 operands[3])));
a1b8572c
JH
6815 if (bypass_test)
6816 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6817 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6818 bypass_test,
6819 operands[3],
6820 operands[0])));
6821 if (second_test)
6822 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6823 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6824 second_test,
6825 operands[2],
6826 operands[0])));
e075ae69
RH
6827
6828 return 1; /* DONE */
e9a25f70 6829}
e075ae69 6830
/* Expand a floating point conditional move of operands[2]/operands[3]
   into operands[0], selected by comparison operands[1] performed on
   ix86_compare_op0/ix86_compare_op1.  Tries SSE min/max or the
   sse_movsfcc/sse_movdfcc patterns first; otherwise falls back to
   fcmov-style sequences.  Always returns 1 (expansion done).  */
int
ix86_expand_fp_movcc (operands)
     rtx operands[];
{
  enum rtx_code code;
  rtx tmp;
  rtx compare_op, second_test, bypass_test;

  /* For SF/DFmode conditional moves based on comparisons
     in same mode, we may want to use SSE min/max instructions.  */
  if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
       || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
      && (!TARGET_IEEE_FP
	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
      /* We may be called from the post-reload splitter.  */
      && (!REG_P (operands[0])
	  || SSE_REG_P (operands[0])
	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
    {
      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
      code = GET_CODE (operands[1]);

      /* See if we have (cross) match between comparison operands and
         conditional move operands.  */
      if (rtx_equal_p (operands[2], op1))
	{
	  /* Swap the comparison so that operands[2] matches op0;
	     reverse_condition_maybe_unordered keeps the selection
	     semantics intact for IEEE (unordered) operands.  */
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = reverse_condition_maybe_unordered (code);
	}
      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
	{
	  /* Check for min operation.  */
	  if (code == LT)
	    {
	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	       if (memory_operand (op0, VOIDmode))
		 op0 = force_reg (GET_MODE (operands[0]), op0);
	       if (GET_MODE (operands[0]) == SFmode)
		 emit_insn (gen_minsf3 (operands[0], op0, op1));
	       else
		 emit_insn (gen_mindf3 (operands[0], op0, op1));
	       return 1;
	    }
	  /* Check for max operation.  */
	  if (code == GT)
	    {
	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	       if (memory_operand (op0, VOIDmode))
		 op0 = force_reg (GET_MODE (operands[0]), op0);
	       if (GET_MODE (operands[0]) == SFmode)
		 emit_insn (gen_maxsf3 (operands[0], op0, op1));
	       else
		 emit_insn (gen_maxdf3 (operands[0], op0, op1));
	       return 1;
	    }
	}
      /* Manage condition to be sse_comparison_operator.  In case we are
	 in non-ieee mode, try to canonicalize the destination operand
	 to be first in the comparison - this helps reload to avoid extra
	 moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
	{
	  rtx tmp = ix86_compare_op0;
	  ix86_compare_op0 = ix86_compare_op1;
	  ix86_compare_op1 = tmp;
	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      /* Similarly try to manage result to be first operand of conditional
	 move. We also don't support the NE comparison on SSE, so try to
	 avoid it.  */
      if ((rtx_equal_p (operands[0], operands[3])
	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
	{
	  rtx tmp = operands[2];
	  operands[2] = operands[3];
	  operands[3] = tmp;
	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
					  (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      if (GET_MODE (operands[0]) == SFmode)
	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      else
	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* The floating point conditional move instructions don't directly
     support signed integer comparisons.  */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      /* Signed comparison: materialize the condition into a QImode
	 register via setcc, then cmove on that register != 0.  */
      if (second_test != NULL || bypass_test != NULL)
	abort();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  /* If the extra tests would read operands that the first cmove already
     clobbered (destination overlap), copy them to fresh registers.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  /* Primary conditional move, optionally followed by corrective moves
     for the bypass/second tests produced by ix86_expand_compare.  */
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
				compare_op,
				operands[2],
				operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
				  bypass_test,
				  operands[3],
				  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
				  second_test,
				  operands[2],
				  operands[0])));

  return 1;
}
6984
2450a057
JH
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and nonoffsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */
6989
/* Split OPERAND into word-sized pieces stored in PARTS[0..2]
   (SImode pieces on 32-bit targets, DImode+SImode on 64-bit).
   MODE is the mode of the whole value.  For a push operand, every
   part is the same (push) rtx.  Registers must only be split after
   reload (hard regs are addressed by consecutive REGNOs).
   Returns the number of parts, 2 or 3.  */
static int
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  int size;

  /* Number of word-sized parts: TFmode needs 3 even though only 12 of
     its 16 bytes are used on ia32.  */
  if (!TARGET_64BIT)
    size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp moves,
     that force all constants to memory to allow combining.  */

  if (GET_CODE (operand) == MEM
      && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
    operand = get_pool_constant (XEXP (operand, 0));

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      /* Retype the push as Pmode; the caller emits one push per part.  */
      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      /* Decompose an FP constant into its target word images.  */
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		case TFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = GEN_INT (l[2]);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = GEN_INT (l[1]);
	      parts[0] = GEN_INT (l[0]);
	    }
	  else
	    abort ();
	}
    }
  else
    {
      /* 64-bit: only XFmode/TFmode need splitting (DImode part plus a
	 trailing SImode part); everything else is a single move.  */
      if (mode == XFmode || mode == TFmode)
	{
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[3];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      /* NOTE(review): this arithmetic assumes l[0]/l[1] hold the
		 32-bit word images without sign extension; on hosts where
		 'long' is 32 bits a negative l[0] would be sign-extended
		 into the HOST_WIDE_INT sum -- verify.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
	        parts[0] = GEN_INT (l[0] + ((l[1] << 31) << 1));
	      else
	        parts[0] = immed_double_const (l[0], l[1], DImode);
	      parts[1] = GEN_INT (l[2]);
	    }
	  else
	    abort ();
	}
    }

  return size;
}
7113
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   All required insns are emitted here; the function returns nothing.
   Operands 2-4 are filled in with the destination parts in the correct
   order; operands 5-7 receive the corresponding source parts.  */
7118
26e5b205
JH
void
ix86_split_long_move (operands)
     rtx operands[];
{
  rtx part[2][3];		/* part[0] = dest parts, part[1] = source parts */
  int nparts;
  int push = 0;
  int collisions = 0;		/* source address regs clobbered by dest parts */
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by fp moves,
	 that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  /* Retype the push as Pmode rather than taking a DImode lowpart.  */
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
        operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      /* Pushes move %esp, so re-address each later source part through
	 the address of the part after it (pushed first).  */
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  collisions = 1;
	  emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
				  XEXP (part[1][0], 0)));
	  part[1][0] = change_address (part[1][0],
				       TARGET_64BIT ? DImode : SImode,
				       part[0][nparts - 1]);
	  part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
	  if (nparts == 3)
	    part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
	}
    }

  if (push)
    {
      /* Emit the pushes from most- to least-significant part.  */
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      /* We use only first 12 bytes of TFmode value, but for pushing we
		 are required to adjust stack as if we were pushing real 16byte
		 value.  */
	      if (mode == TFmode && !TARGET_64BIT)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				       GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these comes from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      if (GET_CODE (part[1][1]) == MEM)
		part[1][1] = adjust_address (part[1][1], DImode, 0);
	      else if (REG_P (part[1][1]))
		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
	      else
		abort();
	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* Copy high-to-low: operands[2..4] = dest, operands[5..7] = source.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      /* Copy low-to-high.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}
32b5b1aa 7301
e075ae69
RH
7302void
7303ix86_split_ashldi (operands, scratch)
7304 rtx *operands, scratch;
32b5b1aa 7305{
e075ae69
RH
7306 rtx low[2], high[2];
7307 int count;
b985a30f 7308
e075ae69
RH
7309 if (GET_CODE (operands[2]) == CONST_INT)
7310 {
7311 split_di (operands, 2, low, high);
7312 count = INTVAL (operands[2]) & 63;
32b5b1aa 7313
e075ae69
RH
7314 if (count >= 32)
7315 {
7316 emit_move_insn (high[0], low[1]);
7317 emit_move_insn (low[0], const0_rtx);
b985a30f 7318
e075ae69
RH
7319 if (count > 32)
7320 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
7321 }
7322 else
7323 {
7324 if (!rtx_equal_p (operands[0], operands[1]))
7325 emit_move_insn (operands[0], operands[1]);
7326 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
7327 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
7328 }
7329 }
7330 else
7331 {
7332 if (!rtx_equal_p (operands[0], operands[1]))
7333 emit_move_insn (operands[0], operands[1]);
b985a30f 7334
e075ae69 7335 split_di (operands, 1, low, high);
b985a30f 7336
e075ae69
RH
7337 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
7338 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 7339
fe577e58 7340 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 7341 {
fe577e58 7342 if (! no_new_pseudos)
e075ae69
RH
7343 scratch = force_reg (SImode, const0_rtx);
7344 else
7345 emit_move_insn (scratch, const0_rtx);
7346
7347 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
7348 scratch));
7349 }
7350 else
7351 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
7352 }
e9a25f70 7353}
32b5b1aa 7354
e075ae69
RH
7355void
7356ix86_split_ashrdi (operands, scratch)
7357 rtx *operands, scratch;
32b5b1aa 7358{
e075ae69
RH
7359 rtx low[2], high[2];
7360 int count;
32b5b1aa 7361
e075ae69
RH
7362 if (GET_CODE (operands[2]) == CONST_INT)
7363 {
7364 split_di (operands, 2, low, high);
7365 count = INTVAL (operands[2]) & 63;
32b5b1aa 7366
e075ae69
RH
7367 if (count >= 32)
7368 {
7369 emit_move_insn (low[0], high[1]);
32b5b1aa 7370
e075ae69
RH
7371 if (! reload_completed)
7372 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
7373 else
7374 {
7375 emit_move_insn (high[0], low[0]);
7376 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
7377 }
7378
7379 if (count > 32)
7380 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
7381 }
7382 else
7383 {
7384 if (!rtx_equal_p (operands[0], operands[1]))
7385 emit_move_insn (operands[0], operands[1]);
7386 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
7387 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
7388 }
7389 }
7390 else
32b5b1aa 7391 {
e075ae69
RH
7392 if (!rtx_equal_p (operands[0], operands[1]))
7393 emit_move_insn (operands[0], operands[1]);
7394
7395 split_di (operands, 1, low, high);
7396
7397 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
7398 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
7399
fe577e58 7400 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 7401 {
fe577e58 7402 if (! no_new_pseudos)
e075ae69
RH
7403 scratch = gen_reg_rtx (SImode);
7404 emit_move_insn (scratch, high[0]);
7405 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
7406 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
7407 scratch));
7408 }
7409 else
7410 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 7411 }
e075ae69 7412}
32b5b1aa 7413
e075ae69
RH
7414void
7415ix86_split_lshrdi (operands, scratch)
7416 rtx *operands, scratch;
7417{
7418 rtx low[2], high[2];
7419 int count;
32b5b1aa 7420
e075ae69 7421 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 7422 {
e075ae69
RH
7423 split_di (operands, 2, low, high);
7424 count = INTVAL (operands[2]) & 63;
7425
7426 if (count >= 32)
c7271385 7427 {
e075ae69
RH
7428 emit_move_insn (low[0], high[1]);
7429 emit_move_insn (high[0], const0_rtx);
32b5b1aa 7430
e075ae69
RH
7431 if (count > 32)
7432 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
7433 }
7434 else
7435 {
7436 if (!rtx_equal_p (operands[0], operands[1]))
7437 emit_move_insn (operands[0], operands[1]);
7438 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
7439 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
7440 }
32b5b1aa 7441 }
e075ae69
RH
7442 else
7443 {
7444 if (!rtx_equal_p (operands[0], operands[1]))
7445 emit_move_insn (operands[0], operands[1]);
32b5b1aa 7446
e075ae69
RH
7447 split_di (operands, 1, low, high);
7448
7449 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
7450 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
7451
7452 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 7453 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 7454 {
fe577e58 7455 if (! no_new_pseudos)
e075ae69
RH
7456 scratch = force_reg (SImode, const0_rtx);
7457 else
7458 emit_move_insn (scratch, const0_rtx);
7459
7460 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
7461 scratch));
7462 }
7463 else
7464 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
7465 }
32b5b1aa 7466}
3f803cd9 7467
/* Helper function for the string operations below.  Test whether VARIABLE
   is aligned to VALUE bytes; if so, jump to the returned label (i.e. the
   label is taken when the low bits selected by VALUE are all zero).  */
7470static rtx
7471ix86_expand_aligntest (variable, value)
7472 rtx variable;
7473 int value;
7474{
7475 rtx label = gen_label_rtx ();
7476 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
7477 if (GET_MODE (variable) == DImode)
7478 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
7479 else
7480 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
7481 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
7482 1, 0, label);
7483 return label;
7484}
7485
7486/* Adjust COUNTER by the VALUE. */
7487static void
7488ix86_adjust_counter (countreg, value)
7489 rtx countreg;
7490 HOST_WIDE_INT value;
7491{
7492 if (GET_MODE (countreg) == DImode)
7493 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
7494 else
7495 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
7496}
7497
7498/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 7499rtx
0945b39d
JH
7500ix86_zero_extend_to_Pmode (exp)
7501 rtx exp;
7502{
7503 rtx r;
7504 if (GET_MODE (exp) == VOIDmode)
7505 return force_reg (Pmode, exp);
7506 if (GET_MODE (exp) == Pmode)
7507 return copy_to_mode_reg (Pmode, exp);
7508 r = gen_reg_rtx (Pmode);
7509 emit_insn (gen_zero_extendsidi2 (r, exp));
7510 return r;
7511}
7512
7513/* Expand string move (memcpy) operation. Use i386 string operations when
7514 profitable. expand_clrstr contains similar code. */
int
ix86_expand_movstr (dst, src, count_exp, align_exp)
     rtx dst, src, count_exp, align_exp;
{
  rtx srcreg, destreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;
  rtx insns;

  /* Returns 0 when the copy should be left to the library call,
     1 when the inline expansion has been emitted.  */

  start_sequence ();

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    count = INTVAL (count_exp);

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      if (TARGET_64BIT)
	emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
				        destreg, srcreg, countreg));
      else
	emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
				  destreg, srcreg, countreg));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int)64))
    {
      /* Word size of the rep move: 8 bytes on 64-bit unless sized.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  if (size == 4)
	    {
	      if (TARGET_64BIT)
		emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
					        destreg, srcreg, countreg));
	      else
		emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
					  destreg, srcreg, countreg));
	    }
	  else
	    emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
					    destreg, srcreg, countreg));
	}
      /* Copy the remaining (known) tail bytes with single moves.  */
      if (size == 8 && (count & 0x04))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if (count & 0x02)
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (count & 0x01)
	emit_insn (gen_strmovqi (destreg, srcreg));
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
     allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
	{
	  end_sequence ();
	  return 0;
	}

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
	 than 4 bytes, because gcc is able to optimize such code better (in
	 the case the destination or the count really is aligned, gcc is often
	 able to predict the branches) and also it is friendlier to the
	 hardware branch prediction.

	 Using loops is beneficial for generic case, because we can
	 handle small counts using the loops.  Many CPUs (such as Athlon)
	 have large REP prefix setup costs.

	 This is quite costly.  Maybe we can revisit this decision later or
	 add some customizability to this code.  */

      if (count == 0
	  && align < (TARGET_PENTIUMPRO && (count == 0
					    || count >= (unsigned int)260)
		      ? 8 : UNITS_PER_WORD))
	{
	  /* Skip the alignment prologue entirely for small counts.  */
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
				   LEU, 0, counter_mode, 1, 0, label);
	}
      /* Alignment prologue: copy 1/2/4 bytes until destreg is aligned.
	 (Each inner `label' intentionally shadows the outer one.)  */
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4
	  && ((TARGET_PENTIUMPRO && (count == 0
				     || count >= (unsigned int)260))
	      || TARGET_64BIT))
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      /* Main body: rep movs in word-sized chunks.  */
      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
					  destreg, srcreg, countreg2));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
	  emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
				    destreg, srcreg, countreg2));
	}

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      /* Epilogue: copy the remaining sub-word tail.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if ((align <= 4 || count == 0) && TARGET_64BIT)
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strmovqi (destreg, srcreg));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }

  insns = get_insns ();
  end_sequence ();

  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
  emit_insns (insns);
  return 1;
}
7736
7737/* Expand string clear operation (bzero). Use i386 string operations when
7738 profitable. expand_movstr contains similar code. */
7739int
7740ix86_expand_clrstr (src, count_exp, align_exp)
7741 rtx src, count_exp, align_exp;
7742{
7743 rtx destreg, zeroreg, countreg;
7744 enum machine_mode counter_mode;
7745 HOST_WIDE_INT align = 0;
7746 unsigned HOST_WIDE_INT count = 0;
7747
7748 if (GET_CODE (align_exp) == CONST_INT)
7749 align = INTVAL (align_exp);
7750
7751 /* This simple hack avoids all inlining code and simplifies code bellow. */
7752 if (!TARGET_ALIGN_STRINGOPS)
7753 align = 32;
7754
7755 if (GET_CODE (count_exp) == CONST_INT)
7756 count = INTVAL (count_exp);
7757 /* Figure out proper mode for counter. For 32bits it is always SImode,
7758 for 64bits use SImode when possible, otherwise DImode.
7759 Set count to number of bytes copied when known at compile time. */
7760 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
7761 || x86_64_zero_extended_value (count_exp))
7762 counter_mode = SImode;
7763 else
7764 counter_mode = DImode;
7765
7766 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
7767
7768 emit_insn (gen_cld ());
7769
7770 /* When optimizing for size emit simple rep ; movsb instruction for
7771 counts not divisible by 4. */
7772
7773 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
7774 {
7775 countreg = ix86_zero_extend_to_Pmode (count_exp);
7776 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
7777 if (TARGET_64BIT)
7778 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
7779 destreg, countreg));
7780 else
7781 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
7782 destreg, countreg));
7783 }
7784 else if (count != 0
7785 && (align >= 8
7786 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
7787 || optimize_size || count < (unsigned int)64))
7788 {
7789 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
7790 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
7791 if (count & ~(size - 1))
7792 {
7793 countreg = copy_to_mode_reg (counter_mode,
7794 GEN_INT ((count >> (size == 4 ? 2 : 3))
7795 & (TARGET_64BIT ? -1 : 0x3fffffff)));
7796 countreg = ix86_zero_extend_to_Pmode (countreg);
7797 if (size == 4)
7798 {
7799 if (TARGET_64BIT)
7800 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
7801 destreg, countreg));
7802 else
7803 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
7804 destreg, countreg));
7805 }
7806 else
7807 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
7808 destreg, countreg));
7809 }
7810 if (size == 8 && (count & 0x04))
7811 emit_insn (gen_strsetsi (destreg,
7812 gen_rtx_SUBREG (SImode, zeroreg, 0)));
7813 if (count & 0x02)
7814 emit_insn (gen_strsethi (destreg,
7815 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7816 if (count & 0x01)
7817 emit_insn (gen_strsetqi (destreg,
7818 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7819 }
7820 else
7821 {
7822 rtx countreg2;
7823 rtx label = NULL;
7824
7825 /* In case we don't know anything about the alignment, default to
7826 library version, since it is usually equally fast and result in
7827 shorter code. */
7828 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
7829 return 0;
7830
7831 if (TARGET_SINGLE_STRINGOP)
7832 emit_insn (gen_cld ());
7833
7834 countreg2 = gen_reg_rtx (Pmode);
7835 countreg = copy_to_mode_reg (counter_mode, count_exp);
7836 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
7837
7838 if (count == 0
7839 && align < (TARGET_PENTIUMPRO && (count == 0
7840 || count >= (unsigned int)260)
7841 ? 8 : UNITS_PER_WORD))
7842 {
7843 label = gen_label_rtx ();
7844 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
7845 LEU, 0, counter_mode, 1, 0, label);
7846 }
7847 if (align <= 1)
7848 {
7849 rtx label = ix86_expand_aligntest (destreg, 1);
7850 emit_insn (gen_strsetqi (destreg,
7851 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7852 ix86_adjust_counter (countreg, 1);
7853 emit_label (label);
7854 LABEL_NUSES (label) = 1;
7855 }
7856 if (align <= 2)
7857 {
7858 rtx label = ix86_expand_aligntest (destreg, 2);
7859 emit_insn (gen_strsethi (destreg,
7860 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7861 ix86_adjust_counter (countreg, 2);
7862 emit_label (label);
7863 LABEL_NUSES (label) = 1;
7864 }
7865 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
7866 || count >= (unsigned int)260))
7867 {
7868 rtx label = ix86_expand_aligntest (destreg, 4);
7869 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
7870 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
7871 : zeroreg)));
7872 ix86_adjust_counter (countreg, 4);
7873 emit_label (label);
7874 LABEL_NUSES (label) = 1;
7875 }
7876
7877 if (!TARGET_SINGLE_STRINGOP)
7878 emit_insn (gen_cld ());
7879 if (TARGET_64BIT)
7880 {
7881 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
7882 GEN_INT (3)));
7883 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
7884 destreg, countreg2));
7885 }
7886 else
7887 {
7888 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
7889 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
7890 destreg, countreg2));
7891 }
7892
7893 if (label)
7894 {
7895 emit_label (label);
7896 LABEL_NUSES (label) = 1;
7897 }
7898 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
7899 emit_insn (gen_strsetsi (destreg,
7900 gen_rtx_SUBREG (SImode, zeroreg, 0)));
7901 if (TARGET_64BIT && (align <= 4 || count == 0))
7902 {
7903 rtx label = ix86_expand_aligntest (destreg, 2);
7904 emit_insn (gen_strsetsi (destreg,
7905 gen_rtx_SUBREG (SImode, zeroreg, 0)));
7906 emit_label (label);
7907 LABEL_NUSES (label) = 1;
7908 }
7909 if (align > 2 && count != 0 && (count & 2))
7910 emit_insn (gen_strsethi (destreg,
7911 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7912 if (align <= 2 || count == 0)
7913 {
7914 rtx label = ix86_expand_aligntest (destreg, 2);
7915 emit_insn (gen_strsethi (destreg,
7916 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7917 emit_label (label);
7918 LABEL_NUSES (label) = 1;
7919 }
7920 if (align > 1 && count != 0 && (count & 1))
7921 emit_insn (gen_strsetqi (destreg,
7922 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7923 if (align <= 1 || count == 0)
7924 {
7925 rtx label = ix86_expand_aligntest (destreg, 1);
7926 emit_insn (gen_strsetqi (destreg,
7927 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7928 emit_label (label);
7929 LABEL_NUSES (label) = 1;
7930 }
7931 }
7932 return 1;
7933}
7934/* Expand strlen. */
7935int
7936ix86_expand_strlen (out, src, eoschar, align)
7937 rtx out, src, eoschar, align;
7938{
7939 rtx addr, scratch1, scratch2, scratch3, scratch4;
7940
7941 /* The generic case of strlen expander is long. Avoid it's
7942 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
7943
7944 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
7945 && !TARGET_INLINE_ALL_STRINGOPS
7946 && !optimize_size
7947 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
7948 return 0;
7949
7950 addr = force_reg (Pmode, XEXP (src, 0));
7951 scratch1 = gen_reg_rtx (Pmode);
7952
7953 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
7954 && !optimize_size)
7955 {
7956 /* Well it seems that some optimizer does not combine a call like
7957 foo(strlen(bar), strlen(bar));
7958 when the move and the subtraction is done here. It does calculate
7959 the length just once when these instructions are done inside of
7960 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
7961 often used and I use one fewer register for the lifetime of
7962 output_strlen_unroll() this is better. */
7963
7964 emit_move_insn (out, addr);
7965
7966 ix86_expand_strlensi_unroll_1 (out, align);
7967
7968 /* strlensi_unroll_1 returns the address of the zero at the end of
7969 the string, like memchr(), so compute the length by subtracting
7970 the start address. */
7971 if (TARGET_64BIT)
7972 emit_insn (gen_subdi3 (out, out, addr));
7973 else
7974 emit_insn (gen_subsi3 (out, out, addr));
7975 }
7976 else
7977 {
7978 scratch2 = gen_reg_rtx (Pmode);
7979 scratch3 = gen_reg_rtx (Pmode);
7980 scratch4 = force_reg (Pmode, constm1_rtx);
7981
7982 emit_move_insn (scratch3, addr);
7983 eoschar = force_reg (QImode, eoschar);
7984
7985 emit_insn (gen_cld ());
7986 if (TARGET_64BIT)
7987 {
7988 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
7989 align, scratch4, scratch3));
7990 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
7991 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
7992 }
7993 else
7994 {
7995 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
7996 align, scratch4, scratch3));
7997 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
7998 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
7999 }
8000 }
8001 return 1;
8002}
8003
e075ae69
RH
8004/* Expand the appropriate insns for doing strlen if not just doing
8005 repnz; scasb
8006
8007 out = result, initialized with the start address
8008 align_rtx = alignment of the address.
8009 scratch = scratch register, initialized with the startaddress when
77ebd435 8010 not aligned, otherwise undefined
3f803cd9
SC
8011
8012 This is just the body. It needs the initialisations mentioned above and
8013 some address computing at the end. These things are done in i386.md. */
8014
0945b39d
JH
8015static void
8016ix86_expand_strlensi_unroll_1 (out, align_rtx)
8017 rtx out, align_rtx;
3f803cd9 8018{
e075ae69
RH
8019 int align;
8020 rtx tmp;
8021 rtx align_2_label = NULL_RTX;
8022 rtx align_3_label = NULL_RTX;
8023 rtx align_4_label = gen_label_rtx ();
8024 rtx end_0_label = gen_label_rtx ();
e075ae69 8025 rtx mem;
e2e52e1b 8026 rtx tmpreg = gen_reg_rtx (SImode);
0945b39d 8027 rtx scratch = gen_reg_rtx (SImode);
e075ae69
RH
8028
8029 align = 0;
8030 if (GET_CODE (align_rtx) == CONST_INT)
8031 align = INTVAL (align_rtx);
3f803cd9 8032
e9a25f70 8033 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 8034
e9a25f70 8035 /* Is there a known alignment and is it less than 4? */
e075ae69 8036 if (align < 4)
3f803cd9 8037 {
0945b39d
JH
8038 rtx scratch1 = gen_reg_rtx (Pmode);
8039 emit_move_insn (scratch1, out);
e9a25f70 8040 /* Is there a known alignment and is it not 2? */
e075ae69 8041 if (align != 2)
3f803cd9 8042 {
e075ae69
RH
8043 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
8044 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
8045
8046 /* Leave just the 3 lower bits. */
0945b39d 8047 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
e075ae69
RH
8048 NULL_RTX, 0, OPTAB_WIDEN);
8049
9076b9c1 8050 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
0945b39d 8051 Pmode, 1, 0, align_4_label);
9076b9c1 8052 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
0945b39d 8053 Pmode, 1, 0, align_2_label);
9076b9c1 8054 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
0945b39d 8055 Pmode, 1, 0, align_3_label);
3f803cd9
SC
8056 }
8057 else
8058 {
e9a25f70
JL
8059 /* Since the alignment is 2, we have to check 2 or 0 bytes;
8060 check if is aligned to 4 - byte. */
e9a25f70 8061
0945b39d 8062 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
e075ae69
RH
8063 NULL_RTX, 0, OPTAB_WIDEN);
8064
9076b9c1 8065 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
0945b39d 8066 Pmode, 1, 0, align_4_label);
3f803cd9
SC
8067 }
8068
e075ae69 8069 mem = gen_rtx_MEM (QImode, out);
e9a25f70 8070
e075ae69 8071 /* Now compare the bytes. */
e9a25f70 8072
0f290768 8073 /* Compare the first n unaligned byte on a byte per byte basis. */
9076b9c1
JH
8074 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
8075 QImode, 1, 0, end_0_label);
3f803cd9 8076
0f290768 8077 /* Increment the address. */
0945b39d
JH
8078 if (TARGET_64BIT)
8079 emit_insn (gen_adddi3 (out, out, const1_rtx));
8080 else
8081 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 8082
e075ae69
RH
8083 /* Not needed with an alignment of 2 */
8084 if (align != 2)
8085 {
8086 emit_label (align_2_label);
3f803cd9 8087
9076b9c1
JH
8088 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
8089 QImode, 1, 0, end_0_label);
e075ae69 8090
0945b39d
JH
8091 if (TARGET_64BIT)
8092 emit_insn (gen_adddi3 (out, out, const1_rtx));
8093 else
8094 emit_insn (gen_addsi3 (out, out, const1_rtx));
e075ae69
RH
8095
8096 emit_label (align_3_label);
8097 }
8098
9076b9c1
JH
8099 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
8100 QImode, 1, 0, end_0_label);
e075ae69 8101
0945b39d
JH
8102 if (TARGET_64BIT)
8103 emit_insn (gen_adddi3 (out, out, const1_rtx));
8104 else
8105 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
8106 }
8107
e075ae69
RH
8108 /* Generate loop to check 4 bytes at a time. It is not a good idea to
8109 align this loop. It gives only huge programs, but does not help to
8110 speed up. */
8111 emit_label (align_4_label);
3f803cd9 8112
e075ae69
RH
8113 mem = gen_rtx_MEM (SImode, out);
8114 emit_move_insn (scratch, mem);
0945b39d
JH
8115 if (TARGET_64BIT)
8116 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
8117 else
8118 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 8119
e2e52e1b
JH
8120 /* This formula yields a nonzero result iff one of the bytes is zero.
8121 This saves three branches inside loop and many cycles. */
8122
8123 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
8124 emit_insn (gen_one_cmplsi2 (scratch, scratch));
8125 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
7471a1f0
AO
8126 emit_insn (gen_andsi3 (tmpreg, tmpreg,
8127 GEN_INT (trunc_int_for_mode
8128 (0x80808080, SImode))));
9076b9c1
JH
8129 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
8130 SImode, 1, 0, align_4_label);
e2e52e1b
JH
8131
8132 if (TARGET_CMOVE)
8133 {
8134 rtx reg = gen_reg_rtx (SImode);
0945b39d 8135 rtx reg2 = gen_reg_rtx (Pmode);
e2e52e1b
JH
8136 emit_move_insn (reg, tmpreg);
8137 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
8138
0f290768 8139 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 8140 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
8141 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
8142 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
8143 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
8144 gen_rtx_IF_THEN_ELSE (SImode, tmp,
77ebd435
AJ
8145 reg,
8146 tmpreg)));
e2e52e1b 8147 /* Emit lea manually to avoid clobbering of flags. */
0945b39d
JH
8148 emit_insn (gen_rtx_SET (SImode, reg2,
8149 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
e2e52e1b
JH
8150
8151 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
8152 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
8153 emit_insn (gen_rtx_SET (VOIDmode, out,
0945b39d 8154 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
a4f31c00
AJ
8155 reg2,
8156 out)));
e2e52e1b
JH
8157
8158 }
8159 else
8160 {
8161 rtx end_2_label = gen_label_rtx ();
8162 /* Is zero in the first two bytes? */
8163
16189740 8164 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
8165 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
8166 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
8167 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8168 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
8169 pc_rtx);
8170 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8171 JUMP_LABEL (tmp) = end_2_label;
8172
0f290768 8173 /* Not in the first two. Move two bytes forward. */
e2e52e1b 8174 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
0945b39d
JH
8175 if (TARGET_64BIT)
8176 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
8177 else
8178 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
e2e52e1b
JH
8179
8180 emit_label (end_2_label);
8181
8182 }
8183
0f290768 8184 /* Avoid branch in fixing the byte. */
e2e52e1b 8185 tmpreg = gen_lowpart (QImode, tmpreg);
7e08e190 8186 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
0945b39d
JH
8187 if (TARGET_64BIT)
8188 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
8189 else
8190 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
e075ae69
RH
8191
8192 emit_label (end_0_label);
8193}
8194\f
e075ae69
RH
8195/* Clear stack slot assignments remembered from previous functions.
8196 This is called from INIT_EXPANDERS once before RTL is emitted for each
8197 function. */
8198
36edd3cc
BS
8199static void
8200ix86_init_machine_status (p)
1526a060 8201 struct function *p;
e075ae69 8202{
37b15744
RH
8203 p->machine = (struct machine_function *)
8204 xcalloc (1, sizeof (struct machine_function));
e075ae69
RH
8205}
8206
1526a060
BS
8207/* Mark machine specific bits of P for GC. */
8208static void
8209ix86_mark_machine_status (p)
8210 struct function *p;
8211{
37b15744 8212 struct machine_function *machine = p->machine;
1526a060
BS
8213 enum machine_mode mode;
8214 int n;
8215
37b15744
RH
8216 if (! machine)
8217 return;
8218
1526a060
BS
8219 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
8220 mode = (enum machine_mode) ((int) mode + 1))
8221 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
37b15744
RH
8222 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
8223}
8224
8225static void
8226ix86_free_machine_status (p)
8227 struct function *p;
8228{
8229 free (p->machine);
8230 p->machine = NULL;
1526a060
BS
8231}
8232
e075ae69
RH
8233/* Return a MEM corresponding to a stack slot with mode MODE.
8234 Allocate a new slot if necessary.
8235
8236 The RTL for a function can have several slots available: N is
8237 which slot to use. */
8238
8239rtx
8240assign_386_stack_local (mode, n)
8241 enum machine_mode mode;
8242 int n;
8243{
8244 if (n < 0 || n >= MAX_386_STACK_LOCALS)
8245 abort ();
8246
8247 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
8248 ix86_stack_locals[(int) mode][n]
8249 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
8250
8251 return ix86_stack_locals[(int) mode][n];
8252}
8253\f
8254/* Calculate the length of the memory address in the instruction
8255 encoding. Does not include the one-byte modrm, opcode, or prefix. */
8256
8257static int
8258memory_address_length (addr)
8259 rtx addr;
8260{
8261 struct ix86_address parts;
8262 rtx base, index, disp;
8263 int len;
8264
8265 if (GET_CODE (addr) == PRE_DEC
0d7d98ee
JH
8266 || GET_CODE (addr) == POST_INC
8267 || GET_CODE (addr) == PRE_MODIFY
8268 || GET_CODE (addr) == POST_MODIFY)
e075ae69 8269 return 0;
3f803cd9 8270
e075ae69
RH
8271 if (! ix86_decompose_address (addr, &parts))
8272 abort ();
3f803cd9 8273
e075ae69
RH
8274 base = parts.base;
8275 index = parts.index;
8276 disp = parts.disp;
8277 len = 0;
3f803cd9 8278
e075ae69
RH
8279 /* Register Indirect. */
8280 if (base && !index && !disp)
8281 {
8282 /* Special cases: ebp and esp need the two-byte modrm form. */
8283 if (addr == stack_pointer_rtx
8284 || addr == arg_pointer_rtx
564d80f4
JH
8285 || addr == frame_pointer_rtx
8286 || addr == hard_frame_pointer_rtx)
e075ae69 8287 len = 1;
3f803cd9 8288 }
e9a25f70 8289
e075ae69
RH
8290 /* Direct Addressing. */
8291 else if (disp && !base && !index)
8292 len = 4;
8293
3f803cd9
SC
8294 else
8295 {
e075ae69
RH
8296 /* Find the length of the displacement constant. */
8297 if (disp)
8298 {
8299 if (GET_CODE (disp) == CONST_INT
8300 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
8301 len = 1;
8302 else
8303 len = 4;
8304 }
3f803cd9 8305
e075ae69
RH
8306 /* An index requires the two-byte modrm form. */
8307 if (index)
8308 len += 1;
3f803cd9
SC
8309 }
8310
e075ae69
RH
8311 return len;
8312}
79325812 8313
6ef67412
JH
8314/* Compute default value for "length_immediate" attribute. When SHORTFORM is set
8315 expect that insn have 8bit immediate alternative. */
e075ae69 8316int
6ef67412 8317ix86_attr_length_immediate_default (insn, shortform)
e075ae69 8318 rtx insn;
6ef67412 8319 int shortform;
e075ae69 8320{
6ef67412
JH
8321 int len = 0;
8322 int i;
6c698a6d 8323 extract_insn_cached (insn);
6ef67412
JH
8324 for (i = recog_data.n_operands - 1; i >= 0; --i)
8325 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 8326 {
6ef67412 8327 if (len)
3071fab5 8328 abort ();
6ef67412
JH
8329 if (shortform
8330 && GET_CODE (recog_data.operand[i]) == CONST_INT
8331 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
8332 len = 1;
8333 else
8334 {
8335 switch (get_attr_mode (insn))
8336 {
8337 case MODE_QI:
8338 len+=1;
8339 break;
8340 case MODE_HI:
8341 len+=2;
8342 break;
8343 case MODE_SI:
8344 len+=4;
8345 break;
8346 default:
8347 fatal_insn ("Unknown insn mode", insn);
8348 }
8349 }
3071fab5 8350 }
6ef67412
JH
8351 return len;
8352}
8353/* Compute default value for "length_address" attribute. */
8354int
8355ix86_attr_length_address_default (insn)
8356 rtx insn;
8357{
8358 int i;
6c698a6d 8359 extract_insn_cached (insn);
1ccbefce
RH
8360 for (i = recog_data.n_operands - 1; i >= 0; --i)
8361 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 8362 {
6ef67412 8363 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
8364 break;
8365 }
6ef67412 8366 return 0;
3f803cd9 8367}
e075ae69
RH
8368\f
8369/* Return the maximum number of instructions a cpu can issue. */
b657fc39 8370
e075ae69
RH
8371int
8372ix86_issue_rate ()
b657fc39 8373{
e075ae69 8374 switch (ix86_cpu)
b657fc39 8375 {
e075ae69
RH
8376 case PROCESSOR_PENTIUM:
8377 case PROCESSOR_K6:
8378 return 2;
79325812 8379
e075ae69 8380 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
8381 case PROCESSOR_PENTIUM4:
8382 case PROCESSOR_ATHLON:
e075ae69 8383 return 3;
b657fc39 8384
b657fc39 8385 default:
e075ae69 8386 return 1;
b657fc39 8387 }
b657fc39
L
8388}
8389
e075ae69
RH
8390/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
8391 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 8392
e075ae69
RH
8393static int
8394ix86_flags_dependant (insn, dep_insn, insn_type)
8395 rtx insn, dep_insn;
8396 enum attr_type insn_type;
8397{
8398 rtx set, set2;
b657fc39 8399
e075ae69
RH
8400 /* Simplify the test for uninteresting insns. */
8401 if (insn_type != TYPE_SETCC
8402 && insn_type != TYPE_ICMOV
8403 && insn_type != TYPE_FCMOV
8404 && insn_type != TYPE_IBR)
8405 return 0;
b657fc39 8406
e075ae69
RH
8407 if ((set = single_set (dep_insn)) != 0)
8408 {
8409 set = SET_DEST (set);
8410 set2 = NULL_RTX;
8411 }
8412 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
8413 && XVECLEN (PATTERN (dep_insn), 0) == 2
8414 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
8415 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
8416 {
8417 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
8418 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
8419 }
78a0d70c
ZW
8420 else
8421 return 0;
b657fc39 8422
78a0d70c
ZW
8423 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
8424 return 0;
b657fc39 8425
78a0d70c
ZW
8426 /* This test is true if the dependant insn reads the flags but
8427 not any other potentially set register. */
8428 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
8429 return 0;
8430
8431 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
8432 return 0;
8433
8434 return 1;
e075ae69 8435}
b657fc39 8436
e075ae69
RH
8437/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
8438 address with operands set by DEP_INSN. */
8439
8440static int
8441ix86_agi_dependant (insn, dep_insn, insn_type)
8442 rtx insn, dep_insn;
8443 enum attr_type insn_type;
8444{
8445 rtx addr;
8446
8447 if (insn_type == TYPE_LEA)
5fbdde42
RH
8448 {
8449 addr = PATTERN (insn);
8450 if (GET_CODE (addr) == SET)
8451 ;
8452 else if (GET_CODE (addr) == PARALLEL
8453 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
8454 addr = XVECEXP (addr, 0, 0);
8455 else
8456 abort ();
8457 addr = SET_SRC (addr);
8458 }
e075ae69
RH
8459 else
8460 {
8461 int i;
6c698a6d 8462 extract_insn_cached (insn);
1ccbefce
RH
8463 for (i = recog_data.n_operands - 1; i >= 0; --i)
8464 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 8465 {
1ccbefce 8466 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
8467 goto found;
8468 }
8469 return 0;
8470 found:;
b657fc39
L
8471 }
8472
e075ae69 8473 return modified_in_p (addr, dep_insn);
b657fc39 8474}
a269a03c
JC
8475
8476int
e075ae69 8477ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
8478 rtx insn, link, dep_insn;
8479 int cost;
8480{
e075ae69 8481 enum attr_type insn_type, dep_insn_type;
0b5107cf 8482 enum attr_memory memory;
e075ae69 8483 rtx set, set2;
9b00189f 8484 int dep_insn_code_number;
a269a03c 8485
309ada50 8486 /* Anti and output depenancies have zero cost on all CPUs. */
e075ae69 8487 if (REG_NOTE_KIND (link) != 0)
309ada50 8488 return 0;
a269a03c 8489
9b00189f
JH
8490 dep_insn_code_number = recog_memoized (dep_insn);
8491
e075ae69 8492 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 8493 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 8494 return cost;
a269a03c 8495
1c71e60e
JH
8496 insn_type = get_attr_type (insn);
8497 dep_insn_type = get_attr_type (dep_insn);
9b00189f 8498
a269a03c
JC
8499 switch (ix86_cpu)
8500 {
8501 case PROCESSOR_PENTIUM:
e075ae69
RH
8502 /* Address Generation Interlock adds a cycle of latency. */
8503 if (ix86_agi_dependant (insn, dep_insn, insn_type))
8504 cost += 1;
8505
8506 /* ??? Compares pair with jump/setcc. */
8507 if (ix86_flags_dependant (insn, dep_insn, insn_type))
8508 cost = 0;
8509
8510 /* Floating point stores require value to be ready one cycle ealier. */
0f290768 8511 if (insn_type == TYPE_FMOV
e075ae69
RH
8512 && get_attr_memory (insn) == MEMORY_STORE
8513 && !ix86_agi_dependant (insn, dep_insn, insn_type))
8514 cost += 1;
8515 break;
a269a03c 8516
e075ae69 8517 case PROCESSOR_PENTIUMPRO:
0f290768 8518 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
8519 increase the cost here for non-imov insns. */
8520 if (dep_insn_type != TYPE_IMOV
8521 && dep_insn_type != TYPE_FMOV
0b5107cf
JH
8522 && ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
8523 || memory == MEMORY_BOTH))
e075ae69
RH
8524 cost += 1;
8525
8526 /* INT->FP conversion is expensive. */
8527 if (get_attr_fp_int_src (dep_insn))
8528 cost += 5;
8529
8530 /* There is one cycle extra latency between an FP op and a store. */
8531 if (insn_type == TYPE_FMOV
8532 && (set = single_set (dep_insn)) != NULL_RTX
8533 && (set2 = single_set (insn)) != NULL_RTX
8534 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
8535 && GET_CODE (SET_DEST (set2)) == MEM)
8536 cost += 1;
8537 break;
a269a03c 8538
e075ae69
RH
8539 case PROCESSOR_K6:
8540 /* The esp dependency is resolved before the instruction is really
8541 finished. */
8542 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
8543 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
8544 return 1;
a269a03c 8545
0f290768 8546 /* Since we can't represent delayed latencies of load+operation,
e075ae69 8547 increase the cost here for non-imov insns. */
0b5107cf
JH
8548 if ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
8549 || memory == MEMORY_BOTH)
e075ae69
RH
8550 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
8551
8552 /* INT->FP conversion is expensive. */
8553 if (get_attr_fp_int_src (dep_insn))
8554 cost += 5;
a14003ee 8555 break;
e075ae69 8556
309ada50 8557 case PROCESSOR_ATHLON:
0b5107cf
JH
8558 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
8559 || memory == MEMORY_BOTH)
8560 {
8561 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
8562 cost += 2;
8563 else
8564 cost += 3;
8565 }
309ada50 8566
a269a03c 8567 default:
a269a03c
JC
8568 break;
8569 }
8570
8571 return cost;
8572}
0a726ef1 8573
e075ae69
RH
8574static union
8575{
8576 struct ppro_sched_data
8577 {
8578 rtx decode[3];
8579 int issued_this_cycle;
8580 } ppro;
8581} ix86_sched_data;
0a726ef1 8582
e075ae69
RH
8583static int
8584ix86_safe_length (insn)
8585 rtx insn;
8586{
8587 if (recog_memoized (insn) >= 0)
8588 return get_attr_length(insn);
8589 else
8590 return 128;
8591}
0a726ef1 8592
e075ae69
RH
8593static int
8594ix86_safe_length_prefix (insn)
8595 rtx insn;
8596{
8597 if (recog_memoized (insn) >= 0)
8598 return get_attr_length(insn);
8599 else
8600 return 0;
8601}
8602
8603static enum attr_memory
8604ix86_safe_memory (insn)
8605 rtx insn;
8606{
8607 if (recog_memoized (insn) >= 0)
8608 return get_attr_memory(insn);
8609 else
8610 return MEMORY_UNKNOWN;
8611}
0a726ef1 8612
e075ae69
RH
8613static enum attr_pent_pair
8614ix86_safe_pent_pair (insn)
8615 rtx insn;
8616{
8617 if (recog_memoized (insn) >= 0)
8618 return get_attr_pent_pair(insn);
8619 else
8620 return PENT_PAIR_NP;
8621}
0a726ef1 8622
e075ae69
RH
8623static enum attr_ppro_uops
8624ix86_safe_ppro_uops (insn)
8625 rtx insn;
8626{
8627 if (recog_memoized (insn) >= 0)
8628 return get_attr_ppro_uops (insn);
8629 else
8630 return PPRO_UOPS_MANY;
8631}
0a726ef1 8632
e075ae69
RH
8633static void
8634ix86_dump_ppro_packet (dump)
8635 FILE *dump;
0a726ef1 8636{
e075ae69 8637 if (ix86_sched_data.ppro.decode[0])
0a726ef1 8638 {
e075ae69
RH
8639 fprintf (dump, "PPRO packet: %d",
8640 INSN_UID (ix86_sched_data.ppro.decode[0]));
8641 if (ix86_sched_data.ppro.decode[1])
8642 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
8643 if (ix86_sched_data.ppro.decode[2])
8644 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
8645 fputc ('\n', dump);
8646 }
8647}
0a726ef1 8648
e075ae69 8649/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 8650
e075ae69
RH
8651void
8652ix86_sched_init (dump, sched_verbose)
8653 FILE *dump ATTRIBUTE_UNUSED;
8654 int sched_verbose ATTRIBUTE_UNUSED;
8655{
8656 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
8657}
8658
8659/* Shift INSN to SLOT, and shift everything else down. */
8660
8661static void
8662ix86_reorder_insn (insnp, slot)
8663 rtx *insnp, *slot;
8664{
8665 if (insnp != slot)
8666 {
8667 rtx insn = *insnp;
0f290768 8668 do
e075ae69
RH
8669 insnp[0] = insnp[1];
8670 while (++insnp != slot);
8671 *insnp = insn;
0a726ef1 8672 }
e075ae69
RH
8673}
8674
8675/* Find an instruction with given pairability and minimal amount of cycles
8676 lost by the fact that the CPU waits for both pipelines to finish before
8677 reading next instructions. Also take care that both instructions together
8678 can not exceed 7 bytes. */
8679
8680static rtx *
8681ix86_pent_find_pair (e_ready, ready, type, first)
8682 rtx *e_ready;
8683 rtx *ready;
8684 enum attr_pent_pair type;
8685 rtx first;
8686{
8687 int mincycles, cycles;
8688 enum attr_pent_pair tmp;
8689 enum attr_memory memory;
8690 rtx *insnp, *bestinsnp = NULL;
0a726ef1 8691
e075ae69
RH
8692 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
8693 return NULL;
0a726ef1 8694
e075ae69
RH
8695 memory = ix86_safe_memory (first);
8696 cycles = result_ready_cost (first);
8697 mincycles = INT_MAX;
8698
8699 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
8700 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
8701 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
6ec6d558 8702 {
e075ae69
RH
8703 enum attr_memory second_memory;
8704 int secondcycles, currentcycles;
8705
8706 second_memory = ix86_safe_memory (*insnp);
8707 secondcycles = result_ready_cost (*insnp);
8708 currentcycles = abs (cycles - secondcycles);
8709
8710 if (secondcycles >= 1 && cycles >= 1)
6ec6d558 8711 {
e075ae69
RH
8712 /* Two read/modify/write instructions together takes two
8713 cycles longer. */
8714 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
8715 currentcycles += 2;
0f290768 8716
e075ae69
RH
8717 /* Read modify/write instruction followed by read/modify
8718 takes one cycle longer. */
8719 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
8720 && tmp != PENT_PAIR_UV
8721 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
8722 currentcycles += 1;
6ec6d558 8723 }
e075ae69
RH
8724 if (currentcycles < mincycles)
8725 bestinsnp = insnp, mincycles = currentcycles;
6ec6d558 8726 }
0a726ef1 8727
e075ae69
RH
8728 return bestinsnp;
8729}
8730
78a0d70c 8731/* Subroutines of ix86_sched_reorder. */
e075ae69 8732
c6991660 8733static void
78a0d70c 8734ix86_sched_reorder_pentium (ready, e_ready)
e075ae69 8735 rtx *ready;
78a0d70c 8736 rtx *e_ready;
e075ae69 8737{
78a0d70c 8738 enum attr_pent_pair pair1, pair2;
e075ae69 8739 rtx *insnp;
e075ae69 8740
78a0d70c
ZW
8741 /* This wouldn't be necessary if Haifa knew that static insn ordering
8742 is important to which pipe an insn is issued to. So we have to make
8743 some minor rearrangements. */
e075ae69 8744
78a0d70c
ZW
8745 pair1 = ix86_safe_pent_pair (*e_ready);
8746
8747 /* If the first insn is non-pairable, let it be. */
8748 if (pair1 == PENT_PAIR_NP)
8749 return;
8750
8751 pair2 = PENT_PAIR_NP;
8752 insnp = 0;
8753
8754 /* If the first insn is UV or PV pairable, search for a PU
8755 insn to go with. */
8756 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
e075ae69 8757 {
78a0d70c
ZW
8758 insnp = ix86_pent_find_pair (e_ready-1, ready,
8759 PENT_PAIR_PU, *e_ready);
8760 if (insnp)
8761 pair2 = PENT_PAIR_PU;
8762 }
e075ae69 8763
78a0d70c
ZW
8764 /* If the first insn is PU or UV pairable, search for a PV
8765 insn to go with. */
8766 if (pair2 == PENT_PAIR_NP
8767 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
8768 {
8769 insnp = ix86_pent_find_pair (e_ready-1, ready,
8770 PENT_PAIR_PV, *e_ready);
8771 if (insnp)
8772 pair2 = PENT_PAIR_PV;
8773 }
e075ae69 8774
78a0d70c
ZW
8775 /* If the first insn is pairable, search for a UV
8776 insn to go with. */
8777 if (pair2 == PENT_PAIR_NP)
8778 {
8779 insnp = ix86_pent_find_pair (e_ready-1, ready,
8780 PENT_PAIR_UV, *e_ready);
8781 if (insnp)
8782 pair2 = PENT_PAIR_UV;
8783 }
e075ae69 8784
78a0d70c
ZW
8785 if (pair2 == PENT_PAIR_NP)
8786 return;
e075ae69 8787
78a0d70c
ZW
8788 /* Found something! Decide if we need to swap the order. */
8789 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
8790 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
8791 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
8792 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
8793 ix86_reorder_insn (insnp, e_ready);
8794 else
8795 ix86_reorder_insn (insnp, e_ready - 1);
8796}
e075ae69 8797
/* Reorder the scheduler ready list for the Pentium Pro.  READY points
   to the lowest-priority entry and E_READY to the highest-priority
   (last) entry.  Decoder slot 0 accepts any insn; slots 1 and 2 only
   accept single-uop insns, so we try to pair one multi-uop insn with
   up to two single-uop insns per decode cycle.  */
static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready >= e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  /* Record at least one issue so ix86_variable_issue's countdown
     never starts from zero.  */
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}
fb693d44 8882
0f290768 8883/* We are about to being issuing insns for this clock cycle.
78a0d70c
ZW
8884 Override the default sort algorithm to better slot instructions. */
8885int
8886ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
8887 FILE *dump ATTRIBUTE_UNUSED;
8888 int sched_verbose ATTRIBUTE_UNUSED;
8889 rtx *ready;
8890 int n_ready;
8891 int clock_var ATTRIBUTE_UNUSED;
8892{
8893 rtx *e_ready = ready + n_ready - 1;
fb693d44 8894
78a0d70c
ZW
8895 if (n_ready < 2)
8896 goto out;
e075ae69 8897
78a0d70c
ZW
8898 switch (ix86_cpu)
8899 {
8900 default:
8901 break;
e075ae69 8902
78a0d70c
ZW
8903 case PROCESSOR_PENTIUM:
8904 ix86_sched_reorder_pentium (ready, e_ready);
8905 break;
e075ae69 8906
78a0d70c
ZW
8907 case PROCESSOR_PENTIUMPRO:
8908 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 8909 break;
fb693d44
RH
8910 }
8911
e075ae69
RH
8912out:
8913 return ix86_issue_rate ();
8914}
fb693d44 8915
e075ae69
RH
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      /* Most processors simply count the issue budget down by one.  */
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    /* A many-uop insn occupies a decode group by itself:
	       dump the packet in progress, then dump INSN as its own
	       packet, and leave all three decode slots empty.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    /* A few-uop insn must start a new decode group in slot 0;
	       the packet in progress is finished.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    /* A single-uop insn fills the first free decode slot;
	       filling slot 2 completes the packet.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    /* The reorder pass must have left a free slot.  */
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
	/* Count down the budget computed by ix86_sched_reorder_ppro.  */
	return --ix86_sched_data.ppro.issued_this_cycle;
      }
    }
}
a7180f70 8978\f
0e4970d7
RK
8979/* Walk through INSNS and look for MEM references whose address is DSTREG or
8980 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
8981 appropriate. */
8982
8983void
8984ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
8985 rtx insns;
8986 rtx dstref, srcref, dstreg, srcreg;
8987{
8988 rtx insn;
8989
8990 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
8991 if (INSN_P (insn))
8992 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
8993 dstreg, srcreg);
8994}
8995
8996/* Subroutine of above to actually do the updating by recursively walking
8997 the rtx. */
8998
8999static void
9000ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
9001 rtx x;
9002 rtx dstref, srcref, dstreg, srcreg;
9003{
9004 enum rtx_code code = GET_CODE (x);
9005 const char *format_ptr = GET_RTX_FORMAT (code);
9006 int i, j;
9007
9008 if (code == MEM && XEXP (x, 0) == dstreg)
9009 MEM_COPY_ATTRIBUTES (x, dstref);
9010 else if (code == MEM && XEXP (x, 0) == srcreg)
9011 MEM_COPY_ATTRIBUTES (x, srcref);
9012
9013 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
9014 {
9015 if (*format_ptr == 'e')
9016 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
9017 dstreg, srcreg);
9018 else if (*format_ptr == 'E')
9019 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
397be6cf 9020 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
0e4970d7
RK
9021 dstreg, srcreg);
9022 }
9023}
9024\f
a7180f70
BS
9025/* Compute the alignment given to a constant that is being placed in memory.
9026 EXP is the constant and ALIGN is the alignment that the object would
9027 ordinarily have.
9028 The value of this function is used instead of that alignment to align
9029 the object. */
9030
9031int
9032ix86_constant_alignment (exp, align)
9033 tree exp;
9034 int align;
9035{
9036 if (TREE_CODE (exp) == REAL_CST)
9037 {
9038 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
9039 return 64;
9040 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
9041 return 128;
9042 }
9043 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
9044 && align < 256)
9045 return 256;
9046
9047 return align;
9048}
9049
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment (in bits) that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (type, align)
     tree type;
     int align;
{
  /* Aggregates of 256 bits (32 bytes) or more get 256-bit alignment.
     TYPE_SIZE is in bits; a nonzero TREE_INT_CST_HIGH means the size
     overflows the low word and is certainly large enough.  */
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* x86-64 ABI requires arrays greater than 16 bytes (128 bits) to be
     aligned to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  /* For arrays, complex values, records with fields, and scalar
     types, align according to the (element or field) machine mode:
     DFmode/DCmode want 64 bits, ALIGN_MODE_128 modes want 128.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Only the first field's mode is consulted here.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
9115
9116/* Compute the alignment for a local variable.
9117 TYPE is the data type, and ALIGN is the alignment that
9118 the object would ordinarily have. The value of this macro is used
9119 instead of that alignment to align the object. */
9120
9121int
9122ix86_local_alignment (type, align)
9123 tree type;
9124 int align;
9125{
0d7d98ee
JH
9126 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
9127 to 16byte boundary. */
9128 if (TARGET_64BIT)
9129 {
9130 if (AGGREGATE_TYPE_P (type)
9131 && TYPE_SIZE (type)
9132 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
9133 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
9134 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
9135 return 128;
9136 }
a7180f70
BS
9137 if (TREE_CODE (type) == ARRAY_TYPE)
9138 {
9139 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
9140 return 64;
9141 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
9142 return 128;
9143 }
9144 else if (TREE_CODE (type) == COMPLEX_TYPE)
9145 {
9146 if (TYPE_MODE (type) == DCmode && align < 64)
9147 return 64;
9148 if (TYPE_MODE (type) == XCmode && align < 128)
9149 return 128;
9150 }
9151 else if ((TREE_CODE (type) == RECORD_TYPE
9152 || TREE_CODE (type) == UNION_TYPE
9153 || TREE_CODE (type) == QUAL_UNION_TYPE)
9154 && TYPE_FIELDS (type))
9155 {
9156 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
9157 return 64;
9158 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
9159 return 128;
9160 }
9161 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
9162 || TREE_CODE (type) == INTEGER_TYPE)
9163 {
0f290768 9164
a7180f70
BS
9165 if (TYPE_MODE (type) == DFmode && align < 64)
9166 return 64;
9167 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
9168 return 128;
9169 }
9170 return align;
9171}
0ed08620
JH
9172\f
9173/* Emit RTL insns to initialize the variable parts of a trampoline.
9174 FNADDR is an RTX for the address of the function's pure code.
9175 CXT is an RTX for the static chain value for the function. */
9176void
9177x86_initialize_trampoline (tramp, fnaddr, cxt)
9178 rtx tramp, fnaddr, cxt;
9179{
9180 if (!TARGET_64BIT)
9181 {
9182 /* Compute offset from the end of the jmp to the target function. */
9183 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
9184 plus_constant (tramp, 10),
9185 NULL_RTX, 1, OPTAB_DIRECT);
9186 emit_move_insn (gen_rtx_MEM (QImode, tramp),
9187 GEN_INT (trunc_int_for_mode (0xb9, QImode)));
9188 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
9189 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
9190 GEN_INT (trunc_int_for_mode (0xe9, QImode)));
9191 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
9192 }
9193 else
9194 {
9195 int offset = 0;
9196 /* Try to load address using shorter movl instead of movabs.
9197 We may want to support movq for kernel mode, but kernel does not use
9198 trampolines at the moment. */
9199 if (x86_64_zero_extended_value (fnaddr))
9200 {
9201 fnaddr = copy_to_mode_reg (DImode, fnaddr);
9202 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9203 GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
9204 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
9205 gen_lowpart (SImode, fnaddr));
9206 offset += 6;
9207 }
9208 else
9209 {
9210 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9211 GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
9212 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
9213 fnaddr);
9214 offset += 10;
9215 }
9216 /* Load static chain using movabs to r10. */
9217 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9218 GEN_INT (trunc_int_for_mode (0xba49, HImode)));
9219 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
9220 cxt);
9221 offset += 10;
9222 /* Jump to the r11 */
9223 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9224 GEN_INT (trunc_int_for_mode (0xff49, HImode)));
9225 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
9226 GEN_INT (trunc_int_for_mode (0xe3, HImode)));
9227 offset += 3;
9228 if (offset > TRAMPOLINE_SIZE)
9229 abort();
9230 }
9231}
bd793c65
BS
9232
/* Convenience wrapper: register the target-specific builtin NAME with
   function type TYPE and identifying code CODE.  */
#define def_builtin(NAME, TYPE, CODE) \
  builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL)

/* Describes one MMX/SSE builtin that maps directly onto an insn
   pattern; used by the bdesc_* tables below.  */
struct builtin_description
{
  /* Insn pattern implementing the builtin.  */
  enum insn_code icode;
  /* Builtin function name, or 0 if the builtin is registered
     separately with more specialized types.  */
  const char * name;
  /* The IX86_BUILTIN_* code.  */
  enum ix86_builtins code;
  /* For comparison builtins, the rtx comparison code to generate.  */
  enum rtx_code comparison;
  /* NOTE(review): judging from the tables below (e.g. GT entries
     using LT with flag 1), a nonzero flag appears to mean "swap the
     operands" -- confirm against the expanders.  */
  unsigned int flag;
};
9243
/* SSE scalar compare builtins mapping to the comiss/ucomiss insns.
   GT/GE are expressed as LT/LE with the flag field set.  */
static struct builtin_description bdesc_comi[] =
{
  { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
};
9259
/* Two-operand MMX/SSE builtins.  Entries with a zero name are
   registered separately with specialized function types in
   ix86_init_mmx_sse_builtins.  */
static struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  /* SSE comparisons; GT/GE use LT/LE with the swap flag set.  */
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },

  { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },

  { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  */
  { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },

  { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }

};
9392
/* One-operand MMX/SSE builtins.  All entries have a zero name and are
   registered with specialized types in ix86_init_mmx_sse_builtins.  */
static struct builtin_description bdesc_1arg[] =
{
  { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }

};
9408
f6155fda
SS
9409/* Set up all the target-specific builtins. */
9410void
9411ix86_init_builtins ()
9412{
9413 if (TARGET_MMX)
9414 ix86_init_mmx_sse_builtins ();
9415}
9416
9417/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
bd793c65
BS
9418 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
9419 builtins. */
9420void
f6155fda 9421ix86_init_mmx_sse_builtins ()
bd793c65
BS
9422{
9423 struct builtin_description * d;
77ebd435 9424 size_t i;
cbd5937a 9425 tree endlink = void_list_node;
bd793c65
BS
9426
9427 tree pchar_type_node = build_pointer_type (char_type_node);
9428 tree pfloat_type_node = build_pointer_type (float_type_node);
9429 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
9430 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
9431
9432 /* Comparisons. */
9433 tree int_ftype_v4sf_v4sf
9434 = build_function_type (integer_type_node,
9435 tree_cons (NULL_TREE, V4SF_type_node,
9436 tree_cons (NULL_TREE,
9437 V4SF_type_node,
9438 endlink)));
9439 tree v4si_ftype_v4sf_v4sf
9440 = build_function_type (V4SI_type_node,
9441 tree_cons (NULL_TREE, V4SF_type_node,
9442 tree_cons (NULL_TREE,
9443 V4SF_type_node,
9444 endlink)));
9445 /* MMX/SSE/integer conversions. */
9446 tree int_ftype_v4sf_int
9447 = build_function_type (integer_type_node,
9448 tree_cons (NULL_TREE, V4SF_type_node,
9449 tree_cons (NULL_TREE,
9450 integer_type_node,
9451 endlink)));
9452 tree int_ftype_v4sf
9453 = build_function_type (integer_type_node,
9454 tree_cons (NULL_TREE, V4SF_type_node,
9455 endlink));
9456 tree int_ftype_v8qi
9457 = build_function_type (integer_type_node,
9458 tree_cons (NULL_TREE, V8QI_type_node,
9459 endlink));
9460 tree int_ftype_v2si
9461 = build_function_type (integer_type_node,
9462 tree_cons (NULL_TREE, V2SI_type_node,
9463 endlink));
9464 tree v2si_ftype_int
9465 = build_function_type (V2SI_type_node,
9466 tree_cons (NULL_TREE, integer_type_node,
9467 endlink));
9468 tree v4sf_ftype_v4sf_int
9469 = build_function_type (integer_type_node,
9470 tree_cons (NULL_TREE, V4SF_type_node,
9471 tree_cons (NULL_TREE, integer_type_node,
9472 endlink)));
9473 tree v4sf_ftype_v4sf_v2si
9474 = build_function_type (V4SF_type_node,
9475 tree_cons (NULL_TREE, V4SF_type_node,
9476 tree_cons (NULL_TREE, V2SI_type_node,
9477 endlink)));
9478 tree int_ftype_v4hi_int
9479 = build_function_type (integer_type_node,
9480 tree_cons (NULL_TREE, V4HI_type_node,
9481 tree_cons (NULL_TREE, integer_type_node,
9482 endlink)));
9483 tree v4hi_ftype_v4hi_int_int
332316cd 9484 = build_function_type (V4HI_type_node,
bd793c65
BS
9485 tree_cons (NULL_TREE, V4HI_type_node,
9486 tree_cons (NULL_TREE, integer_type_node,
9487 tree_cons (NULL_TREE,
9488 integer_type_node,
9489 endlink))));
9490 /* Miscellaneous. */
9491 tree v8qi_ftype_v4hi_v4hi
9492 = build_function_type (V8QI_type_node,
9493 tree_cons (NULL_TREE, V4HI_type_node,
9494 tree_cons (NULL_TREE, V4HI_type_node,
9495 endlink)));
9496 tree v4hi_ftype_v2si_v2si
9497 = build_function_type (V4HI_type_node,
9498 tree_cons (NULL_TREE, V2SI_type_node,
9499 tree_cons (NULL_TREE, V2SI_type_node,
9500 endlink)));
9501 tree v4sf_ftype_v4sf_v4sf_int
9502 = build_function_type (V4SF_type_node,
9503 tree_cons (NULL_TREE, V4SF_type_node,
9504 tree_cons (NULL_TREE, V4SF_type_node,
9505 tree_cons (NULL_TREE,
9506 integer_type_node,
9507 endlink))));
9508 tree v4hi_ftype_v8qi_v8qi
9509 = build_function_type (V4HI_type_node,
9510 tree_cons (NULL_TREE, V8QI_type_node,
9511 tree_cons (NULL_TREE, V8QI_type_node,
9512 endlink)));
9513 tree v2si_ftype_v4hi_v4hi
9514 = build_function_type (V2SI_type_node,
9515 tree_cons (NULL_TREE, V4HI_type_node,
9516 tree_cons (NULL_TREE, V4HI_type_node,
9517 endlink)));
9518 tree v4hi_ftype_v4hi_int
9519 = build_function_type (V4HI_type_node,
9520 tree_cons (NULL_TREE, V4HI_type_node,
9521 tree_cons (NULL_TREE, integer_type_node,
9522 endlink)));
9523 tree di_ftype_di_int
9524 = build_function_type (long_long_unsigned_type_node,
9525 tree_cons (NULL_TREE, long_long_unsigned_type_node,
9526 tree_cons (NULL_TREE, integer_type_node,
9527 endlink)));
9528 tree v8qi_ftype_v8qi_di
9529 = build_function_type (V8QI_type_node,
9530 tree_cons (NULL_TREE, V8QI_type_node,
9531 tree_cons (NULL_TREE,
9532 long_long_integer_type_node,
9533 endlink)));
9534 tree v4hi_ftype_v4hi_di
9535 = build_function_type (V4HI_type_node,
9536 tree_cons (NULL_TREE, V4HI_type_node,
9537 tree_cons (NULL_TREE,
9538 long_long_integer_type_node,
9539 endlink)));
9540 tree v2si_ftype_v2si_di
9541 = build_function_type (V2SI_type_node,
9542 tree_cons (NULL_TREE, V2SI_type_node,
9543 tree_cons (NULL_TREE,
9544 long_long_integer_type_node,
9545 endlink)));
9546 tree void_ftype_void
9547 = build_function_type (void_type_node, endlink);
9548 tree void_ftype_pchar_int
9549 = build_function_type (void_type_node,
9550 tree_cons (NULL_TREE, pchar_type_node,
9551 tree_cons (NULL_TREE, integer_type_node,
9552 endlink)));
9553 tree void_ftype_unsigned
9554 = build_function_type (void_type_node,
9555 tree_cons (NULL_TREE, unsigned_type_node,
9556 endlink));
9557 tree unsigned_ftype_void
9558 = build_function_type (unsigned_type_node, endlink);
9559 tree di_ftype_void
9560 = build_function_type (long_long_unsigned_type_node, endlink);
9561 tree ti_ftype_void
9562 = build_function_type (intTI_type_node, endlink);
9563 tree v2si_ftype_v4sf
9564 = build_function_type (V2SI_type_node,
9565 tree_cons (NULL_TREE, V4SF_type_node,
9566 endlink));
9567 /* Loads/stores. */
9568 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
9569 tree_cons (NULL_TREE, V8QI_type_node,
9570 tree_cons (NULL_TREE,
9571 pchar_type_node,
9572 endlink)));
9573 tree void_ftype_v8qi_v8qi_pchar
9574 = build_function_type (void_type_node, maskmovq_args);
9575 tree v4sf_ftype_pfloat
9576 = build_function_type (V4SF_type_node,
9577 tree_cons (NULL_TREE, pfloat_type_node,
9578 endlink));
9579 tree v4sf_ftype_float
9580 = build_function_type (V4SF_type_node,
9581 tree_cons (NULL_TREE, float_type_node,
9582 endlink));
9583 tree v4sf_ftype_float_float_float_float
9584 = build_function_type (V4SF_type_node,
9585 tree_cons (NULL_TREE, float_type_node,
9586 tree_cons (NULL_TREE, float_type_node,
9587 tree_cons (NULL_TREE,
9588 float_type_node,
9589 tree_cons (NULL_TREE,
9590 float_type_node,
9591 endlink)))));
9592 /* @@@ the type is bogus */
9593 tree v4sf_ftype_v4sf_pv2si
9594 = build_function_type (V4SF_type_node,
9595 tree_cons (NULL_TREE, V4SF_type_node,
9596 tree_cons (NULL_TREE, pv2si_type_node,
9597 endlink)));
9598 tree v4sf_ftype_pv2si_v4sf
9599 = build_function_type (V4SF_type_node,
9600 tree_cons (NULL_TREE, V4SF_type_node,
9601 tree_cons (NULL_TREE, pv2si_type_node,
9602 endlink)));
9603 tree void_ftype_pfloat_v4sf
9604 = build_function_type (void_type_node,
9605 tree_cons (NULL_TREE, pfloat_type_node,
9606 tree_cons (NULL_TREE, V4SF_type_node,
9607 endlink)));
9608 tree void_ftype_pdi_di
9609 = build_function_type (void_type_node,
9610 tree_cons (NULL_TREE, pdi_type_node,
9611 tree_cons (NULL_TREE,
9612 long_long_unsigned_type_node,
9613 endlink)));
9614 /* Normal vector unops. */
9615 tree v4sf_ftype_v4sf
9616 = build_function_type (V4SF_type_node,
9617 tree_cons (NULL_TREE, V4SF_type_node,
9618 endlink));
0f290768 9619
bd793c65
BS
9620 /* Normal vector binops. */
9621 tree v4sf_ftype_v4sf_v4sf
9622 = build_function_type (V4SF_type_node,
9623 tree_cons (NULL_TREE, V4SF_type_node,
9624 tree_cons (NULL_TREE, V4SF_type_node,
9625 endlink)));
9626 tree v8qi_ftype_v8qi_v8qi
9627 = build_function_type (V8QI_type_node,
9628 tree_cons (NULL_TREE, V8QI_type_node,
9629 tree_cons (NULL_TREE, V8QI_type_node,
9630 endlink)));
9631 tree v4hi_ftype_v4hi_v4hi
9632 = build_function_type (V4HI_type_node,
9633 tree_cons (NULL_TREE, V4HI_type_node,
9634 tree_cons (NULL_TREE, V4HI_type_node,
9635 endlink)));
9636 tree v2si_ftype_v2si_v2si
9637 = build_function_type (V2SI_type_node,
9638 tree_cons (NULL_TREE, V2SI_type_node,
9639 tree_cons (NULL_TREE, V2SI_type_node,
9640 endlink)));
9641 tree ti_ftype_ti_ti
9642 = build_function_type (intTI_type_node,
9643 tree_cons (NULL_TREE, intTI_type_node,
9644 tree_cons (NULL_TREE, intTI_type_node,
9645 endlink)));
9646 tree di_ftype_di_di
9647 = build_function_type (long_long_unsigned_type_node,
9648 tree_cons (NULL_TREE, long_long_unsigned_type_node,
9649 tree_cons (NULL_TREE,
9650 long_long_unsigned_type_node,
9651 endlink)));
9652
9653 /* Add all builtins that are more or less simple operations on two
9654 operands. */
9655 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
9656 {
9657 /* Use one of the operands; the target can have a different mode for
9658 mask-generating compares. */
9659 enum machine_mode mode;
9660 tree type;
9661
9662 if (d->name == 0)
9663 continue;
9664 mode = insn_data[d->icode].operand[1].mode;
9665
9666 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
9667 continue;
9668
9669 switch (mode)
9670 {
9671 case V4SFmode:
9672 type = v4sf_ftype_v4sf_v4sf;
9673 break;
9674 case V8QImode:
9675 type = v8qi_ftype_v8qi_v8qi;
9676 break;
9677 case V4HImode:
9678 type = v4hi_ftype_v4hi_v4hi;
9679 break;
9680 case V2SImode:
9681 type = v2si_ftype_v2si_v2si;
9682 break;
9683 case TImode:
9684 type = ti_ftype_ti_ti;
9685 break;
9686 case DImode:
9687 type = di_ftype_di_di;
9688 break;
9689
9690 default:
9691 abort ();
9692 }
0f290768 9693
bd793c65
BS
9694 /* Override for comparisons. */
9695 if (d->icode == CODE_FOR_maskcmpv4sf3
9696 || d->icode == CODE_FOR_maskncmpv4sf3
9697 || d->icode == CODE_FOR_vmmaskcmpv4sf3
9698 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
9699 type = v4si_ftype_v4sf_v4sf;
9700
9701 def_builtin (d->name, type, d->code);
9702 }
9703
9704 /* Add the remaining MMX insns with somewhat more complicated types. */
9705 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
9706 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
9707 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
9708 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
9709 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
9710 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
9711 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
9712 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
9713 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
9714
9715 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
9716 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
9717 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
9718
9719 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
9720 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
9721
9722 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
9723 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
9724
9725 /* Everything beyond this point is SSE only. */
9726 if (! TARGET_SSE)
9727 return;
0f290768 9728
bd793c65
BS
9729 /* comi/ucomi insns. */
9730 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
9731 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
9732
9733 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
9734 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
9735 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
9736
9737 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
9738 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
9739 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
9740 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
9741 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
9742 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
9743
9744 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
9745 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
9746
9747 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
9748
9749 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
9750 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
9751 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
9752 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
9753 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
9754 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
9755
9756 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
9757 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
9758 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
9759 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
9760
9761 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
9762 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
9763 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
9764 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
9765
9766 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
9767 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
9768
9769 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
9770
9771 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
9772 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
9773 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
9774 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
9775 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
9776 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
9777
9778 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
9779
9780 /* Composite intrinsics. */
9781 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
9782 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
9783 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
9784 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
9785 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
9786 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
9787 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
9788}
9789
9790/* Errors in the source file can cause expand_expr to return const0_rtx
9791 where we expect a vector. To avoid crashing, use one of the vector
9792 clear instructions. */
9793static rtx
9794safe_vector_operand (x, mode)
9795 rtx x;
9796 enum machine_mode mode;
9797{
9798 if (x != const0_rtx)
9799 return x;
9800 x = gen_reg_rtx (mode);
9801
9802 if (VALID_MMX_REG_MODE (mode))
9803 emit_insn (gen_mmx_clrdi (mode == DImode ? x
9804 : gen_rtx_SUBREG (DImode, x, 0)));
9805 else
9806 emit_insn (gen_sse_clrti (mode == TImode ? x
9807 : gen_rtx_SUBREG (TImode, x, 0)));
9808 return x;
9809}
9810
9811/* Subroutine of ix86_expand_builtin to take care of binop insns. */
9812
9813static rtx
9814ix86_expand_binop_builtin (icode, arglist, target)
9815 enum insn_code icode;
9816 tree arglist;
9817 rtx target;
9818{
9819 rtx pat;
9820 tree arg0 = TREE_VALUE (arglist);
9821 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9822 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9823 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9824 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9825 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
9826 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
9827
9828 if (VECTOR_MODE_P (mode0))
9829 op0 = safe_vector_operand (op0, mode0);
9830 if (VECTOR_MODE_P (mode1))
9831 op1 = safe_vector_operand (op1, mode1);
9832
9833 if (! target
9834 || GET_MODE (target) != tmode
9835 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9836 target = gen_reg_rtx (tmode);
9837
9838 /* In case the insn wants input operands in modes different from
9839 the result, abort. */
9840 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
9841 abort ();
9842
9843 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9844 op0 = copy_to_mode_reg (mode0, op0);
9845 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
9846 op1 = copy_to_mode_reg (mode1, op1);
9847
9848 pat = GEN_FCN (icode) (target, op0, op1);
9849 if (! pat)
9850 return 0;
9851 emit_insn (pat);
9852 return target;
9853}
9854
9855/* Subroutine of ix86_expand_builtin to take care of stores. */
9856
9857static rtx
9858ix86_expand_store_builtin (icode, arglist, shuffle)
9859 enum insn_code icode;
9860 tree arglist;
9861 int shuffle;
9862{
9863 rtx pat;
9864 tree arg0 = TREE_VALUE (arglist);
9865 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9866 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9867 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9868 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
9869 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
9870
9871 if (VECTOR_MODE_P (mode1))
9872 op1 = safe_vector_operand (op1, mode1);
9873
9874 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
9875 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
9876 op1 = copy_to_mode_reg (mode1, op1);
9877 if (shuffle >= 0)
9878 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
9879 pat = GEN_FCN (icode) (op0, op1);
9880 if (pat)
9881 emit_insn (pat);
9882 return 0;
9883}
9884
9885/* Subroutine of ix86_expand_builtin to take care of unop insns. */
9886
9887static rtx
9888ix86_expand_unop_builtin (icode, arglist, target, do_load)
9889 enum insn_code icode;
9890 tree arglist;
9891 rtx target;
9892 int do_load;
9893{
9894 rtx pat;
9895 tree arg0 = TREE_VALUE (arglist);
9896 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9897 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9898 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
9899
9900 if (! target
9901 || GET_MODE (target) != tmode
9902 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9903 target = gen_reg_rtx (tmode);
9904 if (do_load)
9905 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
9906 else
9907 {
9908 if (VECTOR_MODE_P (mode0))
9909 op0 = safe_vector_operand (op0, mode0);
9910
9911 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9912 op0 = copy_to_mode_reg (mode0, op0);
9913 }
9914
9915 pat = GEN_FCN (icode) (target, op0);
9916 if (! pat)
9917 return 0;
9918 emit_insn (pat);
9919 return target;
9920}
9921
9922/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
9923 sqrtss, rsqrtss, rcpss. */
9924
9925static rtx
9926ix86_expand_unop1_builtin (icode, arglist, target)
9927 enum insn_code icode;
9928 tree arglist;
9929 rtx target;
9930{
9931 rtx pat;
9932 tree arg0 = TREE_VALUE (arglist);
9933 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9934 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9935 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
9936
9937 if (! target
9938 || GET_MODE (target) != tmode
9939 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9940 target = gen_reg_rtx (tmode);
9941
9942 if (VECTOR_MODE_P (mode0))
9943 op0 = safe_vector_operand (op0, mode0);
9944
9945 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9946 op0 = copy_to_mode_reg (mode0, op0);
9947
9948 pat = GEN_FCN (icode) (target, op0, op0);
9949 if (! pat)
9950 return 0;
9951 emit_insn (pat);
9952 return target;
9953}
9954
/* Subroutine of ix86_expand_builtin to take care of comparison insns.
   D describes the builtin (icode, comparison code, and the FLAG that
   requests an operand swap); ARGLIST holds the two vector arguments;
   TARGET, if usable, receives the mask result.  Returns the result
   register, or 0 if the pattern could not be generated.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  /* Replace a const0_rtx produced by erroneous source with a cleared
     vector register.  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      /* Note the ordering here: OP1 is first copied into a fresh
	 register which serves both as the swapped first operand and as
	 the result; only then is the old OP0 moved into OP1.  */
      target = gen_reg_rtx (tmode);
      emit_move_insn (target, op1);
      op1 = op0;
      op0 = target;
      comparison = swap_condition (comparison);
    }
  else if (! target
	   || GET_MODE (target) != tmode
	   || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* The comparison rtx is passed as an extra operand so the pattern
     knows which condition to generate.  */
  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
10006
/* Subroutine of ix86_expand_builtin to take care of comi insns.
   D describes the builtin; ARGLIST holds the two scalar-vector
   arguments.  Returns a QImode subreg holding the 0/1 result of the
   comparison, or 0 if the pattern could not be generated.  */

static rtx
ix86_expand_sse_comi (d, arglist, target)
     struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
      comparison = swap_condition (comparison);
    }

  /* The result is a QImode setcc on the low byte of an SImode register
     that has been zeroed first, so the upper three bytes are known to
     be zero.  Any incoming TARGET is ignored.  */
  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* Emit the flag-setting compare, then the setcc that reads the
     flags into TARGET.  */
  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_setcc_2 (target, op2));

  return target;
}
10058
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.

   Builtins with irregular shapes are handled case by case below; the
   regular one- and two-operand builtins fall through to the table
   searches (bdesc_2arg, bdesc_1arg, bdesc_comi) at the bottom.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2, arg3;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_M_FROM_INT:
      /* Write the integer into the low half of a fresh DImode reg.  */
      target = gen_reg_rtx (DImode);
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
      return target;

    case IX86_BUILTIN_M_TO_INT:
      /* Read the low half of the DImode operand back as an int.  */
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      op0 = copy_to_mode_reg (DImode, op0);
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
      return target;

    case IX86_BUILTIN_PEXTRW:
      /* pextrw: the selector (operand 2) must be a literal constant.  */
      icode = CODE_FOR_mmx_pextrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
      /* pinsrw: three operands; the selector (operand 3) must be a
	 literal constant.  */
      icode = CODE_FOR_mmx_pinsrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
      icode = CODE_FOR_mmx_maskmovq;
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      /* NOTE(review): OP0 is validated against operand 1's predicate
	 here rather than operand 0's — confirm against the
	 mmx_maskmovq pattern.  */
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
      /* Merge the low/high half from memory into an existing vector;
	 the second argument is a pointer dereferenced below.  */
      icode = (fcode == IX86_BUILTIN_LOADHPS
	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS
	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      /* The same MEM is used as both destination and first source:
	 the pattern merges one half from OP1 into it.  */
      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);

    case IX86_BUILTIN_LDMXCSR:
      /* ldmxcsr reads from memory, so bounce the value through a stack
	 slot.  */
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      /* stmxcsr writes to memory; store into a stack slot and load the
	 result back into a register.  */
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_PREFETCH:
      icode = CODE_FOR_prefetch;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}

      op0 = copy_to_mode_reg (Pmode, op0);
      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      /* Prefetch produces no value; TARGET is returned unchanged.  */
      return target;

    case IX86_BUILTIN_SHUFPS:
      /* shufps: the mask (operand 3) must be a literal constant.  */
      icode = CODE_FOR_sse_shufps;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
      icode = CODE_FOR_mmx_pshufw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      /* NOTE(review): MODE0/MODE1 are taken from operands 2 and 3, but
	 the first predicate below checks operand 1 — confirm against
	 the mmx_pshufw pattern's operand layout.  */
      mode0 = insn_data[icode].operand[2].mode;
      mode1 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      /* TARGET doubles as the pattern's first (match_dup) input.  */
      pat = GEN_FCN (icode) (target, target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

      /* Composite intrinsics.  */
    case IX86_BUILTIN_SETPS1:
      /* Splat one float: store it to a stack slot, loadss it, then
	 broadcast with shufps(0).  */
      target = assign_386_stack_local (SFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, SFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_loadss (op0, adjust_address (target, V4SFmode, 0)));
      emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
      return op0;

    case IX86_BUILTIN_SETPS:
      /* Build a vector from four floats via a stack slot, then load
	 the whole slot with movaps.  */
      target = assign_386_stack_local (V4SFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
      emit_move_insn (adjust_address (target, SFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, SFmode, 4),
		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, SFmode, 8),
		      expand_expr (arg2, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, SFmode, 12),
		      expand_expr (arg3, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_movaps (op0, target));
      return op0;

    case IX86_BUILTIN_CLRPS:
      target = gen_reg_rtx (TImode);
      emit_insn (gen_sse_clrti (target));
      return target;

    case IX86_BUILTIN_LOADRPS:
      /* Load, then reverse the element order with shufps(0x1b).  */
      target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
					 gen_reg_rtx (V4SFmode), 1);
      emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
      return target;

    case IX86_BUILTIN_LOADPS1:
      /* Load one float, then broadcast it with shufps(0).  */
      target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
					 gen_reg_rtx (V4SFmode), 1);
      emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPS1:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
    case IX86_BUILTIN_STORERPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    default:
      break;
    }

  /* Regular two-operand builtins are table driven.  */
  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}
4211a8fb
JH
10447
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  Returns a MEM rtx
   through which the stored value can be accessed in MODE.

   Three strategies, depending on the target:
     - 64-bit with a red zone: store below the stack pointer (no
       adjustment needed);
     - 64-bit without a red zone: push the value (widened to DImode);
     - 32-bit: push as one or two SImode words.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      /* The red zone below the stack pointer is safe scratch space;
	 no stack-pointer adjustment is required.  */
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      /* Push the value as a single DImode word, widening narrower
	 modes first.  */
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
		gen_rtx_SET (VOIDmode,
			     gen_rtx_MEM (DImode,
					  gen_rtx_PRE_DEC (DImode,
							   stack_pointer_rtx)),
			     operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    /* 32-bit target: push the high word first, then the low
	       word, so the value reads correctly at the final stack
	       pointer.  */
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
		gen_rtx_SET (VOIDmode,
			     gen_rtx_MEM (SImode,
					  gen_rtx_PRE_DEC (Pmode,
							   stack_pointer_rtx)),
			     operands[1]));
	    emit_insn (
		gen_rtx_SET (VOIDmode,
			     gen_rtx_MEM (SImode,
					  gen_rtx_PRE_DEC (Pmode,
							   stack_pointer_rtx)),
			     operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
		gen_rtx_SET (VOIDmode,
			     gen_rtx_MEM (GET_MODE (operand),
					  gen_rtx_PRE_DEC (SImode,
							   stack_pointer_rtx)),
			     operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
10529
10530/* Free operand from the memory. */
10531void
10532ix86_free_from_memory (mode)
10533 enum machine_mode mode;
10534{
898d374d
JH
10535 if (!TARGET_64BIT || !TARGET_RED_ZONE)
10536 {
10537 int size;
10538
10539 if (mode == DImode || TARGET_64BIT)
10540 size = 8;
10541 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
10542 size = 2;
10543 else
10544 size = 4;
10545 /* Use LEA to deallocate stack space. In peephole2 it will be converted
10546 to pop or add instruction if registers are available. */
10547 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10548 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10549 GEN_INT (size))));
10550 }
4211a8fb 10551}
a946dd00 10552
f84aa48a
JH
10553/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
10554 QImode must go into class Q_REGS.
10555 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
10556 movdf to do mem-to-mem moves through integer regs. */
10557enum reg_class
10558ix86_preferred_reload_class (x, class)
10559 rtx x;
10560 enum reg_class class;
10561{
10562 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
10563 {
10564 /* SSE can't load any constant directly yet. */
10565 if (SSE_CLASS_P (class))
10566 return NO_REGS;
10567 /* Floats can load 0 and 1. */
10568 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
10569 {
10570 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
10571 if (MAYBE_SSE_CLASS_P (class))
10572 return (reg_class_subset_p (class, GENERAL_REGS)
10573 ? GENERAL_REGS : FLOAT_REGS);
10574 else
10575 return class;
10576 }
10577 /* General regs can load everything. */
10578 if (reg_class_subset_p (class, GENERAL_REGS))
10579 return GENERAL_REGS;
10580 /* In case we haven't resolved FLOAT or SSE yet, give up. */
10581 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
10582 return NO_REGS;
10583 }
10584 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
10585 return NO_REGS;
10586 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
10587 return Q_REGS;
10588 return class;
10589}
10590
10591/* If we are copying between general and FP registers, we need a memory
10592 location. The same is true for SSE and MMX registers.
10593
10594 The macro can't work reliably when one of the CLASSES is class containing
10595 registers from multiple units (SSE, MMX, integer). We avoid this by never
10596 combining those units in single alternative in the machine description.
10597 Ensure that this constraint holds to avoid unexpected surprises.
10598
10599 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
10600 enforce these sanity checks. */
10601int
10602ix86_secondary_memory_needed (class1, class2, mode, strict)
10603 enum reg_class class1, class2;
10604 enum machine_mode mode;
10605 int strict;
10606{
10607 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
10608 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
10609 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
10610 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
10611 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
10612 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
10613 {
10614 if (strict)
10615 abort ();
10616 else
10617 return 1;
10618 }
10619 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
10620 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
10621 && (mode) != SImode)
10622 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
10623 && (mode) != SImode));
10624}
10625/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 10626 one in class CLASS2.
f84aa48a
JH
10627
10628 It is not required that the cost always equal 2 when FROM is the same as TO;
10629 on some machines it is expensive to move between registers if they are not
10630 general registers. */
10631int
10632ix86_register_move_cost (mode, class1, class2)
10633 enum machine_mode mode;
10634 enum reg_class class1, class2;
10635{
10636 /* In case we require secondary memory, compute cost of the store followed
10637 by load. In case of copying from general_purpose_register we may emit
10638 multiple stores followed by single load causing memory size mismatch
10639 stall. Count this as arbitarily high cost of 20. */
10640 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
10641 {
92d0fb09 10642 int add_cost = 0;
62415523 10643 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
92d0fb09 10644 add_cost = 20;
62415523 10645 return (MEMORY_MOVE_COST (mode, class1, 0)
92d0fb09 10646 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
f84aa48a 10647 }
92d0fb09 10648 /* Moves between SSE/MMX and integer unit are expensive. */
62415523
JH
10649 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
10650 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
fa79946e
JH
10651 return ix86_cost->mmxsse_to_integer;
10652 if (MAYBE_FLOAT_CLASS_P (class1))
10653 return ix86_cost->fp_move;
10654 if (MAYBE_SSE_CLASS_P (class1))
10655 return ix86_cost->sse_move;
10656 if (MAYBE_MMX_CLASS_P (class1))
10657 return ix86_cost->mmx_move;
f84aa48a
JH
10658 return 2;
10659}
10660
a946dd00
JH
10661/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
10662int
10663ix86_hard_regno_mode_ok (regno, mode)
10664 int regno;
10665 enum machine_mode mode;
10666{
10667 /* Flags and only flags can only hold CCmode values. */
10668 if (CC_REGNO_P (regno))
10669 return GET_MODE_CLASS (mode) == MODE_CC;
10670 if (GET_MODE_CLASS (mode) == MODE_CC
10671 || GET_MODE_CLASS (mode) == MODE_RANDOM
10672 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
10673 return 0;
10674 if (FP_REGNO_P (regno))
10675 return VALID_FP_MODE_P (mode);
10676 if (SSE_REGNO_P (regno))
10677 return VALID_SSE_REG_MODE (mode);
10678 if (MMX_REGNO_P (regno))
10679 return VALID_MMX_REG_MODE (mode);
10680 /* We handle both integer and floats in the general purpose registers.
10681 In future we should be able to handle vector modes as well. */
10682 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
10683 return 0;
10684 /* Take care for QImode values - they can be in non-QI regs, but then
10685 they do cause partial register stalls. */
d2836273 10686 if (regno < 4 || mode != QImode || TARGET_64BIT)
a946dd00
JH
10687 return 1;
10688 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
10689}
fa79946e
JH
10690
10691/* Return the cost of moving data of mode M between a
10692 register and memory. A value of 2 is the default; this cost is
10693 relative to those in `REGISTER_MOVE_COST'.
10694
10695 If moving between registers and memory is more expensive than
10696 between two registers, you should define this macro to express the
a4f31c00
AJ
10697 relative cost.
10698
fa79946e
JH
10699 Model also increased moving costs of QImode registers in non
10700 Q_REGS classes.
10701 */
10702int
10703ix86_memory_move_cost (mode, class, in)
10704 enum machine_mode mode;
10705 enum reg_class class;
10706 int in;
10707{
10708 if (FLOAT_CLASS_P (class))
10709 {
10710 int index;
10711 switch (mode)
10712 {
10713 case SFmode:
10714 index = 0;
10715 break;
10716 case DFmode:
10717 index = 1;
10718 break;
10719 case XFmode:
10720 case TFmode:
10721 index = 2;
10722 break;
10723 default:
10724 return 100;
10725 }
10726 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
10727 }
10728 if (SSE_CLASS_P (class))
10729 {
10730 int index;
10731 switch (GET_MODE_SIZE (mode))
10732 {
10733 case 4:
10734 index = 0;
10735 break;
10736 case 8:
10737 index = 1;
10738 break;
10739 case 16:
10740 index = 2;
10741 break;
10742 default:
10743 return 100;
10744 }
10745 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
10746 }
10747 if (MMX_CLASS_P (class))
10748 {
10749 int index;
10750 switch (GET_MODE_SIZE (mode))
10751 {
10752 case 4:
10753 index = 0;
10754 break;
10755 case 8:
10756 index = 1;
10757 break;
10758 default:
10759 return 100;
10760 }
10761 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
10762 }
10763 switch (GET_MODE_SIZE (mode))
10764 {
10765 case 1:
10766 if (in)
10767 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
10768 : ix86_cost->movzbl_load);
10769 else
10770 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
10771 : ix86_cost->int_store[0] + 4);
10772 break;
10773 case 2:
10774 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
10775 default:
10776 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
10777 if (mode == TFmode)
10778 mode = XFmode;
3bb7e126 10779 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
fa79946e
JH
10780 * (int) GET_MODE_SIZE (mode) / 4);
10781 }
10782}
0ecf09f9
JH
10783
/* Most of current runtimes (Jul 2001) do not align stack properly when
   entering main, so emit a wrapper to align stack before the real main
   code is called.

   This can eventually go if we manage to fix the runtimes or teach gcc
   to dynamically align stack in main automatically.

   Adding check to configure is probably not good idea, as binary can move
   from one shared library to older.  */

static void
ix86_output_main_function_alignment_hack (file, size)
     FILE *file;
     int size ATTRIBUTE_UNUSED;
{
  rtx label;
  char buf[256];
  /* Only wrap the function actually named "main".  */
  if (strcmp (cfun->name, "main"))
    return;
  /* The wrapper below re-pushes only three argument words, so give up
     when main pops its own arguments or takes more than 12 bytes of
     them.  NOTE(review): an earlier comment here said "8 bytes", but
     the test below allows 12 -- the code is what is enforced.  */
  if (cfun->pops_args || cfun->args_size > 12)
    return;
  /* Nothing to do when no extra stack alignment is requested.  */
  if (PREFERRED_STACK_BOUNDARY <= 2)
    return;
  label = gen_label_rtx ();
  /* Emit a prologue that aligns %esp down to a 16-byte boundary,
     re-pushes the three incoming argument words onto the aligned
     stack, and calls the real body of main at the internal label
     emitted below.  */
  fprintf (file, "\tpushl\t%%ebp\n");
  fprintf (file, "\tmovl\t%%esp, %%ebp\n");
  fprintf (file, "\tandl\t$0xfffffff0, %%esp\n");
  fprintf (file, "\tpushl\t%%ebp\n");
  fprintf (file, "\tpushl\t16(%%ebp)\n");
  fprintf (file, "\tpushl\t12(%%ebp)\n");
  fprintf (file, "\tpushl\t8(%%ebp)\n");
  fprintf (file, "\tcall\t");
  ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (label));
  assemble_name (file, buf);
  fprintf (file, "\n\tleave\n");
  fprintf (file, "\tret\n");
  ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (label));
}
This page took 2.35801 seconds and 5 git commands to generate.