]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
Daily bump.
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
   Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
2a2ab3f9 21
2a2ab3f9 22#include "config.h"
1fba7553 23#include <setjmp.h>
bb5177ac 24#include "system.h"
2a2ab3f9 25#include "rtl.h"
6baf1cc8
BS
26#include "tree.h"
27#include "tm_p.h"
2a2ab3f9
JVA
28#include "regs.h"
29#include "hard-reg-set.h"
30#include "real.h"
31#include "insn-config.h"
32#include "conditions.h"
2a2ab3f9
JVA
33#include "output.h"
34#include "insn-attr.h"
2a2ab3f9 35#include "flags.h"
a8ffcc81 36#include "except.h"
ecbc4695 37#include "function.h"
00c79232 38#include "recog.h"
ced8dd8c 39#include "expr.h"
f103890b 40#include "toplev.h"
e075ae69 41#include "basic-block.h"
1526a060 42#include "ggc.h"
672a6f42
NB
43#include "target.h"
44#include "target-def.h"
2a2ab3f9 45
8dfe5673
RK
46#ifndef CHECK_STACK_LIMIT
47#define CHECK_STACK_LIMIT -1
48#endif
49
32b5b1aa
SC
50/* Processor costs (relative to an add) */
51struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 52 1, /* cost of an add instruction */
32b5b1aa
SC
53 1, /* cost of a lea instruction */
54 3, /* variable shift costs */
55 2, /* constant shift costs */
56 6, /* cost of starting a multiply */
57 1, /* cost of multiply per each bit set */
e075ae69 58 23, /* cost of a divide/mod */
96e7ae40 59 15, /* "large" insn */
e2e52e1b 60 3, /* MOVE_RATIO */
7c6b971d 61 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
62 {2, 4, 2}, /* cost of loading integer registers
63 in QImode, HImode and SImode.
0f290768 64 Relative to reg-reg move (2). */
96e7ae40
JH
65 {2, 4, 2}, /* cost of storing integer registers */
66 2, /* cost of reg,reg fld/fst */
67 {8, 8, 8}, /* cost of loading fp registers
68 in SFmode, DFmode and XFmode */
fa79946e
JH
69 {8, 8, 8}, /* cost of loading integer registers */
70 2, /* cost of moving MMX register */
71 {4, 8}, /* cost of loading MMX registers
72 in SImode and DImode */
73 {4, 8}, /* cost of storing MMX registers
74 in SImode and DImode */
75 2, /* cost of moving SSE register */
76 {4, 8, 16}, /* cost of loading SSE registers
77 in SImode, DImode and TImode */
78 {4, 8, 16}, /* cost of storing SSE registers
79 in SImode, DImode and TImode */
80 3, /* MMX or SSE register to integer */
32b5b1aa
SC
81};
82
83struct processor_costs i486_cost = { /* 486 specific costs */
84 1, /* cost of an add instruction */
85 1, /* cost of a lea instruction */
86 3, /* variable shift costs */
87 2, /* constant shift costs */
88 12, /* cost of starting a multiply */
89 1, /* cost of multiply per each bit set */
e075ae69 90 40, /* cost of a divide/mod */
96e7ae40 91 15, /* "large" insn */
e2e52e1b 92 3, /* MOVE_RATIO */
7c6b971d 93 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
94 {2, 4, 2}, /* cost of loading integer registers
95 in QImode, HImode and SImode.
0f290768 96 Relative to reg-reg move (2). */
96e7ae40
JH
97 {2, 4, 2}, /* cost of storing integer registers */
98 2, /* cost of reg,reg fld/fst */
99 {8, 8, 8}, /* cost of loading fp registers
100 in SFmode, DFmode and XFmode */
fa79946e
JH
101 {8, 8, 8}, /* cost of loading integer registers */
102 2, /* cost of moving MMX register */
103 {4, 8}, /* cost of loading MMX registers
104 in SImode and DImode */
105 {4, 8}, /* cost of storing MMX registers
106 in SImode and DImode */
107 2, /* cost of moving SSE register */
108 {4, 8, 16}, /* cost of loading SSE registers
109 in SImode, DImode and TImode */
110 {4, 8, 16}, /* cost of storing SSE registers
111 in SImode, DImode and TImode */
112 3 /* MMX or SSE register to integer */
32b5b1aa
SC
113};
114
e5cb57e8 115struct processor_costs pentium_cost = {
32b5b1aa
SC
116 1, /* cost of an add instruction */
117 1, /* cost of a lea instruction */
856b07a1 118 4, /* variable shift costs */
e5cb57e8 119 1, /* constant shift costs */
856b07a1
SC
120 11, /* cost of starting a multiply */
121 0, /* cost of multiply per each bit set */
e075ae69 122 25, /* cost of a divide/mod */
96e7ae40 123 8, /* "large" insn */
e2e52e1b 124 6, /* MOVE_RATIO */
7c6b971d 125 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
126 {2, 4, 2}, /* cost of loading integer registers
127 in QImode, HImode and SImode.
0f290768 128 Relative to reg-reg move (2). */
96e7ae40
JH
129 {2, 4, 2}, /* cost of storing integer registers */
130 2, /* cost of reg,reg fld/fst */
131 {2, 2, 6}, /* cost of loading fp registers
132 in SFmode, DFmode and XFmode */
fa79946e
JH
133 {4, 4, 6}, /* cost of loading integer registers */
134 8, /* cost of moving MMX register */
135 {8, 8}, /* cost of loading MMX registers
136 in SImode and DImode */
137 {8, 8}, /* cost of storing MMX registers
138 in SImode and DImode */
139 2, /* cost of moving SSE register */
140 {4, 8, 16}, /* cost of loading SSE registers
141 in SImode, DImode and TImode */
142 {4, 8, 16}, /* cost of storing SSE registers
143 in SImode, DImode and TImode */
144 3 /* MMX or SSE register to integer */
32b5b1aa
SC
145};
146
856b07a1
SC
147struct processor_costs pentiumpro_cost = {
148 1, /* cost of an add instruction */
149 1, /* cost of a lea instruction */
e075ae69 150 1, /* variable shift costs */
856b07a1 151 1, /* constant shift costs */
369e59b1 152 4, /* cost of starting a multiply */
856b07a1 153 0, /* cost of multiply per each bit set */
e075ae69 154 17, /* cost of a divide/mod */
96e7ae40 155 8, /* "large" insn */
e2e52e1b 156 6, /* MOVE_RATIO */
7c6b971d 157 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
158 {4, 4, 4}, /* cost of loading integer registers
159 in QImode, HImode and SImode.
0f290768 160 Relative to reg-reg move (2). */
96e7ae40
JH
161 {2, 2, 2}, /* cost of storing integer registers */
162 2, /* cost of reg,reg fld/fst */
163 {2, 2, 6}, /* cost of loading fp registers
164 in SFmode, DFmode and XFmode */
fa79946e
JH
165 {4, 4, 6}, /* cost of loading integer registers */
166 2, /* cost of moving MMX register */
167 {2, 2}, /* cost of loading MMX registers
168 in SImode and DImode */
169 {2, 2}, /* cost of storing MMX registers
170 in SImode and DImode */
171 2, /* cost of moving SSE register */
172 {2, 2, 8}, /* cost of loading SSE registers
173 in SImode, DImode and TImode */
174 {2, 2, 8}, /* cost of storing SSE registers
175 in SImode, DImode and TImode */
176 3 /* MMX or SSE register to integer */
856b07a1
SC
177};
178
a269a03c
JC
179struct processor_costs k6_cost = {
180 1, /* cost of an add instruction */
e075ae69 181 2, /* cost of a lea instruction */
a269a03c
JC
182 1, /* variable shift costs */
183 1, /* constant shift costs */
73fe76e4 184 3, /* cost of starting a multiply */
a269a03c 185 0, /* cost of multiply per each bit set */
e075ae69 186 18, /* cost of a divide/mod */
96e7ae40 187 8, /* "large" insn */
e2e52e1b 188 4, /* MOVE_RATIO */
7c6b971d 189 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
190 {4, 5, 4}, /* cost of loading integer registers
191 in QImode, HImode and SImode.
0f290768 192 Relative to reg-reg move (2). */
96e7ae40
JH
193 {2, 3, 2}, /* cost of storing integer registers */
194 4, /* cost of reg,reg fld/fst */
195 {6, 6, 6}, /* cost of loading fp registers
196 in SFmode, DFmode and XFmode */
fa79946e
JH
197 {4, 4, 4}, /* cost of loading integer registers */
198 2, /* cost of moving MMX register */
199 {2, 2}, /* cost of loading MMX registers
200 in SImode and DImode */
201 {2, 2}, /* cost of storing MMX registers
202 in SImode and DImode */
203 2, /* cost of moving SSE register */
204 {2, 2, 8}, /* cost of loading SSE registers
205 in SImode, DImode and TImode */
206 {2, 2, 8}, /* cost of storing SSE registers
207 in SImode, DImode and TImode */
208 6 /* MMX or SSE register to integer */
a269a03c
JC
209};
210
309ada50
JH
211struct processor_costs athlon_cost = {
212 1, /* cost of an add instruction */
0b5107cf 213 2, /* cost of a lea instruction */
309ada50
JH
214 1, /* variable shift costs */
215 1, /* constant shift costs */
216 5, /* cost of starting a multiply */
217 0, /* cost of multiply per each bit set */
0b5107cf 218 42, /* cost of a divide/mod */
309ada50 219 8, /* "large" insn */
e2e52e1b 220 9, /* MOVE_RATIO */
309ada50
JH
221 4, /* cost for loading QImode using movzbl */
222 {4, 5, 4}, /* cost of loading integer registers
223 in QImode, HImode and SImode.
0f290768 224 Relative to reg-reg move (2). */
309ada50
JH
225 {2, 3, 2}, /* cost of storing integer registers */
226 4, /* cost of reg,reg fld/fst */
0b5107cf 227 {6, 6, 20}, /* cost of loading fp registers
309ada50 228 in SFmode, DFmode and XFmode */
fa79946e
JH
229 {4, 4, 16}, /* cost of loading integer registers */
230 2, /* cost of moving MMX register */
231 {2, 2}, /* cost of loading MMX registers
232 in SImode and DImode */
233 {2, 2}, /* cost of storing MMX registers
234 in SImode and DImode */
235 2, /* cost of moving SSE register */
236 {2, 2, 8}, /* cost of loading SSE registers
237 in SImode, DImode and TImode */
238 {2, 2, 8}, /* cost of storing SSE registers
239 in SImode, DImode and TImode */
240 6 /* MMX or SSE register to integer */
309ada50
JH
241};
242
b4e89e2d
JH
243struct processor_costs pentium4_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 8, /* variable shift costs */
247 8, /* constant shift costs */
248 30, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 112, /* cost of a divide/mod */
251 16, /* "large" insn */
252 6, /* MOVE_RATIO */
253 2, /* cost for loading QImode using movzbl */
254 {4, 5, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 3, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 12, /* cost of moving SSE register */
268 {12, 12, 12}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 10, /* MMX or SSE register to integer */
273};
274
32b5b1aa
SC
275struct processor_costs *ix86_cost = &pentium_cost;
276
a269a03c
JC
277/* Processor feature/optimization bitmasks. */
278#define m_386 (1<<PROCESSOR_I386)
279#define m_486 (1<<PROCESSOR_I486)
280#define m_PENT (1<<PROCESSOR_PENTIUM)
281#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
282#define m_K6 (1<<PROCESSOR_K6)
309ada50 283#define m_ATHLON (1<<PROCESSOR_ATHLON)
b4e89e2d 284#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
a269a03c 285
309ada50 286const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
b4e89e2d 287const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
a269a03c 288const int x86_zero_extend_with_and = m_486 | m_PENT;
b4e89e2d 289const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
e075ae69 290const int x86_double_with_add = ~m_386;
a269a03c 291const int x86_use_bit_test = m_386;
e2e52e1b 292const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
b4e89e2d
JH
293const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
294const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
ef6257cd 295const int x86_branch_hints = m_PENT4;
b4e89e2d 296const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
e075ae69
RH
297const int x86_partial_reg_stall = m_PPRO;
298const int x86_use_loop = m_K6;
309ada50 299const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
e075ae69
RH
300const int x86_use_mov0 = m_K6;
301const int x86_use_cltd = ~(m_PENT | m_K6);
302const int x86_read_modify_write = ~m_PENT;
303const int x86_read_modify = ~(m_PENT | m_PPRO);
304const int x86_split_long_moves = m_PPRO;
e9e80858 305const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
b4e89e2d 306const int x86_single_stringop = m_386 | m_PENT4;
d9f32422
JH
307const int x86_qimode_math = ~(0);
308const int x86_promote_qi_regs = 0;
309const int x86_himode_math = ~(m_PPRO);
310const int x86_promote_hi_regs = m_PPRO;
b4e89e2d
JH
311const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
312const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
313const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
314const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
315const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
316const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
317const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
c6036a37
JH
318const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
319const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
320const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
a269a03c 321
564d80f4 322#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
2a2ab3f9 323
e075ae69
RH
324const char * const hi_reg_name[] = HI_REGISTER_NAMES;
325const char * const qi_reg_name[] = QI_REGISTER_NAMES;
326const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
4c0d89b5
RS
327
328/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 329 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 330
e075ae69 331enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
332{
333 /* ax, dx, cx, bx */
ab408a86 334 AREG, DREG, CREG, BREG,
4c0d89b5 335 /* si, di, bp, sp */
e075ae69 336 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
337 /* FP registers */
338 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 339 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 340 /* arg pointer */
83774849 341 NON_Q_REGS,
564d80f4 342 /* flags, fpsr, dirflag, frame */
a7180f70
BS
343 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
344 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
345 SSE_REGS, SSE_REGS,
346 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
3d117b30
JH
347 MMX_REGS, MMX_REGS,
348 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
349 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
350 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
351 SSE_REGS, SSE_REGS,
4c0d89b5 352};
c572e5ba 353
3d117b30 354/* The "default" register map used in 32bit mode. */
83774849 355
0f290768 356int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
357{
358 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
359 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
3d117b30 360 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
361 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
362 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
3d117b30
JH
363 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
364 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
365};
366
0f7fa3d0
JH
367/* The "default" register map used in 64bit mode. */
368int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
369{
370 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
371 33, 34, 35, 36, 37, 38, 39, 40 /* fp regs */
372 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
373 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
374 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
375 8,9,10,11,12,13,14,15, /* extended integer registers */
376 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
377};
378
83774849
RH
379/* Define the register numbers to be used in Dwarf debugging information.
380 The SVR4 reference port C compiler uses the following register numbers
381 in its Dwarf output code:
382 0 for %eax (gcc regno = 0)
383 1 for %ecx (gcc regno = 2)
384 2 for %edx (gcc regno = 1)
385 3 for %ebx (gcc regno = 3)
386 4 for %esp (gcc regno = 7)
387 5 for %ebp (gcc regno = 6)
388 6 for %esi (gcc regno = 4)
389 7 for %edi (gcc regno = 5)
390 The following three DWARF register numbers are never generated by
391 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
392 believes these numbers have these meanings.
393 8 for %eip (no gcc equivalent)
394 9 for %eflags (gcc regno = 17)
395 10 for %trapno (no gcc equivalent)
396 It is not at all clear how we should number the FP stack registers
397 for the x86 architecture. If the version of SDB on x86/svr4 were
398 a bit less brain dead with respect to floating-point then we would
399 have a precedent to follow with respect to DWARF register numbers
400 for x86 FP registers, but the SDB on x86/svr4 is so completely
401 broken with respect to FP registers that it is hardly worth thinking
402 of it as something to strive for compatibility with.
403 The version of x86/svr4 SDB I have at the moment does (partially)
404 seem to believe that DWARF register number 11 is associated with
405 the x86 register %st(0), but that's about all. Higher DWARF
406 register numbers don't seem to be associated with anything in
407 particular, and even for DWARF regno 11, SDB only seems to under-
408 stand that it should say that a variable lives in %st(0) (when
409 asked via an `=' command) if we said it was in DWARF regno 11,
410 but SDB still prints garbage when asked for the value of the
411 variable in question (via a `/' command).
412 (Also note that the labels SDB prints for various FP stack regs
413 when doing an `x' command are all wrong.)
414 Note that these problems generally don't affect the native SVR4
415 C compiler because it doesn't allow the use of -O with -g and
416 because when it is *not* optimizing, it allocates a memory
417 location for each floating-point variable, and the memory
418 location is what gets described in the DWARF AT_location
419 attribute for the variable in question.
420 Regardless of the severe mental illness of the x86/svr4 SDB, we
421 do something sensible here and we use the following DWARF
422 register numbers. Note that these are all stack-top-relative
423 numbers.
424 11 for %st(0) (gcc regno = 8)
425 12 for %st(1) (gcc regno = 9)
426 13 for %st(2) (gcc regno = 10)
427 14 for %st(3) (gcc regno = 11)
428 15 for %st(4) (gcc regno = 12)
429 16 for %st(5) (gcc regno = 13)
430 17 for %st(6) (gcc regno = 14)
431 18 for %st(7) (gcc regno = 15)
432*/
0f290768 433int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
434{
435 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
436 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
3f3f2124 437 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
438 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
439 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
3f3f2124
JH
440 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
441 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
83774849
RH
442};
443
c572e5ba
JVA
444/* Test and compare insns in i386.md store the information needed to
445 generate branch and scc insns here. */
446
e075ae69
RH
447struct rtx_def *ix86_compare_op0 = NULL_RTX;
448struct rtx_def *ix86_compare_op1 = NULL_RTX;
f5316dfe 449
7a2e09f4 450#define MAX_386_STACK_LOCALS 3
8362f420
JH
451/* Size of the register save area. */
452#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
36edd3cc
BS
453
454/* Define the structure for the machine field in struct function. */
455struct machine_function
456{
457 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
8362f420 458 int save_varrargs_registers;
6fca22eb 459 int accesses_prev_frame;
36edd3cc
BS
460};
461
01d939e8 462#define ix86_stack_locals (cfun->machine->stack_locals)
8362f420 463#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
36edd3cc 464
4dd2ac2c
JH
465/* Structure describing stack frame layout.
466 Stack grows downward:
467
468 [arguments]
469 <- ARG_POINTER
470 saved pc
471
472 saved frame pointer if frame_pointer_needed
473 <- HARD_FRAME_POINTER
474 [saved regs]
475
476 [padding1] \
477 )
478 [va_arg registers] (
479 > to_allocate <- FRAME_POINTER
480 [frame] (
481 )
482 [padding2] /
483 */
484struct ix86_frame
485{
486 int nregs;
487 int padding1;
8362f420 488 int va_arg_size;
4dd2ac2c
JH
489 HOST_WIDE_INT frame;
490 int padding2;
491 int outgoing_arguments_size;
8362f420 492 int red_zone_size;
4dd2ac2c
JH
493
494 HOST_WIDE_INT to_allocate;
495 /* The offsets relative to ARG_POINTER. */
496 HOST_WIDE_INT frame_pointer_offset;
497 HOST_WIDE_INT hard_frame_pointer_offset;
498 HOST_WIDE_INT stack_pointer_offset;
499};
500
6189a572
JH
501/* Code model option as passed by user. */
502const char *ix86_cmodel_string;
503/* Parsed value. */
504enum cmodel ix86_cmodel;
505
c8c5cb99 506/* which cpu are we scheduling for */
e42ea7f9 507enum processor_type ix86_cpu;
c8c5cb99
SC
508
509/* which instruction set architecture to use. */
c942177e 510int ix86_arch;
c8c5cb99
SC
511
512/* Strings to hold which cpu and instruction set architecture to use. */
9c23aa47
ZW
513const char *ix86_cpu_string; /* for -mcpu=<xxx> */
514const char *ix86_arch_string; /* for -march=<xxx> */
c8c5cb99 515
0f290768 516/* # of registers to use to pass arguments. */
e075ae69 517const char *ix86_regparm_string;
e9a25f70 518
e075ae69
RH
519/* ix86_regparm_string as a number */
520int ix86_regparm;
e9a25f70
JL
521
522/* Alignment to use for loops and jumps: */
523
0f290768 524/* Power of two alignment for loops. */
e075ae69 525const char *ix86_align_loops_string;
e9a25f70 526
0f290768 527/* Power of two alignment for non-loop jumps. */
e075ae69 528const char *ix86_align_jumps_string;
e9a25f70 529
3af4bd89 530/* Power of two alignment for stack boundary in bytes. */
e075ae69 531const char *ix86_preferred_stack_boundary_string;
3af4bd89
JH
532
533/* Preferred alignment for stack boundary in bits. */
e075ae69 534int ix86_preferred_stack_boundary;
3af4bd89 535
e9a25f70 536/* Values 1-5: see jump.c */
e075ae69
RH
537int ix86_branch_cost;
538const char *ix86_branch_cost_string;
e9a25f70 539
0f290768 540/* Power of two alignment for functions. */
e075ae69 541const char *ix86_align_funcs_string;
e075ae69 542\f
f6da8bc3
KG
543static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
544static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
e075ae69 545 int, int, FILE *));
f6da8bc3 546static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
3a3677ff
RH
547static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
548 rtx *, rtx *));
f6da8bc3
KG
549static rtx gen_push PARAMS ((rtx));
550static int memory_address_length PARAMS ((rtx addr));
551static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
552static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
553static int ix86_safe_length PARAMS ((rtx));
554static enum attr_memory ix86_safe_memory PARAMS ((rtx));
555static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
556static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
557static void ix86_dump_ppro_packet PARAMS ((FILE *));
558static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
559static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
e075ae69 560 rtx));
f6da8bc3
KG
561static void ix86_init_machine_status PARAMS ((struct function *));
562static void ix86_mark_machine_status PARAMS ((struct function *));
37b15744 563static void ix86_free_machine_status PARAMS ((struct function *));
2b589241 564static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
f6da8bc3 565static int ix86_safe_length_prefix PARAMS ((rtx));
0903fcab
JH
566static int ix86_nsaved_regs PARAMS((void));
567static void ix86_emit_save_regs PARAMS((void));
c6036a37 568static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
37a58036 569static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
0e4970d7 570static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
c6991660
KG
571static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
572static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
55efb413 573static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
0945b39d
JH
574static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
575static rtx ix86_zero_extend_to_Pmode PARAMS ((rtx));
576static rtx ix86_expand_aligntest PARAMS ((rtx, int));
577static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
e075ae69
RH
578
579struct ix86_address
580{
581 rtx base, index, disp;
582 HOST_WIDE_INT scale;
583};
b08de47e 584
e075ae69 585static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
bd793c65
BS
586
587struct builtin_description;
588static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
589 rtx));
590static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
591 rtx));
592static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
593static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
594static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
595static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
596static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
c0c102a9
JH
597static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
598static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
599 enum rtx_code *,
600 enum rtx_code *,
601 enum rtx_code *));
9e7adcb3
JH
602static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
603 rtx *, rtx *));
604static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
605static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
606static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
607static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
37a58036 608static int ix86_save_reg PARAMS ((int, int));
4dd2ac2c 609static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
672a6f42
NB
610\f
611/* Initialize the GCC target structure. */
612#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
613#undef TARGET_MERGE_DECL_ATTRIBUTES
614#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
615#endif
616
617#undef TARGET_VALID_TYPE_ATTRIBUTE
618#define TARGET_VALID_TYPE_ATTRIBUTE ix86_valid_type_attribute_p
619
620struct gcc_target target = TARGET_INITIALIZER;
e075ae69 621\f
f5316dfe
MM
622/* Sometimes certain combinations of command options do not make
623 sense on a particular target machine. You can define a macro
624 `OVERRIDE_OPTIONS' to take account of this. This macro, if
625 defined, is executed once just after all the command options have
626 been parsed.
627
628 Don't use this macro to turn on various extra optimizations for
629 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
630
631void
632override_options ()
633{
400500c4 634 int i;
e075ae69
RH
635 /* Comes from final.c -- no real reason to change it. */
636#define MAX_CODE_ALIGN 16
f5316dfe 637
c8c5cb99
SC
638 static struct ptt
639 {
e075ae69
RH
640 struct processor_costs *cost; /* Processor costs */
641 int target_enable; /* Target flags to enable. */
642 int target_disable; /* Target flags to disable. */
643 int align_loop; /* Default alignments. */
644 int align_jump;
645 int align_func;
646 int branch_cost;
647 }
0f290768 648 const processor_target_table[PROCESSOR_max] =
e075ae69
RH
649 {
650 {&i386_cost, 0, 0, 2, 2, 2, 1},
651 {&i486_cost, 0, 0, 4, 4, 4, 1},
652 {&pentium_cost, 0, 0, -4, -4, -4, 1},
653 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
309ada50 654 {&k6_cost, 0, 0, -5, -5, 4, 1},
b4e89e2d
JH
655 {&athlon_cost, 0, 0, 4, -4, 4, 1},
656 {&pentium4_cost, 0, 0, 2, 2, 2, 1}
e075ae69
RH
657 };
658
659 static struct pta
660 {
0f290768 661 const char *name; /* processor name or nickname. */
e075ae69
RH
662 enum processor_type processor;
663 }
0f290768 664 const processor_alias_table[] =
e075ae69
RH
665 {
666 {"i386", PROCESSOR_I386},
667 {"i486", PROCESSOR_I486},
668 {"i586", PROCESSOR_PENTIUM},
669 {"pentium", PROCESSOR_PENTIUM},
670 {"i686", PROCESSOR_PENTIUMPRO},
671 {"pentiumpro", PROCESSOR_PENTIUMPRO},
e075ae69 672 {"k6", PROCESSOR_K6},
309ada50 673 {"athlon", PROCESSOR_ATHLON},
b4e89e2d 674 {"pentium4", PROCESSOR_PENTIUM4},
3af4bd89 675 };
c8c5cb99 676
0f290768 677 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
c8c5cb99 678
f5316dfe
MM
679#ifdef SUBTARGET_OVERRIDE_OPTIONS
680 SUBTARGET_OVERRIDE_OPTIONS;
681#endif
682
5a6ee819 683 ix86_arch = PROCESSOR_I386;
e075ae69
RH
684 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
685
6189a572
JH
686 if (ix86_cmodel_string != 0)
687 {
688 if (!strcmp (ix86_cmodel_string, "small"))
689 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
690 else if (flag_pic)
691 sorry ("Code model %s not supported in PIC mode", ix86_cmodel_string);
692 else if (!strcmp (ix86_cmodel_string, "32"))
693 ix86_cmodel = CM_32;
694 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
695 ix86_cmodel = CM_KERNEL;
696 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
697 ix86_cmodel = CM_MEDIUM;
698 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
699 ix86_cmodel = CM_LARGE;
700 else
701 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
702 }
703 else
704 {
705 ix86_cmodel = CM_32;
706 if (TARGET_64BIT)
707 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
708 }
709 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
710 error ("Code model `%s' not supported in the %s bit mode.",
711 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
712 if (ix86_cmodel == CM_LARGE)
713 sorry ("Code model `large' not supported yet.");
0c2dc519
JH
714 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
715 sorry ("%i-bit mode not compiled in.",
716 (target_flags & MASK_64BIT) ? 64 : 32);
6189a572 717
e075ae69
RH
718 if (ix86_arch_string != 0)
719 {
e075ae69
RH
720 for (i = 0; i < pta_size; i++)
721 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
722 {
723 ix86_arch = processor_alias_table[i].processor;
724 /* Default cpu tuning to the architecture. */
725 ix86_cpu = ix86_arch;
726 break;
727 }
400500c4 728
e075ae69
RH
729 if (i == pta_size)
730 error ("bad value (%s) for -march= switch", ix86_arch_string);
731 }
732
733 if (ix86_cpu_string != 0)
734 {
e075ae69
RH
735 for (i = 0; i < pta_size; i++)
736 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
737 {
738 ix86_cpu = processor_alias_table[i].processor;
739 break;
740 }
741 if (i == pta_size)
742 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
743 }
744
745 ix86_cost = processor_target_table[ix86_cpu].cost;
746 target_flags |= processor_target_table[ix86_cpu].target_enable;
747 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
748
36edd3cc
BS
749 /* Arrange to set up i386_stack_locals for all functions. */
750 init_machine_status = ix86_init_machine_status;
1526a060 751 mark_machine_status = ix86_mark_machine_status;
37b15744 752 free_machine_status = ix86_free_machine_status;
36edd3cc 753
0f290768 754 /* Validate -mregparm= value. */
e075ae69 755 if (ix86_regparm_string)
b08de47e 756 {
400500c4
RK
757 i = atoi (ix86_regparm_string);
758 if (i < 0 || i > REGPARM_MAX)
759 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
760 else
761 ix86_regparm = i;
b08de47e 762 }
0d7d98ee
JH
763 else
764 if (TARGET_64BIT)
765 ix86_regparm = REGPARM_MAX;
b08de47e 766
3e18fdf6
GK
767 /* If the user has provided any of the -malign-* options,
768 warn and use that value only if -falign-* is not set.
769 Remove this code in GCC 3.2 or later. */
e075ae69 770 if (ix86_align_loops_string)
b08de47e 771 {
3e18fdf6
GK
772 warning ("-malign-loops is obsolete, use -falign-loops");
773 if (align_loops == 0)
774 {
775 i = atoi (ix86_align_loops_string);
776 if (i < 0 || i > MAX_CODE_ALIGN)
777 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
778 else
779 align_loops = 1 << i;
780 }
b08de47e 781 }
3af4bd89 782
e075ae69 783 if (ix86_align_jumps_string)
b08de47e 784 {
3e18fdf6
GK
785 warning ("-malign-jumps is obsolete, use -falign-jumps");
786 if (align_jumps == 0)
787 {
788 i = atoi (ix86_align_jumps_string);
789 if (i < 0 || i > MAX_CODE_ALIGN)
790 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
791 else
792 align_jumps = 1 << i;
793 }
b08de47e 794 }
b08de47e 795
e075ae69 796 if (ix86_align_funcs_string)
b08de47e 797 {
3e18fdf6
GK
798 warning ("-malign-functions is obsolete, use -falign-functions");
799 if (align_functions == 0)
800 {
801 i = atoi (ix86_align_funcs_string);
802 if (i < 0 || i > MAX_CODE_ALIGN)
803 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
804 else
805 align_functions = 1 << i;
806 }
b08de47e 807 }
3af4bd89 808
3e18fdf6
GK
809 /* Default align_* from the processor table. */
810#define abs(n) (n < 0 ? -n : n)
811 if (align_loops == 0)
812 align_loops = 1 << abs (processor_target_table[ix86_cpu].align_loop);
813 if (align_jumps == 0)
814 align_jumps = 1 << abs (processor_target_table[ix86_cpu].align_jump);
815 if (align_functions == 0)
816 align_functions = 1 << abs (processor_target_table[ix86_cpu].align_func);
817
e4c0478d 818 /* Validate -mpreferred-stack-boundary= value, or provide default.
3af4bd89 819 The default of 128 bits is for Pentium III's SSE __m128. */
e075ae69
RH
820 ix86_preferred_stack_boundary = 128;
821 if (ix86_preferred_stack_boundary_string)
3af4bd89 822 {
400500c4 823 i = atoi (ix86_preferred_stack_boundary_string);
0d7d98ee
JH
824 if (i < (TARGET_64BIT ? 3 : 2) || i > 31)
825 error ("-mpreferred-stack-boundary=%d is not between %d and 31", i,
826 TARGET_64BIT ? 3 : 2);
400500c4
RK
827 else
828 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 829 }
77a989d1 830
0f290768 831 /* Validate -mbranch-cost= value, or provide default. */
e075ae69
RH
832 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
833 if (ix86_branch_cost_string)
804a8ee0 834 {
400500c4
RK
835 i = atoi (ix86_branch_cost_string);
836 if (i < 0 || i > 5)
837 error ("-mbranch-cost=%d is not between 0 and 5", i);
838 else
839 ix86_branch_cost = i;
804a8ee0 840 }
804a8ee0 841
e9a25f70
JL
842 /* Keep nonleaf frame pointers. */
843 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 844 flag_omit_frame_pointer = 1;
e075ae69
RH
845
846 /* If we're doing fast math, we don't care about comparison order
847 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 848 if (flag_unsafe_math_optimizations)
e075ae69
RH
849 target_flags &= ~MASK_IEEE_FP;
850
a7180f70
BS
851 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
852 on by -msse. */
853 if (TARGET_SSE)
854 target_flags |= MASK_MMX;
c6036a37
JH
855
856 if ((x86_accumulate_outgoing_args & CPUMASK)
857 && !(target_flags & MASK_NO_ACCUMULATE_OUTGOING_ARGS)
858 && !optimize_size)
859 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
f5316dfe
MM
860}
861\f
32b5b1aa 862void
c6aded7c 863optimization_options (level, size)
32b5b1aa 864 int level;
bb5177ac 865 int size ATTRIBUTE_UNUSED;
32b5b1aa 866{
e9a25f70
JL
867 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
868 make the problem with not enough registers even worse. */
32b5b1aa
SC
869#ifdef INSN_SCHEDULING
870 if (level > 1)
871 flag_schedule_insns = 0;
872#endif
873}
b08de47e 874\f
b08de47e
MM
875/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
876 attribute for TYPE. The attributes in ATTRIBUTES have previously been
877 assigned to TYPE. */
878
9959db6d 879int
e075ae69 880ix86_valid_type_attribute_p (type, attributes, identifier, args)
b08de47e 881 tree type;
bb5177ac 882 tree attributes ATTRIBUTE_UNUSED;
b08de47e
MM
883 tree identifier;
884 tree args;
885{
886 if (TREE_CODE (type) != FUNCTION_TYPE
ac478ac0 887 && TREE_CODE (type) != METHOD_TYPE
b08de47e
MM
888 && TREE_CODE (type) != FIELD_DECL
889 && TREE_CODE (type) != TYPE_DECL)
890 return 0;
891
892 /* Stdcall attribute says callee is responsible for popping arguments
893 if they are not variable. */
0d7d98ee
JH
894 if (is_attribute_p ("stdcall", identifier)
895 && !TARGET_64BIT)
b08de47e
MM
896 return (args == NULL_TREE);
897
0f290768 898 /* Cdecl attribute says the callee is a normal C declaration. */
0d7d98ee
JH
899 if (is_attribute_p ("cdecl", identifier)
900 && !TARGET_64BIT)
b08de47e
MM
901 return (args == NULL_TREE);
902
903 /* Regparm attribute specifies how many integer arguments are to be
0f290768 904 passed in registers. */
b08de47e
MM
905 if (is_attribute_p ("regparm", identifier))
906 {
907 tree cst;
908
e9a25f70 909 if (! args || TREE_CODE (args) != TREE_LIST
b08de47e
MM
910 || TREE_CHAIN (args) != NULL_TREE
911 || TREE_VALUE (args) == NULL_TREE)
912 return 0;
913
914 cst = TREE_VALUE (args);
915 if (TREE_CODE (cst) != INTEGER_CST)
916 return 0;
917
cce097f1 918 if (compare_tree_int (cst, REGPARM_MAX) > 0)
b08de47e
MM
919 return 0;
920
921 return 1;
922 }
923
924 return 0;
925}
926
927/* Return 0 if the attributes for two types are incompatible, 1 if they
928 are compatible, and 2 if they are nearly compatible (which causes a
929 warning to be generated). */
930
931int
e075ae69 932ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
933 tree type1;
934 tree type2;
b08de47e 935{
0f290768 936 /* Check for mismatch of non-default calling convention. */
69ddee61 937 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
938
939 if (TREE_CODE (type1) != FUNCTION_TYPE)
940 return 1;
941
942 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
943 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
944 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 945 return 0;
b08de47e
MM
946 return 1;
947}
b08de47e
MM
948\f
949/* Value is the number of bytes of arguments automatically
950 popped when returning from a subroutine call.
951 FUNDECL is the declaration node of the function (as a tree),
952 FUNTYPE is the data type of the function (as a tree),
953 or for a library call it is an identifier node for the subroutine name.
954 SIZE is the number of bytes of arguments passed on the stack.
955
956 On the 80386, the RTD insn may be used to pop them if the number
957 of args is fixed, but if the number is variable then the caller
958 must pop them all. RTD can't be used for library calls now
959 because the library is compiled with the Unix compiler.
960 Use of RTD is a selectable option, since it is incompatible with
961 standard Unix calling sequences. If the option is not selected,
962 the caller must always pop the args.
963
964 The attribute stdcall is equivalent to RTD on a per module basis. */
965
966int
e075ae69 967ix86_return_pops_args (fundecl, funtype, size)
b08de47e
MM
968 tree fundecl;
969 tree funtype;
970 int size;
79325812 971{
3345ee7d 972 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 973
0f290768 974 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 975 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 976
0f290768 977 /* Stdcall functions will pop the stack if not variable args. */
698cdd84
SC
978 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
979 rtd = 1;
79325812 980
698cdd84
SC
981 if (rtd
982 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
983 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
984 == void_type_node)))
698cdd84
SC
985 return size;
986 }
79325812 987
e9a25f70 988 /* Lose any fake structure return argument. */
0d7d98ee
JH
989 if (aggregate_value_p (TREE_TYPE (funtype))
990 && !TARGET_64BIT)
698cdd84 991 return GET_MODE_SIZE (Pmode);
79325812 992
2614aac6 993 return 0;
b08de47e 994}
b08de47e
MM
995\f
996/* Argument support functions. */
997
998/* Initialize a variable CUM of type CUMULATIVE_ARGS
999 for a call to a function whose data type is FNTYPE.
1000 For a library call, FNTYPE is 0. */
1001
1002void
1003init_cumulative_args (cum, fntype, libname)
e9a25f70 1004 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
b08de47e
MM
1005 tree fntype; /* tree ptr for function decl */
1006 rtx libname; /* SYMBOL_REF of library name or 0 */
1007{
1008 static CUMULATIVE_ARGS zero_cum;
1009 tree param, next_param;
1010
1011 if (TARGET_DEBUG_ARG)
1012 {
1013 fprintf (stderr, "\ninit_cumulative_args (");
1014 if (fntype)
e9a25f70
JL
1015 fprintf (stderr, "fntype code = %s, ret code = %s",
1016 tree_code_name[(int) TREE_CODE (fntype)],
1017 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
1018 else
1019 fprintf (stderr, "no fntype");
1020
1021 if (libname)
1022 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1023 }
1024
1025 *cum = zero_cum;
1026
1027 /* Set up the number of registers to use for passing arguments. */
e075ae69 1028 cum->nregs = ix86_regparm;
b08de47e
MM
1029 if (fntype)
1030 {
1031 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 1032
b08de47e
MM
1033 if (attr)
1034 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1035 }
1036
1037 /* Determine if this function has variable arguments. This is
1038 indicated by the last argument being 'void_type_mode' if there
1039 are no variable arguments. If there are variable arguments, then
1040 we won't pass anything in registers */
1041
1042 if (cum->nregs)
1043 {
1044 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 1045 param != 0; param = next_param)
b08de47e
MM
1046 {
1047 next_param = TREE_CHAIN (param);
e9a25f70 1048 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
b08de47e
MM
1049 cum->nregs = 0;
1050 }
1051 }
1052
1053 if (TARGET_DEBUG_ARG)
1054 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1055
1056 return;
1057}
1058
1059/* Update the data in CUM to advance over an argument
1060 of mode MODE and data type TYPE.
1061 (TYPE is null for libcalls where that information may not be available.) */
1062
1063void
1064function_arg_advance (cum, mode, type, named)
1065 CUMULATIVE_ARGS *cum; /* current arg information */
1066 enum machine_mode mode; /* current arg mode */
1067 tree type; /* type of the argument or 0 if lib support */
1068 int named; /* whether or not the argument was named */
1069{
5ac9118e
KG
1070 int bytes =
1071 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
1072 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1073
1074 if (TARGET_DEBUG_ARG)
1075 fprintf (stderr,
e9a25f70 1076 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 1077 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
82a127a9 1078 if (TARGET_SSE && mode == TImode)
b08de47e 1079 {
82a127a9
CM
1080 cum->sse_words += words;
1081 cum->sse_nregs -= 1;
1082 cum->sse_regno += 1;
1083 if (cum->sse_nregs <= 0)
1084 {
1085 cum->sse_nregs = 0;
1086 cum->sse_regno = 0;
1087 }
b08de47e 1088 }
82a127a9
CM
1089 else
1090 {
1091 cum->words += words;
1092 cum->nregs -= words;
1093 cum->regno += words;
b08de47e 1094
82a127a9
CM
1095 if (cum->nregs <= 0)
1096 {
1097 cum->nregs = 0;
1098 cum->regno = 0;
1099 }
1100 }
b08de47e
MM
1101 return;
1102}
1103
1104/* Define where to put the arguments to a function.
1105 Value is zero to push the argument on the stack,
1106 or a hard register in which to store the argument.
1107
1108 MODE is the argument's machine mode.
1109 TYPE is the data type of the argument (as a tree).
1110 This is null for libcalls where that information may
1111 not be available.
1112 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1113 the preceding args and about the function being called.
1114 NAMED is nonzero if this argument is a named parameter
1115 (otherwise it is an extra parameter matching an ellipsis). */
1116
1117struct rtx_def *
1118function_arg (cum, mode, type, named)
1119 CUMULATIVE_ARGS *cum; /* current arg information */
1120 enum machine_mode mode; /* current arg mode */
1121 tree type; /* type of the argument or 0 if lib support */
1122 int named; /* != 0 for normal args, == 0 for ... args */
1123{
1124 rtx ret = NULL_RTX;
5ac9118e
KG
1125 int bytes =
1126 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
1127 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1128
32ee7d1d
JH
1129 if (mode == VOIDmode)
1130 return constm1_rtx;
1131
b08de47e
MM
1132 switch (mode)
1133 {
0f290768 1134 /* For now, pass fp/complex values on the stack. */
e9a25f70 1135 default:
b08de47e
MM
1136 break;
1137
1138 case BLKmode:
1139 case DImode:
1140 case SImode:
1141 case HImode:
1142 case QImode:
1143 if (words <= cum->nregs)
f64cecad 1144 ret = gen_rtx_REG (mode, cum->regno);
b08de47e 1145 break;
82a127a9
CM
1146 case TImode:
1147 if (cum->sse_nregs)
1148 ret = gen_rtx_REG (mode, cum->sse_regno);
1149 break;
b08de47e
MM
1150 }
1151
1152 if (TARGET_DEBUG_ARG)
1153 {
1154 fprintf (stderr,
e9a25f70 1155 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
b08de47e
MM
1156 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1157
1158 if (ret)
1159 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
1160 else
1161 fprintf (stderr, ", stack");
1162
1163 fprintf (stderr, " )\n");
1164 }
1165
1166 return ret;
1167}
e075ae69 1168\f
8bad7136 1169
7dd4b4a3
JH
1170/* Return nonzero if OP is general operand representable on x86_64. */
1171
1172int
1173x86_64_general_operand (op, mode)
1174 rtx op;
1175 enum machine_mode mode;
1176{
1177 if (!TARGET_64BIT)
1178 return general_operand (op, mode);
1179 if (nonimmediate_operand (op, mode))
1180 return 1;
1181 return x86_64_sign_extended_value (op);
1182}
1183
1184/* Return nonzero if OP is general operand representable on x86_64
1185 as eighter sign extended or zero extended constant. */
1186
1187int
1188x86_64_szext_general_operand (op, mode)
1189 rtx op;
1190 enum machine_mode mode;
1191{
1192 if (!TARGET_64BIT)
1193 return general_operand (op, mode);
1194 if (nonimmediate_operand (op, mode))
1195 return 1;
1196 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
1197}
1198
1199/* Return nonzero if OP is nonmemory operand representable on x86_64. */
1200
1201int
1202x86_64_nonmemory_operand (op, mode)
1203 rtx op;
1204 enum machine_mode mode;
1205{
1206 if (!TARGET_64BIT)
1207 return nonmemory_operand (op, mode);
1208 if (register_operand (op, mode))
1209 return 1;
1210 return x86_64_sign_extended_value (op);
1211}
1212
1213/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
1214
1215int
1216x86_64_movabs_operand (op, mode)
1217 rtx op;
1218 enum machine_mode mode;
1219{
1220 if (!TARGET_64BIT || !flag_pic)
1221 return nonmemory_operand (op, mode);
1222 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
1223 return 1;
1224 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
1225 return 1;
1226 return 0;
1227}
1228
1229/* Return nonzero if OP is nonmemory operand representable on x86_64. */
1230
1231int
1232x86_64_szext_nonmemory_operand (op, mode)
1233 rtx op;
1234 enum machine_mode mode;
1235{
1236 if (!TARGET_64BIT)
1237 return nonmemory_operand (op, mode);
1238 if (register_operand (op, mode))
1239 return 1;
1240 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
1241}
1242
1243/* Return nonzero if OP is immediate operand representable on x86_64. */
1244
1245int
1246x86_64_immediate_operand (op, mode)
1247 rtx op;
1248 enum machine_mode mode;
1249{
1250 if (!TARGET_64BIT)
1251 return immediate_operand (op, mode);
1252 return x86_64_sign_extended_value (op);
1253}
1254
1255/* Return nonzero if OP is immediate operand representable on x86_64. */
1256
1257int
1258x86_64_zext_immediate_operand (op, mode)
1259 rtx op;
1260 enum machine_mode mode ATTRIBUTE_UNUSED;
1261{
1262 return x86_64_zero_extended_value (op);
1263}
1264
8bad7136
JL
1265/* Return nonzero if OP is (const_int 1), else return zero. */
1266
1267int
1268const_int_1_operand (op, mode)
1269 rtx op;
1270 enum machine_mode mode ATTRIBUTE_UNUSED;
1271{
1272 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
1273}
1274
e075ae69
RH
1275/* Returns 1 if OP is either a symbol reference or a sum of a symbol
1276 reference and a constant. */
b08de47e
MM
1277
1278int
e075ae69
RH
1279symbolic_operand (op, mode)
1280 register rtx op;
1281 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1282{
e075ae69 1283 switch (GET_CODE (op))
2a2ab3f9 1284 {
e075ae69
RH
1285 case SYMBOL_REF:
1286 case LABEL_REF:
1287 return 1;
1288
1289 case CONST:
1290 op = XEXP (op, 0);
1291 if (GET_CODE (op) == SYMBOL_REF
1292 || GET_CODE (op) == LABEL_REF
1293 || (GET_CODE (op) == UNSPEC
1294 && XINT (op, 1) >= 6
1295 && XINT (op, 1) <= 7))
1296 return 1;
1297 if (GET_CODE (op) != PLUS
1298 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1299 return 0;
1300
1301 op = XEXP (op, 0);
1302 if (GET_CODE (op) == SYMBOL_REF
1303 || GET_CODE (op) == LABEL_REF)
1304 return 1;
1305 /* Only @GOTOFF gets offsets. */
1306 if (GET_CODE (op) != UNSPEC
1307 || XINT (op, 1) != 7)
1308 return 0;
1309
1310 op = XVECEXP (op, 0, 0);
1311 if (GET_CODE (op) == SYMBOL_REF
1312 || GET_CODE (op) == LABEL_REF)
1313 return 1;
1314 return 0;
1315
1316 default:
1317 return 0;
2a2ab3f9
JVA
1318 }
1319}
2a2ab3f9 1320
e075ae69 1321/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 1322
e075ae69
RH
1323int
1324pic_symbolic_operand (op, mode)
1325 register rtx op;
1326 enum machine_mode mode ATTRIBUTE_UNUSED;
1327{
1328 if (GET_CODE (op) == CONST)
2a2ab3f9 1329 {
e075ae69
RH
1330 op = XEXP (op, 0);
1331 if (GET_CODE (op) == UNSPEC)
1332 return 1;
1333 if (GET_CODE (op) != PLUS
1334 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1335 return 0;
1336 op = XEXP (op, 0);
1337 if (GET_CODE (op) == UNSPEC)
1338 return 1;
2a2ab3f9 1339 }
e075ae69 1340 return 0;
2a2ab3f9 1341}
2a2ab3f9 1342
28d52ffb
RH
1343/* Test for a valid operand for a call instruction. Don't allow the
1344 arg pointer register or virtual regs since they may decay into
1345 reg + const, which the patterns can't handle. */
2a2ab3f9 1346
e075ae69
RH
1347int
1348call_insn_operand (op, mode)
1349 rtx op;
1350 enum machine_mode mode ATTRIBUTE_UNUSED;
1351{
e075ae69
RH
1352 /* Disallow indirect through a virtual register. This leads to
1353 compiler aborts when trying to eliminate them. */
1354 if (GET_CODE (op) == REG
1355 && (op == arg_pointer_rtx
564d80f4 1356 || op == frame_pointer_rtx
e075ae69
RH
1357 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
1358 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
1359 return 0;
2a2ab3f9 1360
28d52ffb
RH
1361 /* Disallow `call 1234'. Due to varying assembler lameness this
1362 gets either rejected or translated to `call .+1234'. */
1363 if (GET_CODE (op) == CONST_INT)
1364 return 0;
1365
cbbf65e0
RH
1366 /* Explicitly allow SYMBOL_REF even if pic. */
1367 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 1368 return 1;
2a2ab3f9 1369
cbbf65e0
RH
1370 /* Half-pic doesn't allow anything but registers and constants.
1371 We've just taken care of the later. */
1372 if (HALF_PIC_P ())
1373 return register_operand (op, Pmode);
1374
1375 /* Otherwise we can allow any general_operand in the address. */
1376 return general_operand (op, Pmode);
e075ae69 1377}
79325812 1378
e075ae69
RH
1379int
1380constant_call_address_operand (op, mode)
1381 rtx op;
1382 enum machine_mode mode ATTRIBUTE_UNUSED;
1383{
eaf19aba
JJ
1384 if (GET_CODE (op) == CONST
1385 && GET_CODE (XEXP (op, 0)) == PLUS
1386 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1387 op = XEXP (XEXP (op, 0), 0);
e1ff012c 1388 return GET_CODE (op) == SYMBOL_REF;
e075ae69 1389}
2a2ab3f9 1390
e075ae69 1391/* Match exactly zero and one. */
e9a25f70 1392
0f290768 1393int
e075ae69
RH
1394const0_operand (op, mode)
1395 register rtx op;
1396 enum machine_mode mode;
1397{
1398 return op == CONST0_RTX (mode);
1399}
e9a25f70 1400
0f290768 1401int
e075ae69
RH
1402const1_operand (op, mode)
1403 register rtx op;
1404 enum machine_mode mode ATTRIBUTE_UNUSED;
1405{
1406 return op == const1_rtx;
1407}
2a2ab3f9 1408
e075ae69 1409/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 1410
e075ae69
RH
1411int
1412const248_operand (op, mode)
1413 register rtx op;
1414 enum machine_mode mode ATTRIBUTE_UNUSED;
1415{
1416 return (GET_CODE (op) == CONST_INT
1417 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1418}
e9a25f70 1419
e075ae69 1420/* True if this is a constant appropriate for an increment or decremenmt. */
81fd0956 1421
e075ae69
RH
1422int
1423incdec_operand (op, mode)
1424 register rtx op;
0631e0bf 1425 enum machine_mode mode ATTRIBUTE_UNUSED;
e075ae69 1426{
b4e89e2d
JH
1427 /* On Pentium4, the inc and dec operations causes extra dependancy on flag
1428 registers, since carry flag is not set. */
1429 if (TARGET_PENTIUM4 && !optimize_size)
1430 return 0;
2b1c08f5 1431 return op == const1_rtx || op == constm1_rtx;
e075ae69 1432}
2a2ab3f9 1433
371bc54b
JH
1434/* Return nonzero if OP is acceptable as operand of DImode shift
1435 expander. */
1436
1437int
1438shiftdi_operand (op, mode)
1439 rtx op;
1440 enum machine_mode mode ATTRIBUTE_UNUSED;
1441{
1442 if (TARGET_64BIT)
1443 return nonimmediate_operand (op, mode);
1444 else
1445 return register_operand (op, mode);
1446}
1447
0f290768 1448/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
1449 register eliminable to the stack pointer. Otherwise, this is
1450 a register operand.
2a2ab3f9 1451
e075ae69
RH
1452 This is used to prevent esp from being used as an index reg.
1453 Which would only happen in pathological cases. */
5f1ec3e6 1454
e075ae69
RH
1455int
1456reg_no_sp_operand (op, mode)
1457 register rtx op;
1458 enum machine_mode mode;
1459{
1460 rtx t = op;
1461 if (GET_CODE (t) == SUBREG)
1462 t = SUBREG_REG (t);
564d80f4 1463 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 1464 return 0;
2a2ab3f9 1465
e075ae69 1466 return register_operand (op, mode);
2a2ab3f9 1467}
b840bfb0 1468
915119a5
BS
1469int
1470mmx_reg_operand (op, mode)
1471 register rtx op;
bd793c65 1472 enum machine_mode mode ATTRIBUTE_UNUSED;
915119a5
BS
1473{
1474 return MMX_REG_P (op);
1475}
1476
2c5a510c
RH
1477/* Return false if this is any eliminable register. Otherwise
1478 general_operand. */
1479
1480int
1481general_no_elim_operand (op, mode)
1482 register rtx op;
1483 enum machine_mode mode;
1484{
1485 rtx t = op;
1486 if (GET_CODE (t) == SUBREG)
1487 t = SUBREG_REG (t);
1488 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1489 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1490 || t == virtual_stack_dynamic_rtx)
1491 return 0;
1020a5ab
RH
1492 if (REG_P (t)
1493 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
1494 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
1495 return 0;
2c5a510c
RH
1496
1497 return general_operand (op, mode);
1498}
1499
1500/* Return false if this is any eliminable register. Otherwise
1501 register_operand or const_int. */
1502
1503int
1504nonmemory_no_elim_operand (op, mode)
1505 register rtx op;
1506 enum machine_mode mode;
1507{
1508 rtx t = op;
1509 if (GET_CODE (t) == SUBREG)
1510 t = SUBREG_REG (t);
1511 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1512 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1513 || t == virtual_stack_dynamic_rtx)
1514 return 0;
1515
1516 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1517}
1518
e075ae69 1519/* Return true if op is a Q_REGS class register. */
b840bfb0 1520
e075ae69
RH
1521int
1522q_regs_operand (op, mode)
1523 register rtx op;
1524 enum machine_mode mode;
b840bfb0 1525{
e075ae69
RH
1526 if (mode != VOIDmode && GET_MODE (op) != mode)
1527 return 0;
1528 if (GET_CODE (op) == SUBREG)
1529 op = SUBREG_REG (op);
1530 return QI_REG_P (op);
0f290768 1531}
b840bfb0 1532
e075ae69 1533/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 1534
e075ae69
RH
1535int
1536non_q_regs_operand (op, mode)
1537 register rtx op;
1538 enum machine_mode mode;
1539{
1540 if (mode != VOIDmode && GET_MODE (op) != mode)
1541 return 0;
1542 if (GET_CODE (op) == SUBREG)
1543 op = SUBREG_REG (op);
1544 return NON_QI_REG_P (op);
0f290768 1545}
b840bfb0 1546
915119a5
BS
1547/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
1548 insns. */
1549int
1550sse_comparison_operator (op, mode)
1551 rtx op;
1552 enum machine_mode mode ATTRIBUTE_UNUSED;
1553{
1554 enum rtx_code code = GET_CODE (op);
a46d1d38
JH
1555 switch (code)
1556 {
1557 /* Operations supported directly. */
1558 case EQ:
1559 case LT:
1560 case LE:
1561 case UNORDERED:
1562 case NE:
1563 case UNGE:
1564 case UNGT:
1565 case ORDERED:
1566 return 1;
1567 /* These are equivalent to ones above in non-IEEE comparisons. */
1568 case UNEQ:
1569 case UNLT:
1570 case UNLE:
1571 case LTGT:
1572 case GE:
1573 case GT:
1574 return !TARGET_IEEE_FP;
1575 default:
1576 return 0;
1577 }
915119a5 1578}
/* Return 1 if OP is a valid comparison operator in valid mode.
   Validity depends both on the comparison code and on the mode of the
   compared operands (the CC mode produced by the compare insn).  */
int
ix86_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));

  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      /* FP comparisons are valid only when expressible as a single
	 integer condition, i.e. no bypass or second jump is needed.  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      return (bypass_code == NIL && second_code == NIL);
    }
  switch (code)
    {
    case EQ: case NE:
      /* Equality tests are valid in every CC mode.  */
      return 1;
    case LT: case GE:
      if (inmode == CCmode || inmode == CCGCmode
	  || inmode == CCGOCmode || inmode == CCNOmode)
	return 1;
      return 0;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      /* These require the full flags, i.e. plain CCmode only.  */
      if (inmode == CCmode)
	return 1;
      return 0;
    case GT: case LE:
      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}
1620
/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));
  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      /* FP comparisons must be expressible without bypass or secondary
	 jumps; map the FP code to the equivalent integer condition.  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	return 0;
      code = ix86_fp_compare_code_to_integer (code);
    }
  /* i387 supports just limited amount of conditional codes.  */
  switch (code)
    {
    case LTU: case GTU: case LEU: case GEU:
      /* Unsigned conditions are valid for CC and FP compare modes only.  */
      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
	return 1;
      return 0;
    case ORDERED: case UNORDERED:
    case EQ: case NE:
      return 1;
    default:
      return 0;
    }
}
b840bfb0 1657
e9e80858
JH
1658/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1659
1660int
1661promotable_binary_operator (op, mode)
1662 register rtx op;
1663 enum machine_mode mode ATTRIBUTE_UNUSED;
1664{
1665 switch (GET_CODE (op))
1666 {
1667 case MULT:
1668 /* Modern CPUs have same latency for HImode and SImode multiply,
1669 but 386 and 486 do HImode multiply faster. */
1670 return ix86_cpu > PROCESSOR_I486;
1671 case PLUS:
1672 case AND:
1673 case IOR:
1674 case XOR:
1675 case ASHIFT:
1676 return 1;
1677 default:
1678 return 0;
1679 }
1680}
1681
e075ae69
RH
1682/* Nearly general operand, but accept any const_double, since we wish
1683 to be able to drop them into memory rather than have them get pulled
1684 into registers. */
b840bfb0 1685
2a2ab3f9 1686int
e075ae69
RH
1687cmp_fp_expander_operand (op, mode)
1688 register rtx op;
1689 enum machine_mode mode;
2a2ab3f9 1690{
e075ae69 1691 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 1692 return 0;
e075ae69 1693 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 1694 return 1;
e075ae69 1695 return general_operand (op, mode);
2a2ab3f9
JVA
1696}
1697
e075ae69 1698/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
1699
1700int
e075ae69 1701ext_register_operand (op, mode)
2a2ab3f9 1702 register rtx op;
bb5177ac 1703 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1704{
3522082b 1705 int regno;
0d7d98ee
JH
1706 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
1707 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
e075ae69 1708 return 0;
3522082b
JH
1709
1710 if (!register_operand (op, VOIDmode))
1711 return 0;
1712
1713 /* Be curefull to accept only registers having upper parts. */
1714 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
1715 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
e075ae69
RH
1716}
1717
1718/* Return 1 if this is a valid binary floating-point operation.
0f290768 1719 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
1720
1721int
1722binary_fp_operator (op, mode)
1723 register rtx op;
1724 enum machine_mode mode;
1725{
1726 if (mode != VOIDmode && mode != GET_MODE (op))
1727 return 0;
1728
2a2ab3f9
JVA
1729 switch (GET_CODE (op))
1730 {
e075ae69
RH
1731 case PLUS:
1732 case MINUS:
1733 case MULT:
1734 case DIV:
1735 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 1736
2a2ab3f9
JVA
1737 default:
1738 return 0;
1739 }
1740}
fee2770d 1741
e075ae69
RH
1742int
1743mult_operator(op, mode)
1744 register rtx op;
1745 enum machine_mode mode ATTRIBUTE_UNUSED;
1746{
1747 return GET_CODE (op) == MULT;
1748}
1749
1750int
1751div_operator(op, mode)
1752 register rtx op;
1753 enum machine_mode mode ATTRIBUTE_UNUSED;
1754{
1755 return GET_CODE (op) == DIV;
1756}
0a726ef1
JL
1757
1758int
e075ae69
RH
1759arith_or_logical_operator (op, mode)
1760 rtx op;
1761 enum machine_mode mode;
0a726ef1 1762{
e075ae69
RH
1763 return ((mode == VOIDmode || GET_MODE (op) == mode)
1764 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1765 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
1766}
1767
e075ae69 1768/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
1769
1770int
e075ae69
RH
1771memory_displacement_operand (op, mode)
1772 register rtx op;
1773 enum machine_mode mode;
4f2c8ebb 1774{
e075ae69 1775 struct ix86_address parts;
e9a25f70 1776
e075ae69
RH
1777 if (! memory_operand (op, mode))
1778 return 0;
1779
1780 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1781 abort ();
1782
1783 return parts.disp != NULL_RTX;
4f2c8ebb
RS
1784}
1785
16189740 1786/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
e075ae69
RH
1787 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1788
1789 ??? It seems likely that this will only work because cmpsi is an
1790 expander, and no actual insns use this. */
4f2c8ebb
RS
1791
1792int
e075ae69
RH
1793cmpsi_operand (op, mode)
1794 rtx op;
1795 enum machine_mode mode;
fee2770d 1796{
e075ae69
RH
1797 if (general_operand (op, mode))
1798 return 1;
1799
1800 if (GET_CODE (op) == AND
1801 && GET_MODE (op) == SImode
1802 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
1803 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
1804 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
1805 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
1806 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
1807 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 1808 return 1;
e9a25f70 1809
fee2770d
RS
1810 return 0;
1811}
d784886d 1812
e075ae69
RH
1813/* Returns 1 if OP is memory operand that can not be represented by the
1814 modRM array. */
d784886d
RK
1815
1816int
e075ae69 1817long_memory_operand (op, mode)
d784886d
RK
1818 register rtx op;
1819 enum machine_mode mode;
1820{
e075ae69 1821 if (! memory_operand (op, mode))
d784886d
RK
1822 return 0;
1823
e075ae69 1824 return memory_address_length (op) != 0;
d784886d 1825}
2247f6ed
JH
1826
1827/* Return nonzero if the rtx is known aligned. */
1828
1829int
1830aligned_operand (op, mode)
1831 rtx op;
1832 enum machine_mode mode;
1833{
1834 struct ix86_address parts;
1835
1836 if (!general_operand (op, mode))
1837 return 0;
1838
0f290768 1839 /* Registers and immediate operands are always "aligned". */
2247f6ed
JH
1840 if (GET_CODE (op) != MEM)
1841 return 1;
1842
0f290768 1843 /* Don't even try to do any aligned optimizations with volatiles. */
2247f6ed
JH
1844 if (MEM_VOLATILE_P (op))
1845 return 0;
1846
1847 op = XEXP (op, 0);
1848
1849 /* Pushes and pops are only valid on the stack pointer. */
1850 if (GET_CODE (op) == PRE_DEC
1851 || GET_CODE (op) == POST_INC)
1852 return 1;
1853
1854 /* Decode the address. */
1855 if (! ix86_decompose_address (op, &parts))
1856 abort ();
1857
1858 /* Look for some component that isn't known to be aligned. */
1859 if (parts.index)
1860 {
1861 if (parts.scale < 4
bdb429a5 1862 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
2247f6ed
JH
1863 return 0;
1864 }
1865 if (parts.base)
1866 {
bdb429a5 1867 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
2247f6ed
JH
1868 return 0;
1869 }
1870 if (parts.disp)
1871 {
1872 if (GET_CODE (parts.disp) != CONST_INT
1873 || (INTVAL (parts.disp) & 3) != 0)
1874 return 0;
1875 }
1876
1877 /* Didn't find one -- this must be an aligned address. */
1878 return 1;
1879}
e075ae69
RH
1880\f
1881/* Return true if the constant is something that can be loaded with
1882 a special instruction. Only handle 0.0 and 1.0; others are less
1883 worthwhile. */
57dbca5e
BS
1884
1885int
e075ae69
RH
1886standard_80387_constant_p (x)
1887 rtx x;
57dbca5e 1888{
2b04e52b 1889 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 1890 return -1;
2b04e52b
JH
1891 /* Note that on the 80387, other constants, such as pi, that we should support
1892 too. On some machines, these are much slower to load as standard constant,
1893 than to load from doubles in memory. */
1894 if (x == CONST0_RTX (GET_MODE (x)))
1895 return 1;
1896 if (x == CONST1_RTX (GET_MODE (x)))
1897 return 2;
e075ae69 1898 return 0;
57dbca5e
BS
1899}
1900
2b04e52b
JH
1901/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
1902 */
1903int
1904standard_sse_constant_p (x)
1905 rtx x;
1906{
1907 if (GET_CODE (x) != CONST_DOUBLE)
1908 return -1;
1909 return (x == CONST0_RTX (GET_MODE (x)));
1910}
1911
2a2ab3f9
JVA
1912/* Returns 1 if OP contains a symbol reference */
1913
1914int
1915symbolic_reference_mentioned_p (op)
1916 rtx op;
1917{
6f7d635c 1918 register const char *fmt;
2a2ab3f9
JVA
1919 register int i;
1920
1921 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
1922 return 1;
1923
1924 fmt = GET_RTX_FORMAT (GET_CODE (op));
1925 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
1926 {
1927 if (fmt[i] == 'E')
1928 {
1929 register int j;
1930
1931 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
1932 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
1933 return 1;
1934 }
e9a25f70 1935
2a2ab3f9
JVA
1936 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
1937 return 1;
1938 }
1939
1940 return 0;
1941}
e075ae69
RH
1942
1943/* Return 1 if it is appropriate to emit `ret' instructions in the
1944 body of a function. Do this only if the epilogue is simple, needing a
1945 couple of insns. Prior to reloading, we can't tell how many registers
1946 must be saved, so return 0 then. Return 0 if there is no frame
1947 marker to de-allocate.
1948
1949 If NON_SAVING_SETJMP is defined and true, then it is not possible
1950 for the epilogue to be simple, so return 0. This is a special case
1951 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1952 until final, but jump_optimize may need to know sooner if a
1953 `return' is OK. */
32b5b1aa
SC
1954
1955int
e075ae69 1956ix86_can_use_return_insn_p ()
32b5b1aa 1957{
4dd2ac2c 1958 struct ix86_frame frame;
9a7372d6 1959
e075ae69
RH
1960#ifdef NON_SAVING_SETJMP
1961 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
1962 return 0;
1963#endif
9a7372d6
RH
1964#ifdef FUNCTION_BLOCK_PROFILER_EXIT
1965 if (profile_block_flag == 2)
1966 return 0;
1967#endif
1968
1969 if (! reload_completed || frame_pointer_needed)
1970 return 0;
32b5b1aa 1971
9a7372d6
RH
1972 /* Don't allow more than 32 pop, since that's all we can do
1973 with one instruction. */
1974 if (current_function_pops_args
1975 && current_function_args_size >= 32768)
e075ae69 1976 return 0;
32b5b1aa 1977
4dd2ac2c
JH
1978 ix86_compute_frame_layout (&frame);
1979 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 1980}
6189a572
JH
1981\f
1982/* Return 1 if VALUE can be stored in the sign extended immediate field. */
1983int
1984x86_64_sign_extended_value (value)
1985 rtx value;
1986{
1987 switch (GET_CODE (value))
1988 {
1989 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
1990 to be at least 32 and this all acceptable constants are
1991 represented as CONST_INT. */
1992 case CONST_INT:
1993 if (HOST_BITS_PER_WIDE_INT == 32)
1994 return 1;
1995 else
1996 {
1997 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
fa9f36a1 1998 return trunc_int_for_mode (val, SImode) == val;
6189a572
JH
1999 }
2000 break;
2001
2002 /* For certain code models, the symbolic references are known to fit. */
2003 case SYMBOL_REF:
2004 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
2005
2006 /* For certain code models, the code is near as well. */
2007 case LABEL_REF:
2008 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
2009
2010 /* We also may accept the offsetted memory references in certain special
2011 cases. */
2012 case CONST:
2013 if (GET_CODE (XEXP (value, 0)) == UNSPEC
2014 && XVECLEN (XEXP (value, 0), 0) == 1
2015 && XINT (XEXP (value, 0), 1) == 15)
2016 return 1;
2017 else if (GET_CODE (XEXP (value, 0)) == PLUS)
2018 {
2019 rtx op1 = XEXP (XEXP (value, 0), 0);
2020 rtx op2 = XEXP (XEXP (value, 0), 1);
2021 HOST_WIDE_INT offset;
2022
2023 if (ix86_cmodel == CM_LARGE)
2024 return 0;
2025 if (GET_CODE (op2) != CONST_INT)
2026 return 0;
2027 offset = trunc_int_for_mode (INTVAL (op2), DImode);
2028 switch (GET_CODE (op1))
2029 {
2030 case SYMBOL_REF:
2031 /* For CM_SMALL assume that latest object is 1MB before
2032 end of 31bits boundary. We may also accept pretty
2033 large negative constants knowing that all objects are
2034 in the positive half of address space. */
2035 if (ix86_cmodel == CM_SMALL
2036 && offset < 1024*1024*1024
2037 && trunc_int_for_mode (offset, SImode) == offset)
2038 return 1;
2039 /* For CM_KERNEL we know that all object resist in the
2040 negative half of 32bits address space. We may not
2041 accept negative offsets, since they may be just off
2042 and we may accept pretty large possitive ones. */
2043 if (ix86_cmodel == CM_KERNEL
2044 && offset > 0
2045 && trunc_int_for_mode (offset, SImode) == offset)
2046 return 1;
2047 break;
2048 case LABEL_REF:
2049 /* These conditions are similar to SYMBOL_REF ones, just the
2050 constraints for code models differ. */
2051 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
2052 && offset < 1024*1024*1024
2053 && trunc_int_for_mode (offset, SImode) == offset)
2054 return 1;
2055 if (ix86_cmodel == CM_KERNEL
2056 && offset > 0
2057 && trunc_int_for_mode (offset, SImode) == offset)
2058 return 1;
2059 break;
2060 default:
2061 return 0;
2062 }
2063 }
2064 return 0;
2065 default:
2066 return 0;
2067 }
2068}
2069
2070/* Return 1 if VALUE can be stored in the zero extended immediate field. */
2071int
2072x86_64_zero_extended_value (value)
2073 rtx value;
2074{
2075 switch (GET_CODE (value))
2076 {
2077 case CONST_DOUBLE:
2078 if (HOST_BITS_PER_WIDE_INT == 32)
2079 return (GET_MODE (value) == VOIDmode
2080 && !CONST_DOUBLE_HIGH (value));
2081 else
2082 return 0;
2083 case CONST_INT:
2084 if (HOST_BITS_PER_WIDE_INT == 32)
2085 return INTVAL (value) >= 0;
2086 else
2087 return !(INTVAL (value) & ~(HOST_WIDE_INT)0xffffffff);
2088 break;
2089
2090 /* For certain code models, the symbolic references are known to fit. */
2091 case SYMBOL_REF:
2092 return ix86_cmodel == CM_SMALL;
2093
2094 /* For certain code models, the code is near as well. */
2095 case LABEL_REF:
2096 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
2097
2098 /* We also may accept the offsetted memory references in certain special
2099 cases. */
2100 case CONST:
2101 if (GET_CODE (XEXP (value, 0)) == PLUS)
2102 {
2103 rtx op1 = XEXP (XEXP (value, 0), 0);
2104 rtx op2 = XEXP (XEXP (value, 0), 1);
2105
2106 if (ix86_cmodel == CM_LARGE)
2107 return 0;
2108 switch (GET_CODE (op1))
2109 {
2110 case SYMBOL_REF:
2111 return 0;
2112 /* For small code model we may accept pretty large possitive
2113 offsets, since one bit is available for free. Negative
2114 offsets are limited by the size of NULL pointer area
2115 specified by the ABI. */
2116 if (ix86_cmodel == CM_SMALL
2117 && GET_CODE (op2) == CONST_INT
2118 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
2119 && (trunc_int_for_mode (INTVAL (op2), SImode)
2120 == INTVAL (op2)))
2121 return 1;
2122 /* ??? For the kernel, we may accept adjustment of
2123 -0x10000000, since we know that it will just convert
2124 negative address space to possitive, but perhaps this
2125 is not worthwhile. */
2126 break;
2127 case LABEL_REF:
2128 /* These conditions are similar to SYMBOL_REF ones, just the
2129 constraints for code models differ. */
2130 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
2131 && GET_CODE (op2) == CONST_INT
2132 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
2133 && (trunc_int_for_mode (INTVAL (op2), SImode)
2134 == INTVAL (op2)))
2135 return 1;
2136 break;
2137 default:
2138 return 0;
2139 }
2140 }
2141 return 0;
2142 default:
2143 return 0;
2144 }
2145}
6fca22eb
RH
2146
2147/* Value should be nonzero if functions must have frame pointers.
2148 Zero means the frame pointer need not be set up (and parms may
2149 be accessed via the stack pointer) in functions that seem suitable. */
2150
2151int
2152ix86_frame_pointer_required ()
2153{
2154 /* If we accessed previous frames, then the generated code expects
2155 to be able to access the saved ebp value in our frame. */
2156 if (cfun->machine->accesses_prev_frame)
2157 return 1;
2158
2159 /* Several x86 os'es need a frame pointer for other reasons,
2160 usually pertaining to setjmp. */
2161 if (SUBTARGET_FRAME_POINTER_REQUIRED)
2162 return 1;
2163
2164 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
2165 the frame pointer by default. Turn it back on now if we've not
2166 got a leaf function. */
2167 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
2168 return 1;
2169
2170 return 0;
2171}
2172
2173/* Record that the current function accesses previous call frames. */
2174
2175void
2176ix86_setup_frame_addresses ()
2177{
2178 cfun->machine->accesses_prev_frame = 1;
2179}
e075ae69 2180\f
4cf12e7e 2181static char pic_label_name[32];
e9a25f70 2182
e075ae69
RH
2183/* This function generates code for -fpic that loads %ebx with
2184 the return address of the caller and then returns. */
2185
2186void
4cf12e7e 2187ix86_asm_file_end (file)
e075ae69 2188 FILE *file;
e075ae69
RH
2189{
2190 rtx xops[2];
32b5b1aa 2191
4cf12e7e
RH
2192 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
2193 return;
32b5b1aa 2194
c7f0da1d
RH
2195 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
2196 to updating relocations to a section being discarded such that this
2197 doesn't work. Ought to detect this at configure time. */
2198#if 0 && defined (ASM_OUTPUT_SECTION_NAME)
4cf12e7e
RH
2199 /* The trick here is to create a linkonce section containing the
2200 pic label thunk, but to refer to it with an internal label.
2201 Because the label is internal, we don't have inter-dso name
2202 binding issues on hosts that don't support ".hidden".
e9a25f70 2203
4cf12e7e
RH
2204 In order to use these macros, however, we must create a fake
2205 function decl. */
2206 {
2207 tree decl = build_decl (FUNCTION_DECL,
2208 get_identifier ("i686.get_pc_thunk"),
2209 error_mark_node);
2210 DECL_ONE_ONLY (decl) = 1;
2211 UNIQUE_SECTION (decl, 0);
2212 named_section (decl, NULL, 0);
2213 }
2214#else
2215 text_section ();
2216#endif
0afeb08a 2217
4cf12e7e
RH
2218 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
2219 internal (non-global) label that's being emitted, it didn't make
2220 sense to have .type information for local labels. This caused
2221 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
2222 me debug info for a label that you're declaring non-global?) this
2223 was changed to call ASM_OUTPUT_LABEL() instead. */
2224
2225 ASM_OUTPUT_LABEL (file, pic_label_name);
2226
2227 xops[0] = pic_offset_table_rtx;
2228 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
2229 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
2230 output_asm_insn ("ret", xops);
32b5b1aa 2231}
32b5b1aa 2232
e075ae69
RH
2233void
2234load_pic_register ()
32b5b1aa 2235{
e075ae69 2236 rtx gotsym, pclab;
32b5b1aa 2237
0d7d98ee
JH
2238 if (TARGET_64BIT)
2239 abort();
2240
a8a05998 2241 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
32b5b1aa 2242
e075ae69 2243 if (TARGET_DEEP_BRANCH_PREDICTION)
32b5b1aa 2244 {
4cf12e7e
RH
2245 if (! pic_label_name[0])
2246 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
e075ae69 2247 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
32b5b1aa 2248 }
e075ae69 2249 else
e5cb57e8 2250 {
e075ae69 2251 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
e5cb57e8 2252 }
e5cb57e8 2253
e075ae69 2254 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
2a2ab3f9 2255
e075ae69
RH
2256 if (! TARGET_DEEP_BRANCH_PREDICTION)
2257 emit_insn (gen_popsi1 (pic_offset_table_rtx));
79325812 2258
e075ae69 2259 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
e9a25f70 2260}
8dfe5673 2261
0d7d98ee 2262/* Generate an "push" pattern for input ARG. */
e9a25f70 2263
e075ae69
RH
2264static rtx
2265gen_push (arg)
2266 rtx arg;
e9a25f70 2267{
c5c76735 2268 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
2269 gen_rtx_MEM (Pmode,
2270 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
2271 stack_pointer_rtx)),
2272 arg);
e9a25f70
JL
2273}
2274
4dd2ac2c
JH
2275/* Return 1 if we need to save REGNO. */
2276static int
1020a5ab
RH
2277ix86_save_reg (regno, maybe_eh_return)
2278 int regno;
37a58036 2279 int maybe_eh_return;
1020a5ab
RH
2280{
2281 if (flag_pic
2282 && ! TARGET_64BIT
2283 && regno == PIC_OFFSET_TABLE_REGNUM
2284 && (current_function_uses_pic_offset_table
2285 || current_function_uses_const_pool
2286 || current_function_calls_eh_return))
2287 return 1;
2288
2289 if (current_function_calls_eh_return && maybe_eh_return)
2290 {
2291 unsigned i;
2292 for (i = 0; ; i++)
2293 {
2294 unsigned test = EH_RETURN_DATA_REGNO(i);
2295 if (test == INVALID_REGNUM)
2296 break;
2297 if (test == (unsigned) regno)
2298 return 1;
2299 }
2300 }
4dd2ac2c 2301
1020a5ab
RH
2302 return (regs_ever_live[regno]
2303 && !call_used_regs[regno]
2304 && !fixed_regs[regno]
2305 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4dd2ac2c
JH
2306}
2307
0903fcab
JH
2308/* Return number of registers to be saved on the stack. */
2309
2310static int
2311ix86_nsaved_regs ()
2312{
2313 int nregs = 0;
0903fcab
JH
2314 int regno;
2315
4dd2ac2c 2316 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 2317 if (ix86_save_reg (regno, true))
4dd2ac2c 2318 nregs++;
0903fcab
JH
2319 return nregs;
2320}
2321
2322/* Return the offset between two registers, one to be eliminated, and the other
2323 its replacement, at the start of a routine. */
2324
2325HOST_WIDE_INT
2326ix86_initial_elimination_offset (from, to)
2327 int from;
2328 int to;
2329{
4dd2ac2c
JH
2330 struct ix86_frame frame;
2331 ix86_compute_frame_layout (&frame);
564d80f4
JH
2332
2333 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 2334 return frame.hard_frame_pointer_offset;
564d80f4
JH
2335 else if (from == FRAME_POINTER_REGNUM
2336 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 2337 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
2338 else
2339 {
564d80f4
JH
2340 if (to != STACK_POINTER_REGNUM)
2341 abort ();
2342 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 2343 return frame.stack_pointer_offset;
564d80f4
JH
2344 else if (from != FRAME_POINTER_REGNUM)
2345 abort ();
0903fcab 2346 else
4dd2ac2c 2347 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
2348 }
2349}
2350
4dd2ac2c 2351/* Fill structure ix86_frame about frame of currently computed function. */
0f290768 2352
4dd2ac2c
JH
2353static void
2354ix86_compute_frame_layout (frame)
2355 struct ix86_frame *frame;
65954bd8 2356{
65954bd8 2357 HOST_WIDE_INT total_size;
564d80f4 2358 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
44affdae
JH
2359 int offset;
2360 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4dd2ac2c 2361 HOST_WIDE_INT size = get_frame_size ();
65954bd8 2362
4dd2ac2c 2363 frame->nregs = ix86_nsaved_regs ();
564d80f4 2364 total_size = size;
65954bd8 2365
4dd2ac2c
JH
2366 /* Skip return value and save base pointer. */
2367 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
2368
2369 frame->hard_frame_pointer_offset = offset;
564d80f4 2370
fcbfaa65
RK
2371 /* Do some sanity checking of stack_alignment_needed and
2372 preferred_alignment, since i386 port is the only using those features
2373 that may break easilly. */
564d80f4 2374
44affdae
JH
2375 if (size && !stack_alignment_needed)
2376 abort ();
44affdae
JH
2377 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
2378 abort ();
2379 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
2380 abort ();
2381 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
2382 abort ();
564d80f4 2383
4dd2ac2c
JH
2384 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
2385 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 2386
4dd2ac2c
JH
2387 /* Register save area */
2388 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 2389
8362f420
JH
2390 /* Va-arg area */
2391 if (ix86_save_varrargs_registers)
2392 {
2393 offset += X86_64_VARARGS_SIZE;
2394 frame->va_arg_size = X86_64_VARARGS_SIZE;
2395 }
2396 else
2397 frame->va_arg_size = 0;
2398
4dd2ac2c
JH
2399 /* Align start of frame for local function. */
2400 frame->padding1 = ((offset + stack_alignment_needed - 1)
2401 & -stack_alignment_needed) - offset;
f73ad30e 2402
4dd2ac2c 2403 offset += frame->padding1;
65954bd8 2404
4dd2ac2c
JH
2405 /* Frame pointer points here. */
2406 frame->frame_pointer_offset = offset;
54ff41b7 2407
4dd2ac2c 2408 offset += size;
65954bd8 2409
4dd2ac2c 2410 /* Add outgoing arguments area. */
f73ad30e 2411 if (ACCUMULATE_OUTGOING_ARGS)
4dd2ac2c
JH
2412 {
2413 offset += current_function_outgoing_args_size;
2414 frame->outgoing_arguments_size = current_function_outgoing_args_size;
2415 }
2416 else
2417 frame->outgoing_arguments_size = 0;
564d80f4 2418
4dd2ac2c
JH
2419 /* Align stack boundary. */
2420 frame->padding2 = ((offset + preferred_alignment - 1)
2421 & -preferred_alignment) - offset;
2422
2423 offset += frame->padding2;
2424
2425 /* We've reached end of stack frame. */
2426 frame->stack_pointer_offset = offset;
2427
2428 /* Size prologue needs to allocate. */
2429 frame->to_allocate =
2430 (size + frame->padding1 + frame->padding2
8362f420 2431 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 2432
8362f420
JH
2433 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
2434 && current_function_is_leaf)
2435 {
2436 frame->red_zone_size = frame->to_allocate;
2437 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
2438 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
2439 }
2440 else
2441 frame->red_zone_size = 0;
2442 frame->to_allocate -= frame->red_zone_size;
2443 frame->stack_pointer_offset -= frame->red_zone_size;
4dd2ac2c
JH
2444#if 0
2445 fprintf (stderr, "nregs: %i\n", frame->nregs);
2446 fprintf (stderr, "size: %i\n", size);
2447 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
2448 fprintf (stderr, "padding1: %i\n", frame->padding1);
8362f420 2449 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4dd2ac2c
JH
2450 fprintf (stderr, "padding2: %i\n", frame->padding2);
2451 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
8362f420 2452 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4dd2ac2c
JH
2453 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
2454 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
2455 frame->hard_frame_pointer_offset);
2456 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
2457#endif
65954bd8
JL
2458}
2459
0903fcab
JH
2460/* Emit code to save registers in the prologue. */
2461
2462static void
2463ix86_emit_save_regs ()
2464{
2465 register int regno;
0903fcab 2466 rtx insn;
0903fcab 2467
4dd2ac2c 2468 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 2469 if (ix86_save_reg (regno, true))
0903fcab 2470 {
0d7d98ee 2471 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
2472 RTX_FRAME_RELATED_P (insn) = 1;
2473 }
2474}
2475
c6036a37
JH
2476/* Emit code to save registers using MOV insns. First register
2477 is restored from POINTER + OFFSET. */
2478static void
2479ix86_emit_save_regs_using_mov (pointer, offset)
2480 rtx pointer;
2481 HOST_WIDE_INT offset;
2482{
2483 int regno;
2484 rtx insn;
2485
2486 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2487 if (ix86_save_reg (regno, true))
2488 {
2489 insn = emit_move_insn (adj_offsettable_operand (gen_rtx_MEM (Pmode,
2490 pointer),
2491 offset),
2492 gen_rtx_REG (Pmode, regno));
2493 RTX_FRAME_RELATED_P (insn) = 1;
2494 offset += UNITS_PER_WORD;
2495 }
2496}
2497
0f290768 2498/* Expand the prologue into a bunch of separate insns. */
e075ae69
RH
2499
2500void
2501ix86_expand_prologue ()
2a2ab3f9 2502{
564d80f4 2503 rtx insn;
0d7d98ee
JH
2504 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
2505 || current_function_uses_const_pool)
2506 && !TARGET_64BIT);
4dd2ac2c 2507 struct ix86_frame frame;
c6036a37
JH
2508 int use_mov = (TARGET_PROLOGUE_USING_MOVE && !optimize_size);
2509 HOST_WIDE_INT allocate;
4dd2ac2c
JH
2510
2511 ix86_compute_frame_layout (&frame);
79325812 2512
e075ae69
RH
2513 /* Note: AT&T enter does NOT have reversed args. Enter is probably
2514 slower on all targets. Also sdb doesn't like it. */
e9a25f70 2515
2a2ab3f9
JVA
2516 if (frame_pointer_needed)
2517 {
564d80f4 2518 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
e075ae69 2519 RTX_FRAME_RELATED_P (insn) = 1;
e9a25f70 2520
564d80f4 2521 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
e075ae69 2522 RTX_FRAME_RELATED_P (insn) = 1;
2a2ab3f9
JVA
2523 }
2524
c6036a37
JH
2525 allocate = frame.to_allocate;
2526 /* In case we are dealing only with single register and empty frame,
2527 push is equivalent of the mov+add sequence. */
2528 if (allocate == 0 && frame.nregs <= 1)
2529 use_mov = 0;
2530
2531 if (!use_mov)
2532 ix86_emit_save_regs ();
2533 else
2534 allocate += frame.nregs * UNITS_PER_WORD;
564d80f4 2535
c6036a37 2536 if (allocate == 0)
8dfe5673 2537 ;
e323735c 2538 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
469ac993 2539 {
f2042df3
RH
2540 insn = emit_insn (gen_pro_epilogue_adjust_stack
2541 (stack_pointer_rtx, stack_pointer_rtx,
e323735c 2542 GEN_INT (-allocate)));
e075ae69 2543 RTX_FRAME_RELATED_P (insn) = 1;
469ac993 2544 }
79325812 2545 else
8dfe5673 2546 {
e075ae69 2547 /* ??? Is this only valid for Win32? */
e9a25f70 2548
e075ae69 2549 rtx arg0, sym;
e9a25f70 2550
8362f420
JH
2551 if (TARGET_64BIT)
2552 abort();
2553
e075ae69 2554 arg0 = gen_rtx_REG (SImode, 0);
c6036a37 2555 emit_move_insn (arg0, GEN_INT (allocate));
77a989d1 2556
e075ae69
RH
2557 sym = gen_rtx_MEM (FUNCTION_MODE,
2558 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
32ee7d1d 2559 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
e075ae69
RH
2560
2561 CALL_INSN_FUNCTION_USAGE (insn)
276ab4a4
RH
2562 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
2563 CALL_INSN_FUNCTION_USAGE (insn));
e075ae69 2564 }
c6036a37
JH
2565 if (use_mov)
2566 {
2567 if (!frame_pointer_needed || !frame.to_allocate)
2568 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
2569 else
2570 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
2571 -frame.nregs * UNITS_PER_WORD);
2572 }
e9a25f70 2573
84530511
SC
2574#ifdef SUBTARGET_PROLOGUE
2575 SUBTARGET_PROLOGUE;
0f290768 2576#endif
84530511 2577
e9a25f70 2578 if (pic_reg_used)
e075ae69 2579 load_pic_register ();
77a989d1 2580
e9a25f70
JL
2581 /* If we are profiling, make sure no instructions are scheduled before
2582 the call to mcount. However, if -fpic, the above call will have
2583 done that. */
e075ae69 2584 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
e9a25f70 2585 emit_insn (gen_blockage ());
77a989d1
SC
2586}
2587
da2d1d3a
JH
2588/* Emit code to restore saved registers using MOV insns. First register
2589 is restored from POINTER + OFFSET. */
2590static void
1020a5ab
RH
2591ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
2592 rtx pointer;
2593 int offset;
37a58036 2594 int maybe_eh_return;
da2d1d3a
JH
2595{
2596 int regno;
da2d1d3a 2597
4dd2ac2c 2598 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 2599 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 2600 {
4dd2ac2c
JH
2601 emit_move_insn (gen_rtx_REG (Pmode, regno),
2602 adj_offsettable_operand (gen_rtx_MEM (Pmode,
da2d1d3a
JH
2603 pointer),
2604 offset));
4dd2ac2c 2605 offset += UNITS_PER_WORD;
da2d1d3a
JH
2606 }
2607}
2608
0f290768 2609/* Restore function stack, frame, and registers. */
e9a25f70 2610
2a2ab3f9 2611void
1020a5ab
RH
2612ix86_expand_epilogue (style)
2613 int style;
2a2ab3f9 2614{
1c71e60e 2615 int regno;
fdb8a883 2616 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4dd2ac2c 2617 struct ix86_frame frame;
65954bd8 2618 HOST_WIDE_INT offset;
4dd2ac2c
JH
2619
2620 ix86_compute_frame_layout (&frame);
2a2ab3f9 2621
84e306b4
RH
2622 /* Calculate start of saved registers relative to ebp. Special care
2623 must be taken for the normal return case of a function using
2624 eh_return: the eax and edx registers are marked as saved, but not
2625 restored along this path. */
2626 offset = frame.nregs;
2627 if (current_function_calls_eh_return && style != 2)
2628 offset -= 2;
2629 offset *= -UNITS_PER_WORD;
2a2ab3f9 2630
1c71e60e
JH
2631#ifdef FUNCTION_BLOCK_PROFILER_EXIT
2632 if (profile_block_flag == 2)
564d80f4 2633 {
1c71e60e 2634 FUNCTION_BLOCK_PROFILER_EXIT;
564d80f4 2635 }
1c71e60e 2636#endif
564d80f4 2637
fdb8a883
JW
2638 /* If we're only restoring one register and sp is not valid then
2639 using a move instruction to restore the register since it's
0f290768 2640 less work than reloading sp and popping the register.
da2d1d3a
JH
2641
2642 The default code result in stack adjustment using add/lea instruction,
2643 while this code results in LEAVE instruction (or discrete equivalent),
2644 so it is profitable in some other cases as well. Especially when there
2645 are no registers to restore. We also use this code when TARGET_USE_LEAVE
2646 and there is exactly one register to pop. This heruistic may need some
2647 tuning in future. */
4dd2ac2c 2648 if ((!sp_valid && frame.nregs <= 1)
c6036a37
JH
2649 || (TARGET_EPILOGUE_USING_MOVE && !optimize_size
2650 && (frame.nregs > 1 || frame.to_allocate))
4dd2ac2c 2651 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
da2d1d3a 2652 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
1020a5ab
RH
2653 && frame.nregs == 1)
2654 || style == 2)
2a2ab3f9 2655 {
da2d1d3a
JH
2656 /* Restore registers. We can use ebp or esp to address the memory
2657 locations. If both are available, default to ebp, since offsets
2658 are known to be small. Only exception is esp pointing directly to the
2659 end of block of saved registers, where we may simplify addressing
2660 mode. */
2661
4dd2ac2c 2662 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
1020a5ab
RH
2663 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
2664 frame.to_allocate, style == 2);
da2d1d3a 2665 else
1020a5ab
RH
2666 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
2667 offset, style == 2);
2668
2669 /* eh_return epilogues need %ecx added to the stack pointer. */
2670 if (style == 2)
2671 {
2672 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2a2ab3f9 2673
1020a5ab
RH
2674 if (frame_pointer_needed)
2675 {
2676 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
2677 tmp = plus_constant (tmp, UNITS_PER_WORD);
2678 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
2679
2680 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
2681 emit_move_insn (hard_frame_pointer_rtx, tmp);
2682
2683 emit_insn (gen_pro_epilogue_adjust_stack
f2042df3 2684 (stack_pointer_rtx, sa, const0_rtx));
1020a5ab
RH
2685 }
2686 else
2687 {
2688 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
2689 tmp = plus_constant (tmp, (frame.to_allocate
2690 + frame.nregs * UNITS_PER_WORD));
2691 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
2692 }
2693 }
2694 else if (!frame_pointer_needed)
f2042df3
RH
2695 emit_insn (gen_pro_epilogue_adjust_stack
2696 (stack_pointer_rtx, stack_pointer_rtx,
2697 GEN_INT (frame.to_allocate
2698 + frame.nregs * UNITS_PER_WORD)));
0f290768 2699 /* If not an i386, mov & pop is faster than "leave". */
da2d1d3a 2700 else if (TARGET_USE_LEAVE || optimize_size)
8362f420 2701 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
c8c5cb99 2702 else
2a2ab3f9 2703 {
1c71e60e
JH
2704 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2705 hard_frame_pointer_rtx,
f2042df3 2706 const0_rtx));
8362f420
JH
2707 if (TARGET_64BIT)
2708 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
2709 else
2710 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
e9a25f70
JL
2711 }
2712 }
1c71e60e 2713 else
68f654ec 2714 {
1c71e60e
JH
2715 /* First step is to deallocate the stack frame so that we can
2716 pop the registers. */
2717 if (!sp_valid)
2718 {
2719 if (!frame_pointer_needed)
2720 abort ();
2721 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2722 hard_frame_pointer_rtx,
f2042df3 2723 GEN_INT (offset)));
1c71e60e 2724 }
4dd2ac2c 2725 else if (frame.to_allocate)
f2042df3
RH
2726 emit_insn (gen_pro_epilogue_adjust_stack
2727 (stack_pointer_rtx, stack_pointer_rtx,
2728 GEN_INT (frame.to_allocate)));
1c71e60e 2729
4dd2ac2c 2730 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 2731 if (ix86_save_reg (regno, false))
8362f420
JH
2732 {
2733 if (TARGET_64BIT)
2734 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
2735 else
2736 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
2737 }
4dd2ac2c 2738 if (frame_pointer_needed)
8362f420
JH
2739 {
2740 if (TARGET_64BIT)
2741 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
2742 else
2743 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2744 }
68f654ec 2745 }
68f654ec 2746
cbbf65e0 2747 /* Sibcall epilogues don't want a return instruction. */
1020a5ab 2748 if (style == 0)
cbbf65e0
RH
2749 return;
2750
2a2ab3f9
JVA
2751 if (current_function_pops_args && current_function_args_size)
2752 {
e075ae69 2753 rtx popc = GEN_INT (current_function_pops_args);
2a2ab3f9 2754
b8c752c8
UD
2755 /* i386 can only pop 64K bytes. If asked to pop more, pop
2756 return address, do explicit add, and jump indirectly to the
0f290768 2757 caller. */
2a2ab3f9 2758
b8c752c8 2759 if (current_function_pops_args >= 65536)
2a2ab3f9 2760 {
e075ae69 2761 rtx ecx = gen_rtx_REG (SImode, 2);
e9a25f70 2762
8362f420
JH
2763 /* There are is no "pascal" calling convention in 64bit ABI. */
2764 if (TARGET_64BIT)
2765 abort();
2766
e075ae69
RH
2767 emit_insn (gen_popsi1 (ecx));
2768 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 2769 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 2770 }
79325812 2771 else
e075ae69
RH
2772 emit_jump_insn (gen_return_pop_internal (popc));
2773 }
2774 else
2775 emit_jump_insn (gen_return_internal ());
2776}
2777\f
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return false if the structure of the address is
   grossly off.

   On success, fill in OUT with the four canonical address components:
   base register, index register, scale factor (1, or the multiplier
   taken from a MULT/ASHIFT), and displacement.  Components that do not
   appear are left as NULL_RTX (scale defaults to 1).  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;

  /* Match the canonical shapes: reg, reg+reg, reg+disp, index*scale
     (+base) (+disp), bare mult/ashift, or a lone displacement.  */
  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;		/* index*scale + base */
	  else
	    disp = op1;		/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return FALSE;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.
	 Only shift counts 0..3 can be expressed as a scale (1,2,4,8).  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return FALSE;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return FALSE;
      scale = 1 << scale;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return FALSE;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is no scaling;
     swapping base and index is harmless when scale is 1.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
          || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return TRUE;
}
01329426
JH
2903\f
2904/* Return cost of the memory address x.
2905 For i386, it is better to use a complex address than let gcc copy
2906 the address into a reg and make a new pseudo. But not if the address
2907 requires to two regs - that would mean more pseudos with longer
2908 lifetimes. */
2909int
2910ix86_address_cost (x)
2911 rtx x;
2912{
2913 struct ix86_address parts;
2914 int cost = 1;
3b3c6a3f 2915
01329426
JH
2916 if (!ix86_decompose_address (x, &parts))
2917 abort ();
2918
2919 /* More complex memory references are better. */
2920 if (parts.disp && parts.disp != const0_rtx)
2921 cost--;
2922
2923 /* Attempt to minimize number of registers in the address. */
2924 if ((parts.base
2925 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2926 || (parts.index
2927 && (!REG_P (parts.index)
2928 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2929 cost++;
2930
2931 if (parts.base
2932 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2933 && parts.index
2934 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2935 && parts.base != parts.index)
2936 cost++;
2937
2938 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
2939 since it's predecode logic can't detect the length of instructions
2940 and it degenerates to vector decoded. Increase cost of such
2941 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 2942 to split such addresses or even refuse such addresses at all.
01329426
JH
2943
2944 Following addressing modes are affected:
2945 [base+scale*index]
2946 [scale*index+disp]
2947 [base+index]
0f290768 2948
01329426
JH
2949 The first and last case may be avoidable by explicitly coding the zero in
2950 memory address, but I don't have AMD-K6 machine handy to check this
2951 theory. */
2952
2953 if (TARGET_K6
2954 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2955 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2956 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2957 cost += 10;
0f290768 2958
01329426
JH
2959 return cost;
2960}
2961\f
b949ea8b
JW
2962/* If X is a machine specific address (i.e. a symbol or label being
2963 referenced as a displacement from the GOT implemented using an
2964 UNSPEC), then return the base term. Otherwise return X. */
2965
2966rtx
2967ix86_find_base_term (x)
2968 rtx x;
2969{
2970 rtx term;
2971
2972 if (GET_CODE (x) != PLUS
2973 || XEXP (x, 0) != pic_offset_table_rtx
2974 || GET_CODE (XEXP (x, 1)) != CONST)
2975 return x;
2976
2977 term = XEXP (XEXP (x, 1), 0);
2978
2979 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
2980 term = XEXP (term, 0);
2981
2982 if (GET_CODE (term) != UNSPEC
2983 || XVECLEN (term, 0) != 1
2984 || XINT (term, 1) != 7)
2985 return x;
2986
2987 term = XVECEXP (term, 0, 0);
2988
2989 if (GET_CODE (term) != SYMBOL_REF
2990 && GET_CODE (term) != LABEL_REF)
2991 return x;
2992
2993 return term;
2994}
2995\f
e075ae69
RH
2996/* Determine if a given CONST RTX is a valid memory displacement
2997 in PIC mode. */
0f290768 2998
59be65f6 2999int
91bb873f
RH
3000legitimate_pic_address_disp_p (disp)
3001 register rtx disp;
3002{
3003 if (GET_CODE (disp) != CONST)
3004 return 0;
3005 disp = XEXP (disp, 0);
3006
3007 if (GET_CODE (disp) == PLUS)
3008 {
3009 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
3010 return 0;
3011 disp = XEXP (disp, 0);
3012 }
3013
3014 if (GET_CODE (disp) != UNSPEC
3015 || XVECLEN (disp, 0) != 1)
3016 return 0;
3017
3018 /* Must be @GOT or @GOTOFF. */
3019 if (XINT (disp, 1) != 6
3020 && XINT (disp, 1) != 7)
3021 return 0;
3022
3023 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3024 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
3025 return 0;
3026
3027 return 1;
3028}
3029
e075ae69
RH
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.

   Returns TRUE when the address is acceptable, FALSE otherwise.  STRICT
   selects the strict variants of the REG_OK_FOR_{BASE,INDEX}_P checks.
   On failure, when TARGET_DEBUG_ADDR is set, a human-readable reason and
   the offending sub-rtx are dumped to stderr.  */

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (! ix86_decompose_address (addr, &parts))
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  Hardware encodes only 1, 2, 4 or 8, and a
     scale is meaningless without an index register.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (!CONSTANT_ADDRESS_P (disp))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}

      if (TARGET_64BIT)
	{
	  /* x86-64 displacements are sign-extended 32-bit values.  */
	  if (!x86_64_sign_extended_value (disp))
	    {
	      reason = "displacement is out of range";
	      goto report_error;
	    }
	}
      else
	{
	  if (GET_CODE (disp) == CONST_DOUBLE)
	    {
	      reason = "displacement is a const_double";
	      goto report_error;
	    }
	}

      if (flag_pic && SYMBOLIC_CONST (disp))
	{
	  if (TARGET_64BIT && (index || base))
	    {
	      reason = "non-constant pic memory reference";
	      goto report_error;
	    }
	  if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easilly, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (HALF_PIC_P ())
	{
	  /* Half-pic references must stand alone: no base or index.  */
	  if (! HALF_PIC_ADDRESS_P (disp)
	      || (base != NULL_RTX || index != NULL_RTX))
	    {
	      reason = "displacement is an invalid half-pic reference";
	      goto report_error;
	    }
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
3b3c6a3f 3233\f
55efb413
JW
3234/* Return an unique alias set for the GOT. */
3235
0f290768 3236static HOST_WIDE_INT
55efb413
JW
3237ix86_GOT_alias_set ()
3238{
3239 static HOST_WIDE_INT set = -1;
3240 if (set == -1)
3241 set = new_alias_set ();
3242 return set;
0f290768 3243}
55efb413 3244
3b3c6a3f
MM
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

  if (GET_CODE (addr) == LABEL_REF
      || (GET_CODE (addr) == SYMBOL_REF
	  && (CONSTANT_POOL_ADDRESS_P (addr)
	      || SYMBOL_REF_FLAG (addr))))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  UNSPEC number 7 prints as @GOTOFF
	 (see output_pic_addr_const).  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      /* This symbol must be referenced via a load from the
	 Global Offset Table (@GOT).  UNSPEC number 6 prints as @GOT
	 (see output_pic_addr_const).  The GOT slot never changes, hence
	 RTX_UNCHANGING_P, and gets its own alias set.  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
      new = gen_rtx_MEM (Pmode, new);
      RTX_UNCHANGING_P (new) = 1;
      MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();

      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      emit_move_insn (reg, new);
      new = reg;
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == UNSPEC)
	    {
	      /* Check that the unspec is one of the ones we generate?  */
	    }
	  else if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if ((GET_CODE (op0) == LABEL_REF
	       || (GET_CODE (op0) == SYMBOL_REF
		   && (CONSTANT_POOL_ADDRESS_P (op0)
		       || SYMBOL_REF_FLAG (op0))))
	      && GET_CODE (op1) == CONST_INT)
	    {
	      current_function_uses_pic_offset_table = 1;
	      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
	      new = gen_rtx_PLUS (Pmode, new, op1);
	      new = gen_rtx_CONST (Pmode, new);
	      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

	      if (reg != 0)
		{
		  emit_move_insn (reg, new);
		  new = reg;
		}
	    }
	  else
	    {
	      /* Legitimize both operands recursively; REG may only be
		 reused for the second operand if the first did not
		 consume it.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new = legitimize_pic_address (XEXP (addr, 1),
					    base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  /* Keep the constant part outermost in the result.  */
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}
3367\f
3b3c6a3f
MM
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (x, oldx, mode)
     register rtx x;
     register rtx oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.
     exact_log2 returns -1 for a non-power-of-two, which the unsigned
     cast turns into a huge value that fails the < 4 test.  */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  /* Exactly one of the two constants must be a CONST_INT so the
	     pair can be folded with plus_constant below.  */
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Break multiplications out into their own pseudos.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Last resort: force the non-register operand into a register.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
2a2ab3f9
JVA
3552\f
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      /* "." denotes the current location, but only makes sense
	 when assembling PIC.  */
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      /* 'P': calls to non-static symbols go through the PLT.  */
      if (code == 'P' && ! SYMBOL_REF_FLAG (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      /* Group the difference: parens in Intel dialect,
	 brackets in AT&T dialect.  */
      putc (ASSEMBLER_DIALECT ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      /* The UNSPEC numbers match those built by legitimize_pic_address.  */
      switch (XINT (x, 1))
	{
	case 6:
	  fputs ("@GOT", file);
	  break;
	case 7:
	  fputs ("@GOTOFF", file);
	  break;
	case 8:
	  fputs ("@PLT", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
1865dbb5 3667
0f290768 3668/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
3669 We need to handle our special PIC relocations. */
3670
0f290768 3671void
1865dbb5
JM
3672i386_dwarf_output_addr_const (file, x)
3673 FILE *file;
3674 rtx x;
3675{
f0ca81d2 3676 fprintf (file, "%s", INT_ASM_OP);
1865dbb5
JM
3677 if (flag_pic)
3678 output_pic_addr_const (file, x, '\0');
3679 else
3680 output_addr_const (file, x);
3681 fputc ('\n', file);
3682}
3683
3684/* In the name of slightly smaller debug output, and to cater to
3685 general assembler losage, recognize PIC+GOTOFF and turn it back
3686 into a direct symbol reference. */
3687
3688rtx
3689i386_simplify_dwarf_addr (orig_x)
3690 rtx orig_x;
3691{
3692 rtx x = orig_x;
3693
3694 if (GET_CODE (x) != PLUS
3695 || GET_CODE (XEXP (x, 0)) != REG
3696 || GET_CODE (XEXP (x, 1)) != CONST)
3697 return orig_x;
3698
3699 x = XEXP (XEXP (x, 1), 0);
3700 if (GET_CODE (x) == UNSPEC
3adbce3d
RH
3701 && (XINT (x, 1) == 6
3702 || XINT (x, 1) == 7))
1865dbb5
JM
3703 return XVECEXP (x, 0, 0);
3704
3705 if (GET_CODE (x) == PLUS
3706 && GET_CODE (XEXP (x, 0)) == UNSPEC
3707 && GET_CODE (XEXP (x, 1)) == CONST_INT
3adbce3d
RH
3708 && (XINT (XEXP (x, 0), 1) == 6
3709 || XINT (XEXP (x, 0), 1) == 7))
1865dbb5
JM
3710 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
3711
3712 return orig_x;
3713}
2a2ab3f9 3714\f
/* Write to FILE the instruction-suffix string ("e", "ne", "g", ...)
   for comparison CODE evaluated in condition-code mode MODE.  If
   REVERSE is nonzero the condition is reversed first.  FP nonzero
   selects the spellings needed for fcmov ("nbe"/"nb"/"u"/"nu"
   instead of "a"/"ae"/"p"/"np").  Aborts on a CODE/MODE combination
   that has no valid encoding.  */

static void
put_condition_code (code, mode, reverse, fp, file)
     enum rtx_code code;
     enum machine_mode mode;
     int reverse, fp;
     FILE *file;
{
  const char *suffix;

  /* Floating point comparisons are first reduced to an equivalent
     integer-flags comparison in CCmode.  */
  if (mode == CCFPmode || mode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	abort();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
	 Those same assemblers have the same but opposite losage on cmov.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case LTU:
      if (mode != CCmode)
	abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
	abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
3804
e075ae69
RH
3805void
3806print_reg (x, code, file)
3807 rtx x;
3808 int code;
3809 FILE *file;
e5cb57e8 3810{
e075ae69 3811 if (REGNO (x) == ARG_POINTER_REGNUM
564d80f4 3812 || REGNO (x) == FRAME_POINTER_REGNUM
e075ae69
RH
3813 || REGNO (x) == FLAGS_REG
3814 || REGNO (x) == FPSR_REG)
3815 abort ();
e9a25f70 3816
e075ae69
RH
3817 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3818 putc ('%', file);
3819
ef6257cd 3820 if (code == 'w' || MMX_REG_P (x))
e075ae69
RH
3821 code = 2;
3822 else if (code == 'b')
3823 code = 1;
3824 else if (code == 'k')
3825 code = 4;
3f3f2124
JH
3826 else if (code == 'q')
3827 code = 8;
e075ae69
RH
3828 else if (code == 'y')
3829 code = 3;
3830 else if (code == 'h')
3831 code = 0;
3832 else
3833 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 3834
3f3f2124
JH
3835 /* Irritatingly, AMD extended registers use different naming convention
3836 from the normal registers. */
3837 if (REX_INT_REG_P (x))
3838 {
885a70fd
JH
3839 if (!TARGET_64BIT)
3840 abort ();
3f3f2124
JH
3841 switch (code)
3842 {
ef6257cd 3843 case 0:
3f3f2124
JH
3844 error ("Extended registers have no high halves\n");
3845 break;
3846 case 1:
3847 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
3848 break;
3849 case 2:
3850 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
3851 break;
3852 case 4:
3853 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
3854 break;
3855 case 8:
3856 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
3857 break;
3858 default:
3859 error ("Unsupported operand size for extended register.\n");
3860 break;
3861 }
3862 return;
3863 }
e075ae69
RH
3864 switch (code)
3865 {
3866 case 3:
3867 if (STACK_TOP_P (x))
3868 {
3869 fputs ("st(0)", file);
3870 break;
3871 }
3872 /* FALLTHRU */
e075ae69 3873 case 8:
3f3f2124 3874 case 4:
e075ae69 3875 case 12:
446988df 3876 if (! ANY_FP_REG_P (x))
885a70fd 3877 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
e075ae69 3878 /* FALLTHRU */
a7180f70 3879 case 16:
e075ae69
RH
3880 case 2:
3881 fputs (hi_reg_name[REGNO (x)], file);
3882 break;
3883 case 1:
3884 fputs (qi_reg_name[REGNO (x)], file);
3885 break;
3886 case 0:
3887 fputs (qi_high_reg_name[REGNO (x)], file);
3888 break;
3889 default:
3890 abort ();
fe25fea3 3891 }
e5cb57e8
SC
3892}
3893
2a2ab3f9 3894/* Meaning of CODE:
fe25fea3 3895 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 3896 C -- print opcode suffix for set/cmov insn.
fe25fea3 3897 c -- like C, but print reversed condition
ef6257cd 3898 F,f -- likewise, but for floating-point.
2a2ab3f9
JVA
3899 R -- print the prefix for register names.
3900 z -- print the opcode suffix for the size of the current operand.
3901 * -- print a star (in certain assembler syntax)
fb204271 3902 A -- print an absolute memory reference.
2a2ab3f9 3903 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
3904 s -- print a shift double count, followed by the assemblers argument
3905 delimiter.
fe25fea3
SC
3906 b -- print the QImode name of the register for the indicated operand.
3907 %b0 would print %al if operands[0] is reg 0.
3908 w -- likewise, print the HImode name of the register.
3909 k -- likewise, print the SImode name of the register.
3f3f2124 3910 q -- likewise, print the DImode name of the register.
ef6257cd
JH
3911 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
3912 y -- print "st(0)" instead of "st" as a register.
a46d1d38 3913 D -- print condition for SSE cmp instruction.
ef6257cd
JH
3914 P -- if PIC, print an @PLT suffix.
3915 X -- don't print any sort of PIC '@' suffix for a symbol.
a46d1d38 3916 */
2a2ab3f9
JVA
3917
3918void
3919print_operand (file, x, code)
3920 FILE *file;
3921 rtx x;
3922 int code;
3923{
3924 if (code)
3925 {
3926 switch (code)
3927 {
3928 case '*':
e075ae69 3929 if (ASSEMBLER_DIALECT == 0)
2a2ab3f9
JVA
3930 putc ('*', file);
3931 return;
3932
fb204271
DN
3933 case 'A':
3934 if (ASSEMBLER_DIALECT == 0)
3935 putc ('*', file);
3936 else if (ASSEMBLER_DIALECT == 1)
3937 {
3938 /* Intel syntax. For absolute addresses, registers should not
3939 be surrounded by braces. */
3940 if (GET_CODE (x) != REG)
3941 {
3942 putc ('[', file);
3943 PRINT_OPERAND (file, x, 0);
3944 putc (']', file);
3945 return;
3946 }
3947 }
3948
3949 PRINT_OPERAND (file, x, 0);
3950 return;
3951
3952
2a2ab3f9 3953 case 'L':
e075ae69
RH
3954 if (ASSEMBLER_DIALECT == 0)
3955 putc ('l', file);
2a2ab3f9
JVA
3956 return;
3957
3958 case 'W':
e075ae69
RH
3959 if (ASSEMBLER_DIALECT == 0)
3960 putc ('w', file);
2a2ab3f9
JVA
3961 return;
3962
3963 case 'B':
e075ae69
RH
3964 if (ASSEMBLER_DIALECT == 0)
3965 putc ('b', file);
2a2ab3f9
JVA
3966 return;
3967
3968 case 'Q':
e075ae69
RH
3969 if (ASSEMBLER_DIALECT == 0)
3970 putc ('l', file);
2a2ab3f9
JVA
3971 return;
3972
3973 case 'S':
e075ae69
RH
3974 if (ASSEMBLER_DIALECT == 0)
3975 putc ('s', file);
2a2ab3f9
JVA
3976 return;
3977
5f1ec3e6 3978 case 'T':
e075ae69
RH
3979 if (ASSEMBLER_DIALECT == 0)
3980 putc ('t', file);
5f1ec3e6
JVA
3981 return;
3982
2a2ab3f9
JVA
3983 case 'z':
3984 /* 387 opcodes don't get size suffixes if the operands are
0f290768 3985 registers. */
2a2ab3f9
JVA
3986
3987 if (STACK_REG_P (x))
3988 return;
3989
3990 /* this is the size of op from size of operand */
3991 switch (GET_MODE_SIZE (GET_MODE (x)))
3992 {
2a2ab3f9 3993 case 2:
155d8a47
JW
3994#ifdef HAVE_GAS_FILDS_FISTS
3995 putc ('s', file);
3996#endif
2a2ab3f9
JVA
3997 return;
3998
3999 case 4:
4000 if (GET_MODE (x) == SFmode)
4001 {
e075ae69 4002 putc ('s', file);
2a2ab3f9
JVA
4003 return;
4004 }
4005 else
e075ae69 4006 putc ('l', file);
2a2ab3f9
JVA
4007 return;
4008
5f1ec3e6 4009 case 12:
2b589241 4010 case 16:
e075ae69
RH
4011 putc ('t', file);
4012 return;
5f1ec3e6 4013
2a2ab3f9
JVA
4014 case 8:
4015 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
4016 {
4017#ifdef GAS_MNEMONICS
e075ae69 4018 putc ('q', file);
56c0e8fa 4019#else
e075ae69
RH
4020 putc ('l', file);
4021 putc ('l', file);
56c0e8fa
JVA
4022#endif
4023 }
e075ae69
RH
4024 else
4025 putc ('l', file);
2a2ab3f9 4026 return;
155d8a47
JW
4027
4028 default:
4029 abort ();
2a2ab3f9 4030 }
4af3895e
JVA
4031
4032 case 'b':
4033 case 'w':
4034 case 'k':
3f3f2124 4035 case 'q':
4af3895e
JVA
4036 case 'h':
4037 case 'y':
5cb6195d 4038 case 'X':
e075ae69 4039 case 'P':
4af3895e
JVA
4040 break;
4041
2d49677f
SC
4042 case 's':
4043 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
4044 {
4045 PRINT_OPERAND (file, x, 0);
e075ae69 4046 putc (',', file);
2d49677f 4047 }
a269a03c
JC
4048 return;
4049
a46d1d38
JH
4050 case 'D':
4051 /* Little bit of braindamage here. The SSE compare instructions
4052 does use completely different names for the comparisons that the
4053 fp conditional moves. */
4054 switch (GET_CODE (x))
4055 {
4056 case EQ:
4057 case UNEQ:
4058 fputs ("eq", file);
4059 break;
4060 case LT:
4061 case UNLT:
4062 fputs ("lt", file);
4063 break;
4064 case LE:
4065 case UNLE:
4066 fputs ("le", file);
4067 break;
4068 case UNORDERED:
4069 fputs ("unord", file);
4070 break;
4071 case NE:
4072 case LTGT:
4073 fputs ("neq", file);
4074 break;
4075 case UNGE:
4076 case GE:
4077 fputs ("nlt", file);
4078 break;
4079 case UNGT:
4080 case GT:
4081 fputs ("nle", file);
4082 break;
4083 case ORDERED:
4084 fputs ("ord", file);
4085 break;
4086 default:
4087 abort ();
4088 break;
4089 }
4090 return;
1853aadd 4091 case 'C':
e075ae69 4092 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 4093 return;
fe25fea3 4094 case 'F':
e075ae69 4095 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
4096 return;
4097
e9a25f70 4098 /* Like above, but reverse condition */
e075ae69
RH
4099 case 'c':
4100 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
4101 return;
fe25fea3 4102 case 'f':
e075ae69 4103 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 4104 return;
ef6257cd
JH
4105 case '+':
4106 {
4107 rtx x;
e5cb57e8 4108
ef6257cd
JH
4109 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
4110 return;
4111
4112 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4113 if (x)
4114 {
4115 int pred_val = INTVAL (XEXP (x, 0));
4116
4117 if (pred_val < REG_BR_PROB_BASE * 45 / 100
4118 || pred_val > REG_BR_PROB_BASE * 55 / 100)
4119 {
4120 int taken = pred_val > REG_BR_PROB_BASE / 2;
4121 int cputaken = final_forward_branch_p (current_output_insn) == 0;
4122
4123 /* Emit hints only in the case default branch prediction
4124 heruistics would fail. */
4125 if (taken != cputaken)
4126 {
4127 /* We use 3e (DS) prefix for taken branches and
4128 2e (CS) prefix for not taken branches. */
4129 if (taken)
4130 fputs ("ds ; ", file);
4131 else
4132 fputs ("cs ; ", file);
4133 }
4134 }
4135 }
4136 return;
4137 }
4af3895e 4138 default:
68daafd4
JVA
4139 {
4140 char str[50];
68daafd4
JVA
4141 sprintf (str, "invalid operand code `%c'", code);
4142 output_operand_lossage (str);
4143 }
2a2ab3f9
JVA
4144 }
4145 }
e9a25f70 4146
2a2ab3f9
JVA
4147 if (GET_CODE (x) == REG)
4148 {
4149 PRINT_REG (x, code, file);
4150 }
e9a25f70 4151
2a2ab3f9
JVA
4152 else if (GET_CODE (x) == MEM)
4153 {
e075ae69
RH
4154 /* No `byte ptr' prefix for call instructions. */
4155 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
2a2ab3f9 4156 {
69ddee61 4157 const char * size;
e075ae69
RH
4158 switch (GET_MODE_SIZE (GET_MODE (x)))
4159 {
4160 case 1: size = "BYTE"; break;
4161 case 2: size = "WORD"; break;
4162 case 4: size = "DWORD"; break;
4163 case 8: size = "QWORD"; break;
4164 case 12: size = "XWORD"; break;
a7180f70 4165 case 16: size = "XMMWORD"; break;
e075ae69 4166 default:
564d80f4 4167 abort ();
e075ae69 4168 }
fb204271
DN
4169
4170 /* Check for explicit size override (codes 'b', 'w' and 'k') */
4171 if (code == 'b')
4172 size = "BYTE";
4173 else if (code == 'w')
4174 size = "WORD";
4175 else if (code == 'k')
4176 size = "DWORD";
4177
e075ae69
RH
4178 fputs (size, file);
4179 fputs (" PTR ", file);
2a2ab3f9 4180 }
e075ae69
RH
4181
4182 x = XEXP (x, 0);
4183 if (flag_pic && CONSTANT_ADDRESS_P (x))
4184 output_pic_addr_const (file, x, code);
0d7d98ee
JH
4185 /* Avoid (%rip) for call operands. */
4186 else if (CONSTANT_ADDRESS_P (x) && code =='P'
4187 && GET_CODE (x) != CONST_INT)
4188 output_addr_const (file, x);
2a2ab3f9 4189 else
e075ae69 4190 output_address (x);
2a2ab3f9 4191 }
e9a25f70 4192
2a2ab3f9
JVA
4193 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
4194 {
e9a25f70
JL
4195 REAL_VALUE_TYPE r;
4196 long l;
4197
5f1ec3e6
JVA
4198 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4199 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69
RH
4200
4201 if (ASSEMBLER_DIALECT == 0)
4202 putc ('$', file);
52267fcb 4203 fprintf (file, "0x%lx", l);
5f1ec3e6 4204 }
e9a25f70 4205
0f290768 4206 /* These float cases don't actually occur as immediate operands. */
5f1ec3e6
JVA
4207 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
4208 {
e9a25f70
JL
4209 REAL_VALUE_TYPE r;
4210 char dstr[30];
4211
5f1ec3e6
JVA
4212 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4213 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
4214 fprintf (file, "%s", dstr);
2a2ab3f9 4215 }
e9a25f70 4216
2b589241
JH
4217 else if (GET_CODE (x) == CONST_DOUBLE
4218 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
2a2ab3f9 4219 {
e9a25f70
JL
4220 REAL_VALUE_TYPE r;
4221 char dstr[30];
4222
5f1ec3e6
JVA
4223 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4224 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
4225 fprintf (file, "%s", dstr);
2a2ab3f9 4226 }
79325812 4227 else
2a2ab3f9 4228 {
4af3895e 4229 if (code != 'P')
2a2ab3f9 4230 {
695dac07 4231 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69
RH
4232 {
4233 if (ASSEMBLER_DIALECT == 0)
4234 putc ('$', file);
4235 }
2a2ab3f9
JVA
4236 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
4237 || GET_CODE (x) == LABEL_REF)
e075ae69
RH
4238 {
4239 if (ASSEMBLER_DIALECT == 0)
4240 putc ('$', file);
4241 else
4242 fputs ("OFFSET FLAT:", file);
4243 }
2a2ab3f9 4244 }
e075ae69
RH
4245 if (GET_CODE (x) == CONST_INT)
4246 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4247 else if (flag_pic)
2a2ab3f9
JVA
4248 output_pic_addr_const (file, x, code);
4249 else
4250 output_addr_const (file, x);
4251 }
4252}
4253\f
4254/* Print a memory operand whose address is ADDR. */
4255
4256void
4257print_operand_address (file, addr)
4258 FILE *file;
4259 register rtx addr;
4260{
e075ae69
RH
4261 struct ix86_address parts;
4262 rtx base, index, disp;
4263 int scale;
e9a25f70 4264
e075ae69
RH
4265 if (! ix86_decompose_address (addr, &parts))
4266 abort ();
e9a25f70 4267
e075ae69
RH
4268 base = parts.base;
4269 index = parts.index;
4270 disp = parts.disp;
4271 scale = parts.scale;
e9a25f70 4272
e075ae69
RH
4273 if (!base && !index)
4274 {
4275 /* Displacement only requires special attention. */
e9a25f70 4276
e075ae69 4277 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 4278 {
e075ae69 4279 if (ASSEMBLER_DIALECT != 0)
fb204271
DN
4280 {
4281 if (USER_LABEL_PREFIX[0] == 0)
4282 putc ('%', file);
4283 fputs ("ds:", file);
4284 }
e075ae69 4285 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
2a2ab3f9 4286 }
e075ae69
RH
4287 else if (flag_pic)
4288 output_pic_addr_const (file, addr, 0);
4289 else
4290 output_addr_const (file, addr);
0d7d98ee
JH
4291
4292 /* Use one byte shorter RIP relative addressing for 64bit mode. */
4293 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
4294 fputs ("(%rip)", file);
e075ae69
RH
4295 }
4296 else
4297 {
4298 if (ASSEMBLER_DIALECT == 0)
2a2ab3f9 4299 {
e075ae69 4300 if (disp)
2a2ab3f9 4301 {
c399861d 4302 if (flag_pic)
e075ae69
RH
4303 output_pic_addr_const (file, disp, 0);
4304 else if (GET_CODE (disp) == LABEL_REF)
4305 output_asm_label (disp);
2a2ab3f9 4306 else
e075ae69 4307 output_addr_const (file, disp);
2a2ab3f9
JVA
4308 }
4309
e075ae69
RH
4310 putc ('(', file);
4311 if (base)
4312 PRINT_REG (base, 0, file);
4313 if (index)
2a2ab3f9 4314 {
e075ae69
RH
4315 putc (',', file);
4316 PRINT_REG (index, 0, file);
4317 if (scale != 1)
4318 fprintf (file, ",%d", scale);
2a2ab3f9 4319 }
e075ae69 4320 putc (')', file);
2a2ab3f9 4321 }
2a2ab3f9
JVA
4322 else
4323 {
e075ae69 4324 rtx offset = NULL_RTX;
e9a25f70 4325
e075ae69
RH
4326 if (disp)
4327 {
4328 /* Pull out the offset of a symbol; print any symbol itself. */
4329 if (GET_CODE (disp) == CONST
4330 && GET_CODE (XEXP (disp, 0)) == PLUS
4331 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
4332 {
4333 offset = XEXP (XEXP (disp, 0), 1);
4334 disp = gen_rtx_CONST (VOIDmode,
4335 XEXP (XEXP (disp, 0), 0));
4336 }
ce193852 4337
e075ae69
RH
4338 if (flag_pic)
4339 output_pic_addr_const (file, disp, 0);
4340 else if (GET_CODE (disp) == LABEL_REF)
4341 output_asm_label (disp);
4342 else if (GET_CODE (disp) == CONST_INT)
4343 offset = disp;
4344 else
4345 output_addr_const (file, disp);
4346 }
e9a25f70 4347
e075ae69
RH
4348 putc ('[', file);
4349 if (base)
a8620236 4350 {
e075ae69
RH
4351 PRINT_REG (base, 0, file);
4352 if (offset)
4353 {
4354 if (INTVAL (offset) >= 0)
4355 putc ('+', file);
4356 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
4357 }
a8620236 4358 }
e075ae69
RH
4359 else if (offset)
4360 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 4361 else
e075ae69 4362 putc ('0', file);
e9a25f70 4363
e075ae69
RH
4364 if (index)
4365 {
4366 putc ('+', file);
4367 PRINT_REG (index, 0, file);
4368 if (scale != 1)
4369 fprintf (file, "*%d", scale);
4370 }
4371 putc (']', file);
4372 }
2a2ab3f9
JVA
4373 }
4374}
4375\f
4376/* Split one or more DImode RTL references into pairs of SImode
4377 references. The RTL can be REG, offsettable MEM, integer constant, or
4378 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
4379 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 4380 that parallel "operands". */
2a2ab3f9
JVA
4381
4382void
4383split_di (operands, num, lo_half, hi_half)
4384 rtx operands[];
4385 int num;
4386 rtx lo_half[], hi_half[];
4387{
4388 while (num--)
4389 {
57dbca5e 4390 rtx op = operands[num];
e075ae69
RH
4391 if (CONSTANT_P (op))
4392 split_double (op, &lo_half[num], &hi_half[num]);
4393 else if (! reload_completed)
a269a03c
JC
4394 {
4395 lo_half[num] = gen_lowpart (SImode, op);
4396 hi_half[num] = gen_highpart (SImode, op);
4397 }
4398 else if (GET_CODE (op) == REG)
2a2ab3f9 4399 {
0d7d98ee
JH
4400 if (TARGET_64BIT)
4401 abort();
57dbca5e
BS
4402 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
4403 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
2a2ab3f9 4404 }
57dbca5e 4405 else if (offsettable_memref_p (op))
2a2ab3f9 4406 {
57dbca5e
BS
4407 rtx lo_addr = XEXP (op, 0);
4408 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
4409 lo_half[num] = change_address (op, SImode, lo_addr);
4410 hi_half[num] = change_address (op, SImode, hi_addr);
2a2ab3f9
JVA
4411 }
4412 else
564d80f4 4413 abort ();
2a2ab3f9
JVA
4414 }
4415}
4416\f
2a2ab3f9
JVA
4417/* Output code to perform a 387 binary operation in INSN, one of PLUS,
4418 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
4419 is the expression of the binary operation. The output may either be
4420 emitted here, or returned to the caller, like all output_* functions.
4421
4422 There is no guarantee that the operands are the same mode, as they
0f290768 4423 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 4424
e3c2afab
AM
4425#ifndef SYSV386_COMPAT
4426/* Set to 1 for compatibility with brain-damaged assemblers. No-one
4427 wants to fix the assemblers because that causes incompatibility
4428 with gcc. No-one wants to fix gcc because that causes
4429 incompatibility with assemblers... You can use the option of
4430 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
4431#define SYSV386_COMPAT 1
4432#endif
4433
69ddee61 4434const char *
2a2ab3f9
JVA
4435output_387_binary_op (insn, operands)
4436 rtx insn;
4437 rtx *operands;
4438{
e3c2afab 4439 static char buf[30];
69ddee61 4440 const char *p;
1deaa899
JH
4441 const char *ssep;
4442 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
2a2ab3f9 4443
e3c2afab
AM
4444#ifdef ENABLE_CHECKING
4445 /* Even if we do not want to check the inputs, this documents input
4446 constraints. Which helps in understanding the following code. */
4447 if (STACK_REG_P (operands[0])
4448 && ((REG_P (operands[1])
4449 && REGNO (operands[0]) == REGNO (operands[1])
4450 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
4451 || (REG_P (operands[2])
4452 && REGNO (operands[0]) == REGNO (operands[2])
4453 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
4454 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
4455 ; /* ok */
1deaa899 4456 else if (!is_sse)
e3c2afab
AM
4457 abort ();
4458#endif
4459
2a2ab3f9
JVA
4460 switch (GET_CODE (operands[3]))
4461 {
4462 case PLUS:
e075ae69
RH
4463 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4464 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4465 p = "fiadd";
4466 else
4467 p = "fadd";
1deaa899 4468 ssep = "add";
2a2ab3f9
JVA
4469 break;
4470
4471 case MINUS:
e075ae69
RH
4472 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4473 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4474 p = "fisub";
4475 else
4476 p = "fsub";
1deaa899 4477 ssep = "sub";
2a2ab3f9
JVA
4478 break;
4479
4480 case MULT:
e075ae69
RH
4481 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4482 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4483 p = "fimul";
4484 else
4485 p = "fmul";
1deaa899 4486 ssep = "mul";
2a2ab3f9
JVA
4487 break;
4488
4489 case DIV:
e075ae69
RH
4490 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4491 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4492 p = "fidiv";
4493 else
4494 p = "fdiv";
1deaa899 4495 ssep = "div";
2a2ab3f9
JVA
4496 break;
4497
4498 default:
4499 abort ();
4500 }
4501
1deaa899
JH
4502 if (is_sse)
4503 {
4504 strcpy (buf, ssep);
4505 if (GET_MODE (operands[0]) == SFmode)
4506 strcat (buf, "ss\t{%2, %0|%0, %2}");
4507 else
4508 strcat (buf, "sd\t{%2, %0|%0, %2}");
4509 return buf;
4510 }
e075ae69 4511 strcpy (buf, p);
2a2ab3f9
JVA
4512
4513 switch (GET_CODE (operands[3]))
4514 {
4515 case MULT:
4516 case PLUS:
4517 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
4518 {
e3c2afab 4519 rtx temp = operands[2];
2a2ab3f9
JVA
4520 operands[2] = operands[1];
4521 operands[1] = temp;
4522 }
4523
e3c2afab
AM
4524 /* know operands[0] == operands[1]. */
4525
2a2ab3f9 4526 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
4527 {
4528 p = "%z2\t%2";
4529 break;
4530 }
2a2ab3f9
JVA
4531
4532 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
4533 {
4534 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
4535 /* How is it that we are storing to a dead operand[2]?
4536 Well, presumably operands[1] is dead too. We can't
4537 store the result to st(0) as st(0) gets popped on this
4538 instruction. Instead store to operands[2] (which I
4539 think has to be st(1)). st(1) will be popped later.
4540 gcc <= 2.8.1 didn't have this check and generated
4541 assembly code that the Unixware assembler rejected. */
4542 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 4543 else
e3c2afab 4544 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 4545 break;
6b28fd63 4546 }
2a2ab3f9
JVA
4547
4548 if (STACK_TOP_P (operands[0]))
e3c2afab 4549 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 4550 else
e3c2afab 4551 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 4552 break;
2a2ab3f9
JVA
4553
4554 case MINUS:
4555 case DIV:
4556 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
4557 {
4558 p = "r%z1\t%1";
4559 break;
4560 }
2a2ab3f9
JVA
4561
4562 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
4563 {
4564 p = "%z2\t%2";
4565 break;
4566 }
2a2ab3f9 4567
2a2ab3f9 4568 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 4569 {
e3c2afab
AM
4570#if SYSV386_COMPAT
4571 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
4572 derived assemblers, confusingly reverse the direction of
4573 the operation for fsub{r} and fdiv{r} when the
4574 destination register is not st(0). The Intel assembler
4575 doesn't have this brain damage. Read !SYSV386_COMPAT to
4576 figure out what the hardware really does. */
4577 if (STACK_TOP_P (operands[0]))
4578 p = "{p\t%0, %2|rp\t%2, %0}";
4579 else
4580 p = "{rp\t%2, %0|p\t%0, %2}";
4581#else
6b28fd63 4582 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
4583 /* As above for fmul/fadd, we can't store to st(0). */
4584 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 4585 else
e3c2afab
AM
4586 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
4587#endif
e075ae69 4588 break;
6b28fd63 4589 }
2a2ab3f9
JVA
4590
4591 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 4592 {
e3c2afab 4593#if SYSV386_COMPAT
6b28fd63 4594 if (STACK_TOP_P (operands[0]))
e3c2afab 4595 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 4596 else
e3c2afab
AM
4597 p = "{p\t%1, %0|rp\t%0, %1}";
4598#else
4599 if (STACK_TOP_P (operands[0]))
4600 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
4601 else
4602 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
4603#endif
e075ae69 4604 break;
6b28fd63 4605 }
2a2ab3f9
JVA
4606
4607 if (STACK_TOP_P (operands[0]))
4608 {
4609 if (STACK_TOP_P (operands[1]))
e3c2afab 4610 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 4611 else
e3c2afab 4612 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 4613 break;
2a2ab3f9
JVA
4614 }
4615 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
4616 {
4617#if SYSV386_COMPAT
4618 p = "{\t%1, %0|r\t%0, %1}";
4619#else
4620 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
4621#endif
4622 }
2a2ab3f9 4623 else
e3c2afab
AM
4624 {
4625#if SYSV386_COMPAT
4626 p = "{r\t%2, %0|\t%0, %2}";
4627#else
4628 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
4629#endif
4630 }
e075ae69 4631 break;
2a2ab3f9
JVA
4632
4633 default:
4634 abort ();
4635 }
e075ae69
RH
4636
4637 strcat (buf, p);
4638 return buf;
2a2ab3f9 4639}
e075ae69 4640
7a2e09f4
JH
4641/* Output code to initialize control word copies used by
4642 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
4643 is set to control word rounding downwards. */
4644void
4645emit_i387_cw_initialization (normal, round_down)
4646 rtx normal, round_down;
4647{
4648 rtx reg = gen_reg_rtx (HImode);
4649
4650 emit_insn (gen_x86_fnstcw_1 (normal));
4651 emit_move_insn (reg, normal);
4652 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
4653 && !TARGET_64BIT)
4654 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
4655 else
4656 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
4657 emit_move_insn (round_down, reg);
4658}
4659
2a2ab3f9 4660/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 4661 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 4662 operand may be [SDX]Fmode. */
2a2ab3f9 4663
69ddee61 4664const char *
2a2ab3f9
JVA
4665output_fix_trunc (insn, operands)
4666 rtx insn;
4667 rtx *operands;
4668{
4669 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69
RH
4670 int dimode_p = GET_MODE (operands[0]) == DImode;
4671 rtx xops[4];
2a2ab3f9 4672
e075ae69
RH
4673 /* Jump through a hoop or two for DImode, since the hardware has no
4674 non-popping instruction. We used to do this a different way, but
4675 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
4676 if (dimode_p && !stack_top_dies)
4677 output_asm_insn ("fld\t%y1", operands);
e075ae69 4678
7a2e09f4 4679 if (!STACK_TOP_P (operands[1]))
10195bd8
JW
4680 abort ();
4681
e075ae69 4682 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 4683 abort ();
e9a25f70 4684
7a2e09f4 4685 output_asm_insn ("fldcw\t%3", operands);
e075ae69 4686 if (stack_top_dies || dimode_p)
7a2e09f4 4687 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 4688 else
7a2e09f4 4689 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 4690 output_asm_insn ("fldcw\t%2", operands);
10195bd8 4691
e075ae69 4692 return "";
2a2ab3f9 4693}
cda749b1 4694
e075ae69
RH
4695/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
4696 should be used and 2 when fnstsw should be used. UNORDERED_P is true
4697 when fucom should be used. */
4698
69ddee61 4699const char *
e075ae69 4700output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
4701 rtx insn;
4702 rtx *operands;
e075ae69 4703 int eflags_p, unordered_p;
cda749b1 4704{
e075ae69
RH
4705 int stack_top_dies;
4706 rtx cmp_op0 = operands[0];
4707 rtx cmp_op1 = operands[1];
0644b628 4708 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
e075ae69
RH
4709
4710 if (eflags_p == 2)
4711 {
4712 cmp_op0 = cmp_op1;
4713 cmp_op1 = operands[2];
4714 }
0644b628
JH
4715 if (is_sse)
4716 {
4717 if (GET_MODE (operands[0]) == SFmode)
4718 if (unordered_p)
4719 return "ucomiss\t{%1, %0|%0, %1}";
4720 else
4721 return "comiss\t{%1, %0|%0, %y}";
4722 else
4723 if (unordered_p)
4724 return "ucomisd\t{%1, %0|%0, %1}";
4725 else
4726 return "comisd\t{%1, %0|%0, %y}";
4727 }
cda749b1 4728
e075ae69 4729 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
4730 abort ();
4731
e075ae69 4732 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 4733
e075ae69
RH
4734 if (STACK_REG_P (cmp_op1)
4735 && stack_top_dies
4736 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
4737 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 4738 {
e075ae69
RH
4739 /* If both the top of the 387 stack dies, and the other operand
4740 is also a stack register that dies, then this must be a
4741 `fcompp' float compare */
4742
4743 if (eflags_p == 1)
4744 {
4745 /* There is no double popping fcomi variant. Fortunately,
4746 eflags is immune from the fstp's cc clobbering. */
4747 if (unordered_p)
4748 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
4749 else
4750 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
4751 return "fstp\t%y0";
4752 }
4753 else
cda749b1 4754 {
e075ae69
RH
4755 if (eflags_p == 2)
4756 {
4757 if (unordered_p)
4758 return "fucompp\n\tfnstsw\t%0";
4759 else
4760 return "fcompp\n\tfnstsw\t%0";
4761 }
cda749b1
JW
4762 else
4763 {
e075ae69
RH
4764 if (unordered_p)
4765 return "fucompp";
4766 else
4767 return "fcompp";
cda749b1
JW
4768 }
4769 }
cda749b1
JW
4770 }
4771 else
4772 {
e075ae69 4773 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 4774
0f290768 4775 static const char * const alt[24] =
e075ae69
RH
4776 {
4777 "fcom%z1\t%y1",
4778 "fcomp%z1\t%y1",
4779 "fucom%z1\t%y1",
4780 "fucomp%z1\t%y1",
0f290768 4781
e075ae69
RH
4782 "ficom%z1\t%y1",
4783 "ficomp%z1\t%y1",
4784 NULL,
4785 NULL,
4786
4787 "fcomi\t{%y1, %0|%0, %y1}",
4788 "fcomip\t{%y1, %0|%0, %y1}",
4789 "fucomi\t{%y1, %0|%0, %y1}",
4790 "fucomip\t{%y1, %0|%0, %y1}",
4791
4792 NULL,
4793 NULL,
4794 NULL,
4795 NULL,
4796
4797 "fcom%z2\t%y2\n\tfnstsw\t%0",
4798 "fcomp%z2\t%y2\n\tfnstsw\t%0",
4799 "fucom%z2\t%y2\n\tfnstsw\t%0",
4800 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 4801
e075ae69
RH
4802 "ficom%z2\t%y2\n\tfnstsw\t%0",
4803 "ficomp%z2\t%y2\n\tfnstsw\t%0",
4804 NULL,
4805 NULL
4806 };
4807
4808 int mask;
69ddee61 4809 const char *ret;
e075ae69
RH
4810
4811 mask = eflags_p << 3;
4812 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4813 mask |= unordered_p << 1;
4814 mask |= stack_top_dies;
4815
4816 if (mask >= 24)
4817 abort ();
4818 ret = alt[mask];
4819 if (ret == NULL)
4820 abort ();
cda749b1 4821
e075ae69 4822 return ret;
cda749b1
JW
4823 }
4824}
2a2ab3f9 4825
e075ae69 4826/* Output assembler code to FILE to initialize basic-block profiling.
2a2ab3f9 4827
e075ae69 4828 If profile_block_flag == 2
2a2ab3f9 4829
e075ae69
RH
4830 Output code to call the subroutine `__bb_init_trace_func'
4831 and pass two parameters to it. The first parameter is
4832 the address of a block allocated in the object module.
4833 The second parameter is the number of the first basic block
4834 of the function.
2a2ab3f9 4835
e075ae69 4836 The name of the block is a local symbol made with this statement:
0f290768 4837
e075ae69 4838 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
2a2ab3f9 4839
e075ae69
RH
4840 Of course, since you are writing the definition of
4841 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4842 can take a short cut in the definition of this macro and use the
4843 name that you know will result.
2a2ab3f9 4844
e075ae69
RH
4845 The number of the first basic block of the function is
4846 passed to the macro in BLOCK_OR_LABEL.
2a2ab3f9 4847
e075ae69
RH
4848 If described in a virtual assembler language the code to be
4849 output looks like:
2a2ab3f9 4850
e075ae69
RH
4851 parameter1 <- LPBX0
4852 parameter2 <- BLOCK_OR_LABEL
4853 call __bb_init_trace_func
2a2ab3f9 4854
e075ae69 4855 else if profile_block_flag != 0
e74389ff 4856
e075ae69
RH
4857 Output code to call the subroutine `__bb_init_func'
4858 and pass one single parameter to it, which is the same
4859 as the first parameter to `__bb_init_trace_func'.
e74389ff 4860
e075ae69
RH
4861 The first word of this parameter is a flag which will be nonzero if
4862 the object module has already been initialized. So test this word
4863 first, and do not call `__bb_init_func' if the flag is nonzero.
4864 Note: When profile_block_flag == 2 the test need not be done
4865 but `__bb_init_trace_func' *must* be called.
e74389ff 4866
e075ae69
RH
4867 BLOCK_OR_LABEL may be used to generate a label number as a
4868 branch destination in case `__bb_init_func' will not be called.
e74389ff 4869
e075ae69
RH
4870 If described in a virtual assembler language the code to be
4871 output looks like:
2a2ab3f9 4872
e075ae69
RH
4873 cmp (LPBX0),0
4874 jne local_label
4875 parameter1 <- LPBX0
4876 call __bb_init_func
4877 local_label:
4878*/
c572e5ba 4879
e075ae69
RH
4880void
4881ix86_output_function_block_profiler (file, block_or_label)
4882 FILE *file;
4883 int block_or_label;
c572e5ba 4884{
e075ae69
RH
4885 static int num_func = 0;
4886 rtx xops[8];
4887 char block_table[80], false_label[80];
c572e5ba 4888
e075ae69 4889 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
e9a25f70 4890
e075ae69
RH
4891 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4892 xops[5] = stack_pointer_rtx;
4893 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
2a2ab3f9 4894
e075ae69 4895 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
c572e5ba 4896
e075ae69 4897 switch (profile_block_flag)
c572e5ba 4898 {
e075ae69
RH
4899 case 2:
4900 xops[2] = GEN_INT (block_or_label);
4901 xops[3] = gen_rtx_MEM (Pmode,
4902 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4903 xops[6] = GEN_INT (8);
e9a25f70 4904
e075ae69
RH
4905 output_asm_insn ("push{l}\t%2", xops);
4906 if (!flag_pic)
4907 output_asm_insn ("push{l}\t%1", xops);
e9a25f70 4908 else
870a0c2c 4909 {
e075ae69
RH
4910 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4911 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 4912 }
e075ae69
RH
4913 output_asm_insn ("call\t%P3", xops);
4914 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4915 break;
c572e5ba 4916
e075ae69
RH
4917 default:
4918 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
c572e5ba 4919
e075ae69
RH
4920 xops[0] = const0_rtx;
4921 xops[2] = gen_rtx_MEM (Pmode,
4922 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4923 xops[3] = gen_rtx_MEM (Pmode,
4924 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4925 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4926 xops[6] = GEN_INT (4);
a14003ee 4927
e075ae69 4928 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
446ba526 4929
e075ae69
RH
4930 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4931 output_asm_insn ("jne\t%2", xops);
870a0c2c 4932
e075ae69
RH
4933 if (!flag_pic)
4934 output_asm_insn ("push{l}\t%1", xops);
4935 else
4936 {
4937 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
4938 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 4939 }
e075ae69
RH
4940 output_asm_insn ("call\t%P3", xops);
4941 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4942 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4943 num_func++;
4944 break;
c572e5ba 4945 }
2a2ab3f9 4946}
305f097e 4947
e075ae69
RH
4948/* Output assembler code to FILE to increment a counter associated
4949 with basic block number BLOCKNO.
305f097e 4950
e075ae69 4951 If profile_block_flag == 2
ecbc4695 4952
e075ae69
RH
4953 Output code to initialize the global structure `__bb' and
4954 call the function `__bb_trace_func' which will increment the
4955 counter.
ecbc4695 4956
e075ae69
RH
4957 `__bb' consists of two words. In the first word the number
4958 of the basic block has to be stored. In the second word
0f290768 4959 the address of a block allocated in the object module
e075ae69 4960 has to be stored.
ecbc4695 4961
e075ae69 4962 The basic block number is given by BLOCKNO.
ecbc4695 4963
0f290768 4964 The address of the block is given by the label created with
305f097e 4965
e075ae69 4966 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
305f097e 4967
e075ae69 4968 by FUNCTION_BLOCK_PROFILER.
ecbc4695 4969
e075ae69
RH
4970 Of course, since you are writing the definition of
4971 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4972 can take a short cut in the definition of this macro and use the
4973 name that you know will result.
305f097e 4974
e075ae69
RH
4975 If described in a virtual assembler language the code to be
4976 output looks like:
305f097e 4977
e075ae69
RH
4978 move BLOCKNO -> (__bb)
4979 move LPBX0 -> (__bb+4)
4980 call __bb_trace_func
305f097e 4981
e075ae69
RH
4982 Note that function `__bb_trace_func' must not change the
4983 machine state, especially the flag register. To grant
4984 this, you must output code to save and restore registers
4985 either in this macro or in the macros MACHINE_STATE_SAVE
4986 and MACHINE_STATE_RESTORE. The last two macros will be
4987 used in the function `__bb_trace_func', so you must make
0f290768 4988 sure that the function prologue does not change any
e075ae69 4989 register prior to saving it with MACHINE_STATE_SAVE.
305f097e 4990
e075ae69 4991 else if profile_block_flag != 0
305f097e 4992
e075ae69
RH
4993 Output code to increment the counter directly.
4994 Basic blocks are numbered separately from zero within each
4995 compiled object module. The count associated with block number
0f290768 4996 BLOCKNO is at index BLOCKNO in an array of words; the name of
e075ae69 4997 this array is a local symbol made with this statement:
32b5b1aa 4998
e075ae69 4999 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
32b5b1aa 5000
e075ae69
RH
5001 Of course, since you are writing the definition of
5002 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
5003 can take a short cut in the definition of this macro and use the
0f290768 5004 name that you know will result.
32b5b1aa 5005
e075ae69
RH
5006 If described in a virtual assembler language the code to be
5007 output looks like:
32b5b1aa 5008
e075ae69
RH
5009 inc (LPBX2+4*BLOCKNO)
5010*/
32b5b1aa 5011
e075ae69
RH
/* Output assembler code to increment the counter for basic block BLOCKNO.
   See the block comment above: with profile_block_flag == 2 we fill in the
   global `__bb' structure and call `__bb_trace_func' (preserving the flags
   register around the call); otherwise we emit a single increment of the
   BLOCKNO'th slot of the LPBX2 counter array.  */
void
ix86_output_block_profiler (file, blockno)
     FILE *file ATTRIBUTE_UNUSED;
     int blockno;
{
  rtx xops[8], cnt_rtx;
  char counts[80];
  char *block_table = counts;

  switch (profile_block_flag)
    {
    case 2:
      ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);

      /* xops[0]/xops[6] are the two words of `__bb'; xops[1] is the
	 block table; xops[3] is the function to call.  */
      xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
      xops[2] = GEN_INT (blockno);
      xops[3] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
      xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
      xops[5] = plus_constant (xops[4], 4);
      xops[0] = gen_rtx_MEM (SImode, xops[4]);
      xops[6] = gen_rtx_MEM (SImode, xops[5]);

      CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;

      /* __bb_trace_func must not change machine state, so save the flags
	 register across the whole sequence.  */
      output_asm_insn ("pushf", xops);
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      if (flag_pic)
	{
	  /* Need a scratch register (eax) to materialize the PIC address;
	     preserve its value around the lea/mov.  */
	  xops[7] = gen_rtx_REG (Pmode, 0);	/* eax */
	  output_asm_insn ("push{l}\t%7", xops);
	  output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
	  output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
	  output_asm_insn ("pop{l}\t%7", xops);
	}
      else
	output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
      output_asm_insn ("call\t%P3", xops);
      output_asm_insn ("popf", xops);

      break;

    default:
      /* Direct increment of the counter word: inc (LPBX2 + 4*BLOCKNO).  */
      ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
      cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
      SYMBOL_REF_FLAG (cnt_rtx) = TRUE;

      if (blockno)
	cnt_rtx = plus_constant (cnt_rtx, blockno*4);

      if (flag_pic)
	cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);

      xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
      output_asm_insn ("inc{l}\t%0", xops);

      break;
    }
}
32b5b1aa 5071\f
/* Expand a move of MODE from operands[1] into operands[0], applying the
   fixups the i386 move patterns require (PIC legitimization of symbolic
   addresses, breaking mem->mem moves through a register, forcing FP
   constants into the constant pool) before emitting the final SET.  */
void
ix86_expand_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  /* During/after reload we may not create new pseudos or constant pool
     entries, so the CONST_DOUBLE fixup below is suppressed.  */
  int strict = (reload_in_progress || reload_completed);
  rtx insn;

  if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
    {
      /* Emit insns to move operands[1] into operands[0].  */

      if (GET_CODE (operands[0]) == MEM)
	operands[1] = force_reg (Pmode, operands[1]);
      else
	{
	  rtx temp = operands[0];
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (operands[1], temp);
	  /* legitimize_pic_address may already have emitted the move.  */
	  if (temp == operands[0])
	    return;
	  operands[1] = temp;
	}
    }
  else
    {
      /* mem->mem moves (other than non-QImode pushes, which the push
	 patterns handle directly) must go through a register.  */
      if (GET_CODE (operands[0]) == MEM
	  && (GET_MODE (operands[0]) == QImode
	      || !push_operand (operands[0], mode))
	  && GET_CODE (operands[1]) == MEM)
	operands[1] = force_reg (mode, operands[1]);

      /* Pushes of operands that involve eliminable registers must be
	 copied first, since elimination could invalidate them.  */
      if (push_operand (operands[0], mode)
	  && ! general_no_elim_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (operands[1]) == CONST_DOUBLE
		   && register_operand (operands[0], mode))
	    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
	}
    }

  insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);

  emit_insn (insn);
}
e9a25f70 5127
e075ae69
RH
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */
e9a25f70 5131
e075ae69
RH
/* Expand the binary operation CODE in MODE on OPERANDS (dst, src1, src2),
   massaging the operands into shapes the insn patterns accept and emitting
   the operation together with a flags clobber (except during reload).  */
void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  /* Nonzero when dst already matches one of the sources in memory:
     1 = matches src1, 2 = matches src2 (commutative only).  */
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      /* Keep the operand that matches the memory destination; force
	 the other one into a register.  */
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
5217
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  Mirrors the fixups ix86_expand_binary_operator
   performs, so patterns reject operand combinations it would not emit.  */

int
ix86_binary_operator_ok (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[3];
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutable, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
	    || (GET_RTX_CLASS (code) == 'c'
		&& rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutable and the source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != 'c'
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;
  return 1;
}
5247
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */
e075ae69 5251
/* Expand the unary operation CODE in MODE on OPERANDS (dst, src),
   forcing operands into acceptable shapes and emitting the operation
   (with a flags clobber for everything except NOT).  */
void
ix86_expand_unary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  /* NOT is always emitted without a flags clobber.  During reload no
     clobber may be added, so any other code arriving here then is an
     internal error.  */
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
5309
5310/* Return TRUE or FALSE depending on whether the unary operator meets the
5311 appropriate constraints. */
5312
5313int
5314ix86_unary_operator_ok (code, mode, operands)
5315 enum rtx_code code ATTRIBUTE_UNUSED;
5316 enum machine_mode mode ATTRIBUTE_UNUSED;
5317 rtx operands[2] ATTRIBUTE_UNUSED;
5318{
06a964de
JH
5319 /* If one of operands is memory, source and destination must match. */
5320 if ((GET_CODE (operands[0]) == MEM
5321 || GET_CODE (operands[1]) == MEM)
5322 && ! rtx_equal_p (operands[0], operands[1]))
5323 return FALSE;
e075ae69
RH
5324 return TRUE;
5325}
5326
16189740
RH
5327/* Return TRUE or FALSE depending on whether the first SET in INSN
5328 has source and destination with matching CC modes, and that the
5329 CC mode is at least as constrained as REQ_MODE. */
5330
int
ix86_match_ccmode (insn, req_mode)
     rtx insn;
     enum machine_mode req_mode;
{
  rtx set;
  enum machine_mode set_mode;

  /* Dig the COMPARE out of the (possibly PARALLEL-wrapped) first SET.  */
  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  /* The cases below form a lattice: each mode falls through to accept
     the request when it is no more constrained than the mode produced.  */
  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      /* CCNOmode additionally satisfies a CCmode request for a
	 comparison against zero.  */
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  /* Finally, source and destination CC modes must agree.  */
  return (GET_MODE (SET_SRC (set)) == set_mode);
}
5377
e075ae69
RH
5378/* Generate insn patterns to do an integer compare of OPERANDS. */
5379
5380static rtx
5381ix86_expand_int_compare (code, op0, op1)
5382 enum rtx_code code;
5383 rtx op0, op1;
5384{
5385 enum machine_mode cmpmode;
5386 rtx tmp, flags;
5387
5388 cmpmode = SELECT_CC_MODE (code, op0, op1);
5389 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
5390
5391 /* This is very simple, but making the interface the same as in the
5392 FP case makes the rest of the code easier. */
5393 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
5394 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
5395
5396 /* Return the test that should be put into the flags user, i.e.
5397 the bcc, scc, or cmov instruction. */
5398 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
5399}
5400
3a3677ff
RH
5401/* Figure out whether to use ordered or unordered fp comparisons.
5402 Return the appropriate mode to use. */
e075ae69 5403
b1cdafbb 5404enum machine_mode
3a3677ff 5405ix86_fp_compare_mode (code)
8752c357 5406 enum rtx_code code ATTRIBUTE_UNUSED;
e075ae69 5407{
9e7adcb3
JH
5408 /* ??? In order to make all comparisons reversible, we do all comparisons
5409 non-trapping when compiling for IEEE. Once gcc is able to distinguish
5410 all forms trapping and nontrapping comparisons, we can make inequality
5411 comparisons trapping again, since it results in better code when using
5412 FCOM based compares. */
5413 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
5414}
5415
9076b9c1
JH
/* Return the condition-code mode needed to represent comparison CODE of
   OP0 against OP1 — the least constrained mode whose flags suffice.  */
enum machine_mode
ix86_cc_mode (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  /* Floating point comparisons get their own CC modes.  */
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
    default:
      abort ();
    }
}
5458
3a3677ff
RH
5459/* Return true if we should use an FCOMI instruction for this fp comparison. */
5460
a940d8bd 5461int
3a3677ff 5462ix86_use_fcomi_compare (code)
9e7adcb3 5463 enum rtx_code code ATTRIBUTE_UNUSED;
3a3677ff 5464{
9e7adcb3
JH
5465 enum rtx_code swapped_code = swap_condition (code);
5466 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
5467 || (ix86_fp_comparison_cost (swapped_code)
5468 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
5469}
5470
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */
5474
static enum rtx_code
ix86_prepare_fp_compare_args (code, pop0, pop1)
     enum rtx_code code;
     rtx *pop0, *pop1;
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || op_mode == XFmode
	  || op_mode == TFmode
	  || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  /* Loadable 387 constants (0.0, 1.0, ...) stay as registers;
	     anything else goes to the constant pool.  */
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op0) == REG || !reload_completed))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
5542
c0c102a9
JH
5543/* Convert comparison codes we use to represent FP comparison to integer
5544 code that will result in proper branch. Return UNKNOWN if no such code
5545 is available. */
5546static enum rtx_code
5547ix86_fp_compare_code_to_integer (code)
5548 enum rtx_code code;
5549{
5550 switch (code)
5551 {
5552 case GT:
5553 return GTU;
5554 case GE:
5555 return GEU;
5556 case ORDERED:
5557 case UNORDERED:
5558 return code;
5559 break;
5560 case UNEQ:
5561 return EQ;
5562 break;
5563 case UNLT:
5564 return LTU;
5565 break;
5566 case UNLE:
5567 return LEU;
5568 break;
5569 case LTGT:
5570 return NE;
5571 break;
5572 default:
5573 return UNKNOWN;
5574 }
5575}
5576
/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for the branch that
   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
   is not required, the corresponding value is set to NIL.
   We never require more than two branches.  */
static void
ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
     enum rtx_code code, *bypass_code, *first_code, *second_code;
{
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF PF CF
     >      0  0  0
     <      0  0  1
     =      1  0  0
     un     1  1  1 */

  switch (code)
    {
      /* These are expressible directly from the flags above and behave
	 correctly for unordered operands, so a single branch suffices.  */
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;
      /* These give a wrong answer for unordered operands, so either a
	 bypass branch guards the primary test, or a second branch
	 catches the unordered case.  */
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  /* Without IEEE conformance the unordered outcome need not be honored.  */
  if (!TARGET_IEEE_FP)
    {
      *second_code = NIL;
      *bypass_code = NIL;
    }
}
5642
9e7adcb3
JH
5643/* Return cost of comparison done fcom + arithmetics operations on AX.
5644 All following functions do use number of instructions as an cost metrics.
5645 In future this should be tweaked to compute bytes for optimize_size and
5646 take into account performance of various instructions on various CPUs. */
5647static int
5648ix86_fp_comparison_arithmetics_cost (code)
5649 enum rtx_code code;
5650{
5651 if (!TARGET_IEEE_FP)
5652 return 4;
5653 /* The cost of code output by ix86_expand_fp_compare. */
5654 switch (code)
5655 {
5656 case UNLE:
5657 case UNLT:
5658 case LTGT:
5659 case GT:
5660 case GE:
5661 case UNORDERED:
5662 case ORDERED:
5663 case UNEQ:
5664 return 4;
5665 break;
5666 case LT:
5667 case NE:
5668 case EQ:
5669 case UNGE:
5670 return 5;
5671 break;
5672 case LE:
5673 case UNGT:
5674 return 6;
5675 break;
5676 default:
5677 abort ();
5678 }
5679}
5680
5681/* Return cost of comparison done using fcomi operation.
5682 See ix86_fp_comparison_arithmetics_cost for the metrics. */
static int
ix86_fp_comparison_fcomi_cost (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not supported - this
     prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  /* Base cost of 2, plus one extra branch when a bypass or second
     comparison is required.  */
  return (bypass_code != NIL || second_code != NIL) + 2;
}
5695
5696/* Return cost of comparison done using sahf operation.
5697 See ix86_fp_comparison_arithmetics_cost for the metrics. */
static int
ix86_fp_comparison_sahf_cost (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not preferred - this
     prevents gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  /* Base cost of 3 (fnstsw/sahf sequence), plus one extra branch when a
     bypass or second comparison is required.  */
  return (bypass_code != NIL || second_code != NIL) + 3;
}
5710
5711/* Compute cost of the comparison done using any method.
5712 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5713static int
5714ix86_fp_comparison_cost (code)
5715 enum rtx_code code;
5716{
5717 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
5718 int min;
5719
5720 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
5721 sahf_cost = ix86_fp_comparison_sahf_cost (code);
5722
5723 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
5724 if (min > sahf_cost)
5725 min = sahf_cost;
5726 if (min > fcomi_cost)
5727 min = fcomi_cost;
5728 return min;
5729}
c0c102a9 5730
3a3677ff
RH
5731/* Generate insn patterns to do a floating point compare of OPERANDS. */
5732
9e7adcb3
JH
5733static rtx
5734ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
3a3677ff
RH
5735 enum rtx_code code;
5736 rtx op0, op1, scratch;
9e7adcb3
JH
5737 rtx *second_test;
5738 rtx *bypass_test;
3a3677ff
RH
5739{
5740 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 5741 rtx tmp, tmp2;
9e7adcb3 5742 int cost = ix86_fp_comparison_cost (code);
c0c102a9 5743 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
5744
5745 fpcmp_mode = ix86_fp_compare_mode (code);
5746 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
5747
9e7adcb3
JH
5748 if (second_test)
5749 *second_test = NULL_RTX;
5750 if (bypass_test)
5751 *bypass_test = NULL_RTX;
5752
c0c102a9
JH
5753 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5754
9e7adcb3
JH
5755 /* Do fcomi/sahf based test when profitable. */
5756 if ((bypass_code == NIL || bypass_test)
5757 && (second_code == NIL || second_test)
5758 && ix86_fp_comparison_arithmetics_cost (code) > cost)
32b5b1aa 5759 {
c0c102a9
JH
5760 if (TARGET_CMOVE)
5761 {
5762 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5763 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
5764 tmp);
5765 emit_insn (tmp);
5766 }
5767 else
5768 {
5769 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5770 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
bf71a4f8
JH
5771 if (!scratch)
5772 scratch = gen_reg_rtx (HImode);
c0c102a9
JH
5773 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5774 emit_insn (gen_x86_sahf_1 (scratch));
5775 }
e075ae69
RH
5776
5777 /* The FP codes work out to act like unsigned. */
9a915772 5778 intcmp_mode = fpcmp_mode;
9e7adcb3
JH
5779 code = first_code;
5780 if (bypass_code != NIL)
5781 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
5782 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5783 const0_rtx);
5784 if (second_code != NIL)
5785 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
5786 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5787 const0_rtx);
e075ae69
RH
5788 }
5789 else
5790 {
5791 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69
RH
5792 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5793 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
bf71a4f8
JH
5794 if (!scratch)
5795 scratch = gen_reg_rtx (HImode);
3a3677ff 5796 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 5797
9a915772
JH
5798 /* In the unordered case, we have to check C2 for NaN's, which
5799 doesn't happen to work out to anything nice combination-wise.
5800 So do some bit twiddling on the value we've got in AH to come
5801 up with an appropriate set of condition codes. */
e075ae69 5802
9a915772
JH
5803 intcmp_mode = CCNOmode;
5804 switch (code)
32b5b1aa 5805 {
9a915772
JH
5806 case GT:
5807 case UNGT:
5808 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 5809 {
3a3677ff 5810 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 5811 code = EQ;
9a915772
JH
5812 }
5813 else
5814 {
5815 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5816 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5817 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
5818 intcmp_mode = CCmode;
5819 code = GEU;
5820 }
5821 break;
5822 case LT:
5823 case UNLT:
5824 if (code == LT && TARGET_IEEE_FP)
5825 {
3a3677ff
RH
5826 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5827 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
5828 intcmp_mode = CCmode;
5829 code = EQ;
9a915772
JH
5830 }
5831 else
5832 {
5833 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
5834 code = NE;
5835 }
5836 break;
5837 case GE:
5838 case UNGE:
5839 if (code == GE || !TARGET_IEEE_FP)
5840 {
3a3677ff 5841 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 5842 code = EQ;
9a915772
JH
5843 }
5844 else
5845 {
5846 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5847 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5848 GEN_INT (0x01)));
5849 code = NE;
5850 }
5851 break;
5852 case LE:
5853 case UNLE:
5854 if (code == LE && TARGET_IEEE_FP)
5855 {
3a3677ff
RH
5856 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5857 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5858 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
5859 intcmp_mode = CCmode;
5860 code = LTU;
9a915772
JH
5861 }
5862 else
5863 {
5864 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5865 code = NE;
5866 }
5867 break;
5868 case EQ:
5869 case UNEQ:
5870 if (code == EQ && TARGET_IEEE_FP)
5871 {
3a3677ff
RH
5872 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5873 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
5874 intcmp_mode = CCmode;
5875 code = EQ;
9a915772
JH
5876 }
5877 else
5878 {
3a3677ff
RH
5879 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5880 code = NE;
5881 break;
9a915772
JH
5882 }
5883 break;
5884 case NE:
5885 case LTGT:
5886 if (code == NE && TARGET_IEEE_FP)
5887 {
3a3677ff 5888 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9a915772
JH
5889 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5890 GEN_INT (0x40)));
3a3677ff 5891 code = NE;
9a915772
JH
5892 }
5893 else
5894 {
3a3677ff
RH
5895 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5896 code = EQ;
32b5b1aa 5897 }
9a915772
JH
5898 break;
5899
5900 case UNORDERED:
5901 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5902 code = NE;
5903 break;
5904 case ORDERED:
5905 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5906 code = EQ;
5907 break;
5908
5909 default:
5910 abort ();
32b5b1aa 5911 }
32b5b1aa 5912 }
e075ae69
RH
5913
5914 /* Return the test that should be put into the flags user, i.e.
5915 the bcc, scc, or cmov instruction. */
5916 return gen_rtx_fmt_ee (code, VOIDmode,
5917 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5918 const0_rtx);
5919}
5920
9e3e266c 5921rtx
a1b8572c 5922ix86_expand_compare (code, second_test, bypass_test)
e075ae69 5923 enum rtx_code code;
a1b8572c 5924 rtx *second_test, *bypass_test;
e075ae69
RH
5925{
5926 rtx op0, op1, ret;
5927 op0 = ix86_compare_op0;
5928 op1 = ix86_compare_op1;
5929
a1b8572c
JH
5930 if (second_test)
5931 *second_test = NULL_RTX;
5932 if (bypass_test)
5933 *bypass_test = NULL_RTX;
5934
e075ae69 5935 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 5936 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 5937 second_test, bypass_test);
32b5b1aa 5938 else
e075ae69
RH
5939 ret = ix86_expand_int_compare (code, op0, op1);
5940
5941 return ret;
5942}
5943
/* Emit a conditional branch to LABEL on comparison CODE applied to the
   operands previously stored in the globals ix86_compare_op0 and
   ix86_compare_op1.  Dispatches on the mode of the first operand:
   narrow integer modes emit a single compare + jump, x87 floating modes
   emit a combined compare-and-branch PARALLEL, and DImode on 32-bit
   targets is decomposed into word-sized compares and multiple jumps.
   May overwrite ix86_compare_op0/op1 when it recurses.  */
void
ix86_expand_branch (code, label)
     enum rtx_code code;
     rtx label;
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:  /* Also reached via goto from the DImode case on TARGET_64BIT.  */
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
    case TFmode:
      /* Don't expand the comparison early, so that we get better code
	 when jump or whoever decides to reverse the comparison.  */
      {
	rtvec vec;
	int use_fcomi;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	tmp = gen_rtx_fmt_ee (code, VOIDmode,
			      ix86_compare_op0, ix86_compare_op1);
	tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				    gen_rtx_LABEL_REF (VOIDmode, label),
				    pc_rtx);
	tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	/* Build a PARALLEL: the jump plus clobbers of hard regs 18 and 17
	   (NOTE(review): presumably the FP status word and flags registers
	   per the i386 register numbering -- confirm against i386.h), and,
	   when fcomi cannot be used, an extra HImode scratch clobber for
	   the fnstsw/sahf sequence.  */
	use_fcomi = ix86_use_fcomi_compare (code);
	vec = rtvec_alloc (3 + !use_fcomi);
	RTVEC_ELT (vec, 0) = tmp;
	RTVEC_ELT (vec, 1)
	  = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	RTVEC_ELT (vec, 2)
	  = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	if (! use_fcomi)
	  RTVEC_ELT (vec, 3)
	    = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	return;
      }

    case DImode:
      if (TARGET_64BIT)
	goto simple;
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	/* Canonicalize a constant first operand to the second position,
	   swapping the comparison code accordingly.  */
	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    /* Recurse with the SImode OR result compared against zero.  */
	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	/* code1 branches to LABEL on the high word, code2 falls through to
	   LABEL2 (the "false" exit), code3 tests the low word unsigned.
	   NIL marks a branch that can be omitted.  */
	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = NIL; code2 = NE;  break;
	  case NE:   code2 = NIL; break;

	  default:
	    abort ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != NIL)
	  ix86_expand_branch (code1, label);
	if (code2 != NIL)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != NIL)
	  emit_label (label2);
	return;
      }

    default:
      abort ();
    }
}
e075ae69 6116
/* Split branch based on floating point condition.  */
/* CONDITION compares OP1 against OP2; TARGET1/TARGET2 are the taken and
   fall-through destinations (one of them is pc_rtx); TMP is an optional
   scratch register passed through to ix86_expand_fp_compare.  Emits one
   or more jump insns implementing the FP branch, including any bypass
   (NaN) jump and secondary test required by the comparison.  */
void
ix86_split_fp_branch (condition, op1, op2, target1, target2, tmp)
     rtx condition, op1, op2, target1, target2, tmp;
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  enum rtx_code code = GET_CODE (condition);

  /* Canonicalize so that TARGET2 is the fall-through (pc_rtx), reversing
     the condition if needed.  Note the block-local `tmp' shadows the
     scratch parameter only inside this block.  */
  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);
  /* A bypass test means "jump around the main test on NaN": branch to a
     local label that is emitted after all the real branches.  */
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     bypass,
					     gen_rtx_LABEL_REF (VOIDmode,
								label),
					     pc_rtx)));
    }
  /* AMD Athlon and probably other CPUs too have fast bypass path between the
     comparison and first branch.  The second branch takes longer to execute
     so place first branch the worse predicable one if possible.  */
  if (second != NULL_RTX
      && (GET_CODE (second) == UNORDERED || GET_CODE (second) == ORDERED))
    {
      rtx tmp = condition;
      condition = second;
      second = tmp;
    }
  emit_jump_insn (gen_rtx_SET
		  (VOIDmode, pc_rtx,
		   gen_rtx_IF_THEN_ELSE (VOIDmode,
					 condition, target1, target2)));
  if (second != NULL_RTX)
    emit_jump_insn (gen_rtx_SET
		    (VOIDmode, pc_rtx,
		     gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, target2)));
  if (label != NULL_RTX)
    emit_label (label);
}
6168
/* Expand a setcc of comparison CODE (operands taken from the globals
   ix86_compare_op0 / ix86_compare_op1) into DEST.  Returns 1 on success
   ("DONE"), 0 to make the caller fall back to other expansion ("FAIL" --
   used for DImode compares on 32-bit targets).  */
int
ix86_expand_setcc (code, dest)
     enum rtx_code code;
     rtx dest;
{
  rtx ret, tmp, tmpreg;
  rtx second_test, bypass_test;
  int type;

  if (GET_MODE (ix86_compare_op0) == DImode
      && !TARGET_64BIT)
    return 0; /* FAIL */

  /* Three modes of generation:
     0 -- destination does not overlap compare sources:
	  clear dest first, emit strict_low_part setcc.
     1 -- destination does overlap compare sources:
	  emit subreg setcc, zero extend.
     2 -- destination is in QImode:
	  emit setcc only.
  */

  type = 0;

  if (GET_MODE (dest) == QImode)
    type = 2;
  else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
	   || reg_overlap_mentioned_p (dest, ix86_compare_op1))
    type = 1;

  if (type == 0)
    emit_move_insn (dest, const0_rtx);

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  /* tmp is the SET destination of the setcc (possibly wrapped in
     STRICT_LOW_PART); tmpreg is the plain QImode register underneath it,
     needed later to combine the extra tests.  */
  tmp = dest;
  tmpreg = dest;
  if (type == 0)
    {
      tmp = gen_lowpart (QImode, dest);
      tmpreg = tmp;
      tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
    }
  else if (type == 1)
    {
      if (!cse_not_expected)
	tmp = gen_reg_rtx (QImode);
      else
	tmp = gen_lowpart (QImode, dest);
      tmpreg = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  /* An FP compare may have produced an extra test: AND in a (reversed)
     bypass test, or OR in a second test.  At most one of the two may be
     set -- hence the abort below.  */
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  if (second_test)
	    abort();
	  test = bypass_test;
	  bypass = 1;
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  /* Mode 1: zero extend the QImode result into the wide destination,
     clobbering the flags.  */
  if (type == 1)
    {
      rtx clob;

      tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
      tmp = gen_rtx_SET (VOIDmode, dest, tmp);
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
      emit_insn (tmp);
    }

  return 1; /* DONE */
}
e075ae69 6258
/* Expand an integer conditional move:
   operands[0] = dest, operands[1] = comparison rtx (operands come from
   the globals ix86_compare_op0/op1), operands[2]/operands[3] = values
   for the true/false arms.  Tries, in order: sbb-based constant tricks,
   setcc+lea, setcc+and+add (when cmov is unavailable), a masked
   recursive expansion for 0/-1 constants, and finally a real cmov.
   Returns 1 when expansion is done, 0 to make the caller FAIL.  */
int
ix86_expand_int_movcc (operands)
     rtx operands[];
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;

  /* When the compare code is not LTU or GEU, we can not use sbbl case.
     In case comparsion is done with immediate, we can convert it to LTU or
     GEU by altering the integer.  */

  if ((code == LEU || code == GTU)
      && GET_CODE (ix86_compare_op1) == CONST_INT
      && GET_MODE (operands[0]) != HImode
      && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      if (code == LEU)
	code = LTU;
      else
	code = GEU;
      ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
    }

  /* Capture the compare insns in a sequence so they can be emitted at
     whichever point the chosen expansion needs them.  */
  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = gen_sequence ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if (GET_MODE (operands[0]) != HImode
      && GET_MODE (operands[0]) != DImode
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      /* Carry-based case: sbb materializes 0 / -1 from the carry flag.  */
      if ((compare_code == LTU || compare_code == GEU)
	  && !second_test && !bypass_test)
	{

	  /* Detect overlap between destination and compare sources.  */
	  rtx tmp = out;

	  /* To simplify rest of code, restrict to the GEU case.  */
	  if (compare_code == LTU)
	    {
	      int tmp = ct;
	      ct = cf;
	      cf = tmp;
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	  diff = ct - cf;

	  if (reg_overlap_mentioned_p (out, ix86_compare_op0)
	      || reg_overlap_mentioned_p (out, ix86_compare_op1))
	    tmp = gen_reg_rtx (SImode);

	  emit_insn (compare_seq);
	  emit_insn (gen_x86_movsicc_0_m1 (tmp));

	  if (diff == 1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [addl dest, ct]
	       *
	       * Size 5 - 8.
	       */
	      if (ct)
		emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
	    }
	  else if (cf == -1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * orl $ct, dest
	       *
	       * Size 8.
	       */
	      emit_insn (gen_iorsi3 (tmp, tmp, GEN_INT (ct)));
	    }
	  else if (diff == -1 && ct)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * xorl $-1, dest
	       * [addl dest, cf]
	       *
	       * Size 8 - 11.
	       */
	      emit_insn (gen_one_cmplsi2 (tmp, tmp));
	      if (cf)
		emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (cf)));
	    }
	  else
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * andl cf - ct, dest
	       * [addl dest, ct]
	       *
	       * Size 8 - 11.
	       */
	      emit_insn (gen_andsi3 (tmp, tmp, GEN_INT (trunc_int_for_mode
						       (cf - ct, SImode))));
	      if (ct)
		emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
	    }

	  if (tmp != out)
	    emit_move_insn (out, tmp);

	  return 1; /* DONE */
	}

      /* Normalize so ct > cf; reversing the comparison compensates.  */
      diff = ct - cf;
      if (diff < 0)
	{
	  HOST_WIDE_INT tmp;
	  tmp = ct, ct = cf, cf = tmp;
	  diff = -diff;
	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
	    {
	      /* We may be reversing unordered compare to normal compare, that
		 is not valid in general (we may convert non-trapping condition
		 to trapping one), however on i386 we currently emit all
		 comparisons unordered.  */
	      compare_code = reverse_condition_maybe_unordered (compare_code);
	      code = reverse_condition_maybe_unordered (code);
	    }
	  else
	    {
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	}
      /* These diffs are exactly the scales a single lea can produce
	 (1,2,4,8 and base+index forms 3,5,9).  */
      if (diff == 1 || diff == 2 || diff == 4 || diff == 8
	  || diff == 3 || diff == 5 || diff == 9)
	{
	  /*
	   * xorl dest,dest
	   * cmpl op1,op2
	   * setcc dest
	   * lea cf(dest*(ct-cf)),dest
	   *
	   * Size 14.
	   *
	   * This also catches the degenerate setcc-only case.
	   */

	  rtx tmp;
	  int nops;

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics
	     done in proper mode to match.  */
	  if (diff == 1)
	    {
	      if (Pmode != SImode)
		tmp = gen_lowpart (Pmode, out);
	      else
		tmp = out;
	    }
	  else
	    {
	      rtx out1;
	      if (Pmode != SImode)
		out1 = gen_lowpart (Pmode, out);
	      else
		out1 = out;
	      tmp = gen_rtx_MULT (Pmode, out1, GEN_INT (diff & ~1));
	      nops++;
	      if (diff & 1)
		{
		  tmp = gen_rtx_PLUS (Pmode, tmp, out1);
		  nops++;
		}
	    }
	  if (cf != 0)
	    {
	      tmp = gen_rtx_PLUS (Pmode, tmp, GEN_INT (cf));
	      nops++;
	    }
	  if (tmp != out
	      && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
	    {
	      if (Pmode != SImode)
		tmp = gen_rtx_SUBREG (SImode, tmp, 0);

	      /* ??? We should to take care for outputing non-lea arithmetics
		 for Pmode != SImode case too, but it is quite tricky and not
		 too important, since all TARGET_64BIT machines support real
		 conditional moves.  */
	      if (nops == 1 && Pmode == SImode)
		{
		  rtx clob;

		  clob = gen_rtx_REG (CCmode, FLAGS_REG);
		  clob = gen_rtx_CLOBBER (VOIDmode, clob);

		  tmp = gen_rtx_SET (VOIDmode, out, tmp);
		  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
		  emit_insn (tmp);
		}
	      else
		emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
	    }
	  if (out != operands[0])
	    emit_move_insn (operands[0], out);

	  return 1; /* DONE */
	}

      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * Size 20.			Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       *
       * %%% Parameterize branch_cost on the tuning architecture, then
       * use that.  The 80386 couldn't care less about mispredicts.
       */

      if (!optimize_size && !TARGET_CMOVE)
	{
	  if (ct == 0)
	    {
	      ct = cf;
	      cf = 0;
	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
		{
		  /* We may be reversing unordered compare to normal compare,
		     that is not valid in general (we may convert non-trapping
		     condition to trapping one), however on i386 we currently
		     emit all comparisons unordered.  */
		  compare_code = reverse_condition_maybe_unordered (compare_code);
		  code = reverse_condition_maybe_unordered (code);
		}
	      else
		{
		  compare_code = reverse_condition (compare_code);
		  code = reverse_condition (code);
		}
	    }

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  emit_insn (gen_addsi3 (out, out, constm1_rtx));
	  emit_insn (gen_andsi3 (out, out, GEN_INT (trunc_int_for_mode
						    (cf - ct, SImode))));
	  if (ct != 0)
	    emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
	  if (out != operands[0])
	    emit_move_insn (operands[0], out);

	  return 1; /* DONE */
	}
    }

  if (!TARGET_CMOVE)
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (optimize_size)
	return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load a
	 constant with the above and mask it in with a logical operation.  */

      if (GET_CODE (operands[2]) == CONST_INT)
	{
	  var = operands[3];
	  if (INTVAL (operands[2]) == 0)
	    operands[3] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[2]) == -1)
	    operands[3] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else if (GET_CODE (operands[3]) == CONST_INT)
	{
	  var = operands[2];
	  if (INTVAL (operands[3]) == 0)
	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1)
	    operands[2] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else
	return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (GET_MODE (orig_out));
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
	return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
			  OPTAB_WIDEN);
      if (out != orig_out)
	emit_move_insn (orig_out, out);

      return 1; /* DONE */
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
    operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
  if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
    operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);

  /* The extra FP tests re-read the destination; copy any arm that
     overlaps it into a fresh register first.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }
  if (! register_operand (operands[2], VOIDmode)
      && ! register_operand (operands[3], VOIDmode))
    operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						compare_op, operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  second_test,
						  operands[2],
						  operands[0])));

  return 1; /* DONE */
}
e075ae69 6651
/* Expand a floating point conditional move:
   operands[0] = dest, operands[1] = comparison rtx (compare operands in
   the globals ix86_compare_op0/op1), operands[2]/operands[3] = true and
   false arms.  First tries SSE min/max when the pattern matches, then
   falls back to fcmov (via a setcc when the condition is not directly
   representable).  Returns 1 when expansion succeeded.  */
int
ix86_expand_fp_movcc (operands)
     rtx operands[];
{
  enum rtx_code code;
  rtx tmp;
  rtx compare_op, second_test, bypass_test;

  /* For SF/DFmode conditional moves based on comparisons
     in same mode, we may want to use SSE min/max instructions.  */
  if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
       || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
      /* The SSE comparisons does not support the LTGT/UNEQ pair.  */
      && (!TARGET_IEEE_FP
	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
      /* We may be called from the post-reload splitter.  */
      && (!REG_P (operands[0])
	  || SSE_REG_P (operands[0])
	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
    {
      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
      code = GET_CODE (operands[1]);

      /* See if we have (cross) match between comparison operands and
	 conditional move operands.  */
      if (rtx_equal_p (operands[2], op1))
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = reverse_condition_maybe_unordered (code);
	}
      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
	{
	  /* Check for min operation.  */
	  if (code == LT)
	    {
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_minsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_mindf3 (operands[0], op0, op1));
	      return 1;
	    }
	  /* Check for max operation.  */
	  if (code == GT)
	    {
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_maxsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_maxdf3 (operands[0], op0, op1));
	      return 1;
	    }
	}
      /* Manage condition to be sse_comparison_operator.  In case we are
	 in non-ieee mode, try to canonicalize the destination operand
	 to be first in the comparison - this helps reload to avoid extra
	 moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
	{
	  rtx tmp = ix86_compare_op0;
	  ix86_compare_op0 = ix86_compare_op1;
	  ix86_compare_op1 = tmp;
	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      /* Similary try to manage result to be first operand of conditional
	 move.  We also don't support the NE comparison on SSE, so try to
	 avoid it.  */
      if ((rtx_equal_p (operands[0], operands[3])
	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
	{
	  rtx tmp = operands[2];
	  operands[2] = operands[3];
	  operands[3] = tmp;
	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
					  (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      if (GET_MODE (operands[0]) == SFmode)
	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      else
	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* The floating point conditional move instructions don't directly
     support signed integer comparisons.  */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      /* Extra tests only arise from FP compares, which always yield an
	 fcmov-usable condition, so they must be absent here.  Reduce the
	 unsupported condition to a setcc result compared against zero.  */
      if (second_test != NULL || bypass_test != NULL)
	abort();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  /* The extra tests re-read the destination; copy any arm that overlaps
     it into a fresh register first.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						compare_op,
						operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  second_test,
						  operands[2],
						  operands[0])));

  return 1;
}
6805
2450a057
JH
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  /* Number of word-sized pieces OPERAND splits into (2 or 3).
     32bit: SImode (4 byte) pieces; TFmode uses only 3 of its 4 words.
     64bit: DImode (8 byte) pieces, rounded up.  */
  int size;

  if (!TARGET_64BIT)
    size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  /* MMX registers cannot be split into word parts.  */
  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp moves,
     that force all constants to memory to allow combining.  */

  if (GET_CODE (operand) == MEM
      && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
    operand = get_pool_constant (XEXP (operand, 0));

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      /* For a push, every part is the same pre-decrement stack
	 reference; the caller emits them in the right order.  */
      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Consecutive hard registers only exist after reload.  */
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      /* Address the pieces at offsets 0, 4 and 8.  */
	      operand = change_address (operand, SImode, XEXP (operand, 0));
	      parts[0] = operand;
	      parts[1] = adj_offsettable_operand (operand, 4);
	      if (size == 3)
		parts[2] = adj_offsettable_operand (operand, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      /* Decompose an FP constant into its target word images.  */
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		case TFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = GEN_INT (l[2]);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = GEN_INT (l[1]);
	      parts[0] = GEN_INT (l[0]);
	    }
	  else
	    abort ();
	}
    }
  else
    {
      /* 64bit target: only XFmode/TFmode need splitting (into one
	 DImode part plus one SImode part); smaller modes were already
	 rejected by the size check above or handled by the caller.  */
      if (mode == XFmode || mode == TFmode)
	{
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = change_address (operand, DImode, XEXP (operand, 0));
	      parts[0] = operand;
	      parts[1] = adj_offsettable_operand (operand, 8);
	      /* The second part holds only 32 significant bits.  */
	      parts[1] = change_address (parts[1], SImode, XEXP (parts[1], 0));
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[3];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
	        parts[0] = GEN_INT (l[0] + ((l[1] << 31) << 1));
	      else
	        parts[0] = immed_double_const (l[0], l[1], DImode);
	      parts[1] = GEN_INT (l[2]);
	    }
	  else
	    abort ();
	}
    }

  return size;
}
6935
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

void
ix86_split_long_move (operands)
     rtx operands[];
{
  /* part[0][] are the destination pieces, part[1][] the source pieces.  */
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by fp moves,
	 that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.
     The pushes below move the stack pointer, so rewrite each later
     source part to be addressed relative to the previous one.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      /* Count how many destination parts clobber the source address.  */
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  collisions = 1;
	  emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
				  XEXP (part[1][0], 0)));
	  part[1][0] = change_address (part[1][0],
				       TARGET_64BIT ? DImode : SImode,
				       part[0][nparts - 1]);
	  part[1][1] = adj_offsettable_operand (part[1][0],
						UNITS_PER_WORD);
	  part[1][1] = change_address (part[1][1], GET_MODE (part[0][1]),
				       XEXP (part[1][1], 0));
	  if (nparts == 3)
	    part[1][2] = adj_offsettable_operand (part[1][0], 8);
	}
    }

  if (push)
    {
      /* Pushes are emitted from the highest part downwards.  */
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      /* We use only first 12 bytes of TFmode value, but for pushing we
		 are required to adjust stack as if we were pushing real 16byte
		 value.  */
	      if (mode == TFmode && !TARGET_64BIT)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				       GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these comes from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      if (GET_CODE (part[1][1]) == MEM)
		part[1][1] = change_address (part[1][1], DImode, XEXP (part[1][1], 0));
	      else if (REG_P (part[1][1]))
		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
	      else
		abort();
	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.
     The caller's splitter emits operands[2..4] := operands[5..7] in order,
     so put the colliding part last.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}
32b5b1aa 7126
e075ae69
RH
7127void
7128ix86_split_ashldi (operands, scratch)
7129 rtx *operands, scratch;
32b5b1aa 7130{
e075ae69
RH
7131 rtx low[2], high[2];
7132 int count;
b985a30f 7133
e075ae69
RH
7134 if (GET_CODE (operands[2]) == CONST_INT)
7135 {
7136 split_di (operands, 2, low, high);
7137 count = INTVAL (operands[2]) & 63;
32b5b1aa 7138
e075ae69
RH
7139 if (count >= 32)
7140 {
7141 emit_move_insn (high[0], low[1]);
7142 emit_move_insn (low[0], const0_rtx);
b985a30f 7143
e075ae69
RH
7144 if (count > 32)
7145 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
7146 }
7147 else
7148 {
7149 if (!rtx_equal_p (operands[0], operands[1]))
7150 emit_move_insn (operands[0], operands[1]);
7151 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
7152 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
7153 }
7154 }
7155 else
7156 {
7157 if (!rtx_equal_p (operands[0], operands[1]))
7158 emit_move_insn (operands[0], operands[1]);
b985a30f 7159
e075ae69 7160 split_di (operands, 1, low, high);
b985a30f 7161
e075ae69
RH
7162 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
7163 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 7164
fe577e58 7165 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 7166 {
fe577e58 7167 if (! no_new_pseudos)
e075ae69
RH
7168 scratch = force_reg (SImode, const0_rtx);
7169 else
7170 emit_move_insn (scratch, const0_rtx);
7171
7172 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
7173 scratch));
7174 }
7175 else
7176 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
7177 }
e9a25f70 7178}
32b5b1aa 7179
e075ae69
RH
7180void
7181ix86_split_ashrdi (operands, scratch)
7182 rtx *operands, scratch;
32b5b1aa 7183{
e075ae69
RH
7184 rtx low[2], high[2];
7185 int count;
32b5b1aa 7186
e075ae69
RH
7187 if (GET_CODE (operands[2]) == CONST_INT)
7188 {
7189 split_di (operands, 2, low, high);
7190 count = INTVAL (operands[2]) & 63;
32b5b1aa 7191
e075ae69
RH
7192 if (count >= 32)
7193 {
7194 emit_move_insn (low[0], high[1]);
32b5b1aa 7195
e075ae69
RH
7196 if (! reload_completed)
7197 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
7198 else
7199 {
7200 emit_move_insn (high[0], low[0]);
7201 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
7202 }
7203
7204 if (count > 32)
7205 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
7206 }
7207 else
7208 {
7209 if (!rtx_equal_p (operands[0], operands[1]))
7210 emit_move_insn (operands[0], operands[1]);
7211 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
7212 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
7213 }
7214 }
7215 else
32b5b1aa 7216 {
e075ae69
RH
7217 if (!rtx_equal_p (operands[0], operands[1]))
7218 emit_move_insn (operands[0], operands[1]);
7219
7220 split_di (operands, 1, low, high);
7221
7222 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
7223 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
7224
fe577e58 7225 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 7226 {
fe577e58 7227 if (! no_new_pseudos)
e075ae69
RH
7228 scratch = gen_reg_rtx (SImode);
7229 emit_move_insn (scratch, high[0]);
7230 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
7231 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
7232 scratch));
7233 }
7234 else
7235 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 7236 }
e075ae69 7237}
32b5b1aa 7238
e075ae69
RH
7239void
7240ix86_split_lshrdi (operands, scratch)
7241 rtx *operands, scratch;
7242{
7243 rtx low[2], high[2];
7244 int count;
32b5b1aa 7245
e075ae69 7246 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 7247 {
e075ae69
RH
7248 split_di (operands, 2, low, high);
7249 count = INTVAL (operands[2]) & 63;
7250
7251 if (count >= 32)
c7271385 7252 {
e075ae69
RH
7253 emit_move_insn (low[0], high[1]);
7254 emit_move_insn (high[0], const0_rtx);
32b5b1aa 7255
e075ae69
RH
7256 if (count > 32)
7257 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
7258 }
7259 else
7260 {
7261 if (!rtx_equal_p (operands[0], operands[1]))
7262 emit_move_insn (operands[0], operands[1]);
7263 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
7264 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
7265 }
32b5b1aa 7266 }
e075ae69
RH
7267 else
7268 {
7269 if (!rtx_equal_p (operands[0], operands[1]))
7270 emit_move_insn (operands[0], operands[1]);
32b5b1aa 7271
e075ae69
RH
7272 split_di (operands, 1, low, high);
7273
7274 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
7275 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
7276
7277 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 7278 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 7279 {
fe577e58 7280 if (! no_new_pseudos)
e075ae69
RH
7281 scratch = force_reg (SImode, const0_rtx);
7282 else
7283 emit_move_insn (scratch, const0_rtx);
7284
7285 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
7286 scratch));
7287 }
7288 else
7289 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
7290 }
32b5b1aa 7291}
3f803cd9 7292
0945b39d
JH
7293/* Helper function for the string operations bellow. Dest VARIABLE whether
7294 it is aligned to VALUE bytes. If true, jump to the label. */
7295static rtx
7296ix86_expand_aligntest (variable, value)
7297 rtx variable;
7298 int value;
7299{
7300 rtx label = gen_label_rtx ();
7301 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
7302 if (GET_MODE (variable) == DImode)
7303 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
7304 else
7305 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
7306 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
7307 1, 0, label);
7308 return label;
7309}
7310
7311/* Adjust COUNTER by the VALUE. */
7312static void
7313ix86_adjust_counter (countreg, value)
7314 rtx countreg;
7315 HOST_WIDE_INT value;
7316{
7317 if (GET_MODE (countreg) == DImode)
7318 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
7319 else
7320 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
7321}
7322
7323/* Zero extend possibly SImode EXP to Pmode register. */
7324static rtx
7325ix86_zero_extend_to_Pmode (exp)
7326 rtx exp;
7327{
7328 rtx r;
7329 if (GET_MODE (exp) == VOIDmode)
7330 return force_reg (Pmode, exp);
7331 if (GET_MODE (exp) == Pmode)
7332 return copy_to_mode_reg (Pmode, exp);
7333 r = gen_reg_rtx (Pmode);
7334 emit_insn (gen_zero_extendsidi2 (r, exp));
7335 return r;
7336}
7337
/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  Returns 1 when the move was fully expanded, 0 to fall back
   to the library call.  ix86_expand_clrstr contains similar code.  */
int
ix86_expand_movstr (dst, src, count_exp, align_exp)
     rtx dst, src, count_exp, align_exp;
{
  rtx srcreg, destreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;		/* known alignment, 0 = unknown */
  unsigned HOST_WIDE_INT count = 0;	/* byte count when constant, else 0 */
  rtx insns;

  start_sequence ();

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    count = INTVAL (count_exp);

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  /* String instructions count upwards.  */
  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      if (TARGET_64BIT)
	emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
				        destreg, srcreg, countreg));
      else
	emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
				  destreg, srcreg, countreg));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int)64))
    {
      /* Word size of the bulk copy: 8 byte movsq on 64bit, else movsl.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  if (size == 4)
	    {
	      if (TARGET_64BIT)
		emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
					        destreg, srcreg, countreg));
	      else
		emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
					  destreg, srcreg, countreg));
	    }
	  else
	    emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
					    destreg, srcreg, countreg));
	}
      /* Copy the remaining count % size bytes with explicit moves.  */
      if (size == 8 && (count & 0x04))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if (count & 0x02)
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (count & 0x01)
	emit_insn (gen_strmovqi (destreg, srcreg));
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
     allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
	{
	  end_sequence ();
	  return 0;
	}

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
	 than 4 bytes, because gcc is able to optimize such code better (in
	 the case the destination or the count really is aligned, gcc is often
	 able to predict the branches) and also it is friendlier to the
	 hardware branch prediction.

	 Using loops is beneficial for generic case, because we can
	 handle small counts using the loops.  Many CPUs (such as Athlon)
	 have large REP prefix setup costs.

	 This is quite costly.  Maybe we can revisit this decision later or
	 add some customizability to this code.  */

      /* For small runtime counts, skip the bulk copy and alignment
	 prologue entirely and go straight to the tail moves.  */
      if (count == 0
	  && align < (TARGET_PENTIUMPRO && (count == 0
					    || count >= (unsigned int)260)
		      ? 8 : UNITS_PER_WORD))
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
				   LEU, 0, counter_mode, 1, 0, label);
	}
      /* Alignment prologue: these tests inspect the destination address.  */
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4
	  && ((TARGET_PENTIUMPRO && (count == 0
				     || count >= (unsigned int)260))
	      || TARGET_64BIT))
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      /* Bulk copy: rep movsq (64bit) or rep movsl of count / wordsize.  */
      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
					  destreg, srcreg, countreg2));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
	  emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
				    destreg, srcreg, countreg2));
	}

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      /* Epilogue: copy the remaining bytes; tests inspect COUNT, i.e. the
	 remaining byte count, not the destination address.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if ((align <= 4 || count == 0) && TARGET_64BIT)
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strmovqi (destreg, srcreg));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }

  insns = get_insns ();
  end_sequence ();

  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
  emit_insns (insns);
  return 1;
}
7561
7562/* Expand string clear operation (bzero). Use i386 string operations when
7563 profitable. expand_movstr contains similar code. */
7564int
7565ix86_expand_clrstr (src, count_exp, align_exp)
7566 rtx src, count_exp, align_exp;
7567{
7568 rtx destreg, zeroreg, countreg;
7569 enum machine_mode counter_mode;
7570 HOST_WIDE_INT align = 0;
7571 unsigned HOST_WIDE_INT count = 0;
7572
7573 if (GET_CODE (align_exp) == CONST_INT)
7574 align = INTVAL (align_exp);
7575
7576 /* This simple hack avoids all inlining code and simplifies code bellow. */
7577 if (!TARGET_ALIGN_STRINGOPS)
7578 align = 32;
7579
7580 if (GET_CODE (count_exp) == CONST_INT)
7581 count = INTVAL (count_exp);
7582 /* Figure out proper mode for counter. For 32bits it is always SImode,
7583 for 64bits use SImode when possible, otherwise DImode.
7584 Set count to number of bytes copied when known at compile time. */
7585 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
7586 || x86_64_zero_extended_value (count_exp))
7587 counter_mode = SImode;
7588 else
7589 counter_mode = DImode;
7590
7591 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
7592
7593 emit_insn (gen_cld ());
7594
7595 /* When optimizing for size emit simple rep ; movsb instruction for
7596 counts not divisible by 4. */
7597
7598 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
7599 {
7600 countreg = ix86_zero_extend_to_Pmode (count_exp);
7601 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
7602 if (TARGET_64BIT)
7603 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
7604 destreg, countreg));
7605 else
7606 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
7607 destreg, countreg));
7608 }
7609 else if (count != 0
7610 && (align >= 8
7611 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
7612 || optimize_size || count < (unsigned int)64))
7613 {
7614 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
7615 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
7616 if (count & ~(size - 1))
7617 {
7618 countreg = copy_to_mode_reg (counter_mode,
7619 GEN_INT ((count >> (size == 4 ? 2 : 3))
7620 & (TARGET_64BIT ? -1 : 0x3fffffff)));
7621 countreg = ix86_zero_extend_to_Pmode (countreg);
7622 if (size == 4)
7623 {
7624 if (TARGET_64BIT)
7625 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
7626 destreg, countreg));
7627 else
7628 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
7629 destreg, countreg));
7630 }
7631 else
7632 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
7633 destreg, countreg));
7634 }
7635 if (size == 8 && (count & 0x04))
7636 emit_insn (gen_strsetsi (destreg,
7637 gen_rtx_SUBREG (SImode, zeroreg, 0)));
7638 if (count & 0x02)
7639 emit_insn (gen_strsethi (destreg,
7640 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7641 if (count & 0x01)
7642 emit_insn (gen_strsetqi (destreg,
7643 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7644 }
7645 else
7646 {
7647 rtx countreg2;
7648 rtx label = NULL;
7649
7650 /* In case we don't know anything about the alignment, default to
7651 library version, since it is usually equally fast and result in
7652 shorter code. */
7653 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
7654 return 0;
7655
7656 if (TARGET_SINGLE_STRINGOP)
7657 emit_insn (gen_cld ());
7658
7659 countreg2 = gen_reg_rtx (Pmode);
7660 countreg = copy_to_mode_reg (counter_mode, count_exp);
7661 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
7662
7663 if (count == 0
7664 && align < (TARGET_PENTIUMPRO && (count == 0
7665 || count >= (unsigned int)260)
7666 ? 8 : UNITS_PER_WORD))
7667 {
7668 label = gen_label_rtx ();
7669 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
7670 LEU, 0, counter_mode, 1, 0, label);
7671 }
7672 if (align <= 1)
7673 {
7674 rtx label = ix86_expand_aligntest (destreg, 1);
7675 emit_insn (gen_strsetqi (destreg,
7676 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7677 ix86_adjust_counter (countreg, 1);
7678 emit_label (label);
7679 LABEL_NUSES (label) = 1;
7680 }
7681 if (align <= 2)
7682 {
7683 rtx label = ix86_expand_aligntest (destreg, 2);
7684 emit_insn (gen_strsethi (destreg,
7685 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7686 ix86_adjust_counter (countreg, 2);
7687 emit_label (label);
7688 LABEL_NUSES (label) = 1;
7689 }
7690 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
7691 || count >= (unsigned int)260))
7692 {
7693 rtx label = ix86_expand_aligntest (destreg, 4);
7694 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
7695 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
7696 : zeroreg)));
7697 ix86_adjust_counter (countreg, 4);
7698 emit_label (label);
7699 LABEL_NUSES (label) = 1;
7700 }
7701
7702 if (!TARGET_SINGLE_STRINGOP)
7703 emit_insn (gen_cld ());
7704 if (TARGET_64BIT)
7705 {
7706 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
7707 GEN_INT (3)));
7708 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
7709 destreg, countreg2));
7710 }
7711 else
7712 {
7713 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
7714 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
7715 destreg, countreg2));
7716 }
7717
7718 if (label)
7719 {
7720 emit_label (label);
7721 LABEL_NUSES (label) = 1;
7722 }
7723 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
7724 emit_insn (gen_strsetsi (destreg,
7725 gen_rtx_SUBREG (SImode, zeroreg, 0)));
7726 if (TARGET_64BIT && (align <= 4 || count == 0))
7727 {
7728 rtx label = ix86_expand_aligntest (destreg, 2);
7729 emit_insn (gen_strsetsi (destreg,
7730 gen_rtx_SUBREG (SImode, zeroreg, 0)));
7731 emit_label (label);
7732 LABEL_NUSES (label) = 1;
7733 }
7734 if (align > 2 && count != 0 && (count & 2))
7735 emit_insn (gen_strsethi (destreg,
7736 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7737 if (align <= 2 || count == 0)
7738 {
7739 rtx label = ix86_expand_aligntest (destreg, 2);
7740 emit_insn (gen_strsethi (destreg,
7741 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7742 emit_label (label);
7743 LABEL_NUSES (label) = 1;
7744 }
7745 if (align > 1 && count != 0 && (count & 1))
7746 emit_insn (gen_strsetqi (destreg,
7747 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7748 if (align <= 1 || count == 0)
7749 {
7750 rtx label = ix86_expand_aligntest (destreg, 1);
7751 emit_insn (gen_strsetqi (destreg,
7752 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7753 emit_label (label);
7754 LABEL_NUSES (label) = 1;
7755 }
7756 }
7757 return 1;
7758}
/* Expand strlen.  OUT receives the length, SRC is the string MEM,
   EOSCHAR the terminator and ALIGN its known alignment.  Returns 1
   when expanded inline, 0 to fall back to the library call.  */
int
ix86_expand_strlen (out, src, eoschar, align)
     rtx out, src, eoschar, align;
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid its
     expansion unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Unrolled word-at-a-time scan (zero terminator only).  */

      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      if (TARGET_64BIT)
	emit_insn (gen_subdi3 (out, out, addr));
      else
	emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      /* repnz ; scasb scan: scratch4 holds the -1 count limit, the
	 resulting count is complemented and decremented to get the
	 length excluding the terminator.  */
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
					 align, scratch4, scratch3));
	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
	}
      else
	{
	  emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
				     align, scratch4, scratch3));
	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
	}
    }
  return 1;
}
7828
e075ae69
RH
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body. It needs the initialisations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (out, align_rtx)
     rtx out, align_rtx;
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  /* Dispatch on the residue: 0 -> already aligned, 2 -> two
	     bytes to check, >2 -> one byte to check.  */
	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, 0, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
				   Pmode, 1, 0, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
				   Pmode, 1, 0, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, 0, align_4_label);
	}

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, 0, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
				   QImode, 1, 0, end_0_label);

	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (out, out, const1_rtx));
	  else
	    emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      /* Check the last unaligned byte before the 4-byte loop.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, 0, end_0_label);

      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.
     (The classic (x - 0x01010101) & ~x & 0x80808080 zero-byte test.)  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 GEN_INT (trunc_int_for_mode
				  (0x80808080, SImode))));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
			   SImode, 1, 0, align_4_label);

  if (TARGET_CMOVE)
    {
      /* Branchless fixup of OUT using conditional moves.  */
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
			      gen_rtx_PLUS (Pmode, out, GEN_INT (2))));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						    reg2,
						    out)));

    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
      else
	emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  The add sets carry iff the low
     byte of tmpreg is nonzero; the subtract-with-carry then backs OUT
     up by 3 or 4 to point at the zero byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));

  emit_label (end_0_label);
}
8019\f
e075ae69
RH
8020/* Clear stack slot assignments remembered from previous functions.
8021 This is called from INIT_EXPANDERS once before RTL is emitted for each
8022 function. */
8023
36edd3cc
BS
8024static void
8025ix86_init_machine_status (p)
1526a060 8026 struct function *p;
e075ae69 8027{
37b15744
RH
8028 p->machine = (struct machine_function *)
8029 xcalloc (1, sizeof (struct machine_function));
e075ae69
RH
8030}
8031
1526a060
BS
8032/* Mark machine specific bits of P for GC. */
8033static void
8034ix86_mark_machine_status (p)
8035 struct function *p;
8036{
37b15744 8037 struct machine_function *machine = p->machine;
1526a060
BS
8038 enum machine_mode mode;
8039 int n;
8040
37b15744
RH
8041 if (! machine)
8042 return;
8043
1526a060
BS
8044 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
8045 mode = (enum machine_mode) ((int) mode + 1))
8046 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
37b15744
RH
8047 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
8048}
8049
8050static void
8051ix86_free_machine_status (p)
8052 struct function *p;
8053{
8054 free (p->machine);
8055 p->machine = NULL;
1526a060
BS
8056}
8057
e075ae69
RH
8058/* Return a MEM corresponding to a stack slot with mode MODE.
8059 Allocate a new slot if necessary.
8060
8061 The RTL for a function can have several slots available: N is
8062 which slot to use. */
8063
8064rtx
8065assign_386_stack_local (mode, n)
8066 enum machine_mode mode;
8067 int n;
8068{
8069 if (n < 0 || n >= MAX_386_STACK_LOCALS)
8070 abort ();
8071
8072 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
8073 ix86_stack_locals[(int) mode][n]
8074 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
8075
8076 return ix86_stack_locals[(int) mode][n];
8077}
8078\f
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

static int
memory_address_length (addr)
     rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  /* Auto-modify addresses (push/pop style) add no address bytes.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* Special cases: ebp and esp need the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing: a bare 32-bit displacement.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* 'K' accepts constants fitting a sign-extended 8-bit field.  */
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
	    len = 1;
	  else
	    len = 4;
	}

      /* An index requires the two-byte modrm form (SIB byte).  */
      if (index)
	len += 1;
    }

  return len;
}
79325812 8138
/* Compute default value for "length_immediate" attribute.  When SHORTFORM is set
   expect that insn have 8bit immediate alternative.  Returns the number of
   bytes the (single) immediate operand occupies in the encoding, or 0 when
   the insn has no constant operand.  */
int
ix86_attr_length_immediate_default (insn, shortform)
     rtx insn;
     int shortform;
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	/* At most one immediate operand is expected per insn.  */
	if (len)
	  abort ();
	if (shortform
	    && GET_CODE (recog_data.operand[i]) == CONST_INT
	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
	  len = 1;	/* Sign-extended 8-bit immediate form.  */
	else
	  {
	    /* Otherwise the immediate width follows the insn's mode.  */
	    switch (get_attr_mode (insn))
	      {
	      case MODE_QI:
		len+=1;
		break;
	      case MODE_HI:
		len+=2;
		break;
	      case MODE_SI:
		len+=4;
		break;
	      /* NOTE(review): no MODE_DI case here -- confirm how 64-bit
		 immediates on TARGET_64BIT insns are meant to be counted.  */
	      default:
		fatal_insn ("Unknown insn mode", insn);
	      }
	  }
      }
  return len;
}
8178/* Compute default value for "length_address" attribute. */
8179int
8180ix86_attr_length_address_default (insn)
8181 rtx insn;
8182{
8183 int i;
6c698a6d 8184 extract_insn_cached (insn);
1ccbefce
RH
8185 for (i = recog_data.n_operands - 1; i >= 0; --i)
8186 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 8187 {
6ef67412 8188 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
8189 break;
8190 }
6ef67412 8191 return 0;
3f803cd9 8192}
e075ae69
RH
8193\f
8194/* Return the maximum number of instructions a cpu can issue. */
b657fc39 8195
e075ae69
RH
8196int
8197ix86_issue_rate ()
b657fc39 8198{
e075ae69 8199 switch (ix86_cpu)
b657fc39 8200 {
e075ae69
RH
8201 case PROCESSOR_PENTIUM:
8202 case PROCESSOR_K6:
8203 return 2;
79325812 8204
e075ae69 8205 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
8206 case PROCESSOR_PENTIUM4:
8207 case PROCESSOR_ATHLON:
e075ae69 8208 return 3;
b657fc39 8209
b657fc39 8210 default:
e075ae69 8211 return 1;
b657fc39 8212 }
b657fc39
L
8213}
8214
e075ae69
RH
8215/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
8216 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 8217
e075ae69
RH
8218static int
8219ix86_flags_dependant (insn, dep_insn, insn_type)
8220 rtx insn, dep_insn;
8221 enum attr_type insn_type;
8222{
8223 rtx set, set2;
b657fc39 8224
e075ae69
RH
8225 /* Simplify the test for uninteresting insns. */
8226 if (insn_type != TYPE_SETCC
8227 && insn_type != TYPE_ICMOV
8228 && insn_type != TYPE_FCMOV
8229 && insn_type != TYPE_IBR)
8230 return 0;
b657fc39 8231
e075ae69
RH
8232 if ((set = single_set (dep_insn)) != 0)
8233 {
8234 set = SET_DEST (set);
8235 set2 = NULL_RTX;
8236 }
8237 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
8238 && XVECLEN (PATTERN (dep_insn), 0) == 2
8239 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
8240 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
8241 {
8242 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
8243 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
8244 }
78a0d70c
ZW
8245 else
8246 return 0;
b657fc39 8247
78a0d70c
ZW
8248 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
8249 return 0;
b657fc39 8250
78a0d70c
ZW
8251 /* This test is true if the dependant insn reads the flags but
8252 not any other potentially set register. */
8253 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
8254 return 0;
8255
8256 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
8257 return 0;
8258
8259 return 1;
e075ae69 8260}
b657fc39 8261
/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx addr;

  if (insn_type == TYPE_LEA)
    {
      /* For lea the "address" is the SET_SRC of its (possibly
	 parallel) pattern, not a MEM operand.  */
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
	;
      else if (GET_CODE (addr) == PARALLEL
	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      else
	abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      /* Otherwise take the address of the first MEM operand found;
	 an insn with no MEM cannot incur an AGI stall.  */
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    goto found;
	  }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}
a269a03c
JC
8300
8301int
e075ae69 8302ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
8303 rtx insn, link, dep_insn;
8304 int cost;
8305{
e075ae69 8306 enum attr_type insn_type, dep_insn_type;
0b5107cf 8307 enum attr_memory memory;
e075ae69 8308 rtx set, set2;
9b00189f 8309 int dep_insn_code_number;
a269a03c 8310
309ada50 8311 /* Anti and output depenancies have zero cost on all CPUs. */
e075ae69 8312 if (REG_NOTE_KIND (link) != 0)
309ada50 8313 return 0;
a269a03c 8314
9b00189f
JH
8315 dep_insn_code_number = recog_memoized (dep_insn);
8316
e075ae69 8317 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 8318 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 8319 return cost;
a269a03c 8320
1c71e60e
JH
8321 insn_type = get_attr_type (insn);
8322 dep_insn_type = get_attr_type (dep_insn);
9b00189f 8323
a269a03c
JC
8324 switch (ix86_cpu)
8325 {
8326 case PROCESSOR_PENTIUM:
e075ae69
RH
8327 /* Address Generation Interlock adds a cycle of latency. */
8328 if (ix86_agi_dependant (insn, dep_insn, insn_type))
8329 cost += 1;
8330
8331 /* ??? Compares pair with jump/setcc. */
8332 if (ix86_flags_dependant (insn, dep_insn, insn_type))
8333 cost = 0;
8334
8335 /* Floating point stores require value to be ready one cycle ealier. */
0f290768 8336 if (insn_type == TYPE_FMOV
e075ae69
RH
8337 && get_attr_memory (insn) == MEMORY_STORE
8338 && !ix86_agi_dependant (insn, dep_insn, insn_type))
8339 cost += 1;
8340 break;
a269a03c 8341
e075ae69 8342 case PROCESSOR_PENTIUMPRO:
0f290768 8343 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
8344 increase the cost here for non-imov insns. */
8345 if (dep_insn_type != TYPE_IMOV
8346 && dep_insn_type != TYPE_FMOV
0b5107cf
JH
8347 && ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
8348 || memory == MEMORY_BOTH))
e075ae69
RH
8349 cost += 1;
8350
8351 /* INT->FP conversion is expensive. */
8352 if (get_attr_fp_int_src (dep_insn))
8353 cost += 5;
8354
8355 /* There is one cycle extra latency between an FP op and a store. */
8356 if (insn_type == TYPE_FMOV
8357 && (set = single_set (dep_insn)) != NULL_RTX
8358 && (set2 = single_set (insn)) != NULL_RTX
8359 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
8360 && GET_CODE (SET_DEST (set2)) == MEM)
8361 cost += 1;
8362 break;
a269a03c 8363
e075ae69
RH
8364 case PROCESSOR_K6:
8365 /* The esp dependency is resolved before the instruction is really
8366 finished. */
8367 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
8368 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
8369 return 1;
a269a03c 8370
0f290768 8371 /* Since we can't represent delayed latencies of load+operation,
e075ae69 8372 increase the cost here for non-imov insns. */
0b5107cf
JH
8373 if ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
8374 || memory == MEMORY_BOTH)
e075ae69
RH
8375 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
8376
8377 /* INT->FP conversion is expensive. */
8378 if (get_attr_fp_int_src (dep_insn))
8379 cost += 5;
a14003ee 8380 break;
e075ae69 8381
309ada50 8382 case PROCESSOR_ATHLON:
0b5107cf
JH
8383 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
8384 || memory == MEMORY_BOTH)
8385 {
8386 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
8387 cost += 2;
8388 else
8389 cost += 3;
8390 }
309ada50 8391
a269a03c 8392 default:
a269a03c
JC
8393 break;
8394 }
8395
8396 return cost;
8397}
0a726ef1 8398
/* Scheduling state carried between the scheduler hooks below.  Only the
   PPro/PII model currently stores anything here.  */
static union
{
  struct ppro_sched_data
  {
    rtx decode[3];		/* Insns occupying the three decoder slots.  */
    int issued_this_cycle;	/* Count of insns issued this cycle.  */
  } ppro;
} ix86_sched_data;
0a726ef1 8407
e075ae69
RH
8408static int
8409ix86_safe_length (insn)
8410 rtx insn;
8411{
8412 if (recog_memoized (insn) >= 0)
8413 return get_attr_length(insn);
8414 else
8415 return 128;
8416}
0a726ef1 8417
/* As ix86_safe_length, but returns 0 (not a conservative guess) for
   unrecognizable insns.  NOTE(review): despite its name this returns the
   full insn length via get_attr_length, not a prefix length -- looks like
   a copy-paste from ix86_safe_length; confirm against the length
   attributes in i386.md (callers use it as a prefix-byte allowance in
   the <= 7 + prefix pairing check).  */
static int
ix86_safe_length_prefix (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_length(insn);
  else
    return 0;
}
8427
8428static enum attr_memory
8429ix86_safe_memory (insn)
8430 rtx insn;
8431{
8432 if (recog_memoized (insn) >= 0)
8433 return get_attr_memory(insn);
8434 else
8435 return MEMORY_UNKNOWN;
8436}
0a726ef1 8437
e075ae69
RH
8438static enum attr_pent_pair
8439ix86_safe_pent_pair (insn)
8440 rtx insn;
8441{
8442 if (recog_memoized (insn) >= 0)
8443 return get_attr_pent_pair(insn);
8444 else
8445 return PENT_PAIR_NP;
8446}
0a726ef1 8447
e075ae69
RH
8448static enum attr_ppro_uops
8449ix86_safe_ppro_uops (insn)
8450 rtx insn;
8451{
8452 if (recog_memoized (insn) >= 0)
8453 return get_attr_ppro_uops (insn);
8454 else
8455 return PPRO_UOPS_MANY;
8456}
0a726ef1 8457
e075ae69
RH
8458static void
8459ix86_dump_ppro_packet (dump)
8460 FILE *dump;
0a726ef1 8461{
e075ae69 8462 if (ix86_sched_data.ppro.decode[0])
0a726ef1 8463 {
e075ae69
RH
8464 fprintf (dump, "PPRO packet: %d",
8465 INSN_UID (ix86_sched_data.ppro.decode[0]));
8466 if (ix86_sched_data.ppro.decode[1])
8467 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
8468 if (ix86_sched_data.ppro.decode[2])
8469 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
8470 fputc ('\n', dump);
8471 }
8472}
0a726ef1 8473
e075ae69 8474/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 8475
e075ae69
RH
8476void
8477ix86_sched_init (dump, sched_verbose)
8478 FILE *dump ATTRIBUTE_UNUSED;
8479 int sched_verbose ATTRIBUTE_UNUSED;
8480{
8481 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
8482}
8483
8484/* Shift INSN to SLOT, and shift everything else down. */
8485
8486static void
8487ix86_reorder_insn (insnp, slot)
8488 rtx *insnp, *slot;
8489{
8490 if (insnp != slot)
8491 {
8492 rtx insn = *insnp;
0f290768 8493 do
e075ae69
RH
8494 insnp[0] = insnp[1];
8495 while (++insnp != slot);
8496 *insnp = insn;
0a726ef1 8497 }
e075ae69
RH
8498}
8499
/* Find an instruction with given pairability and minimal amount of cycles
   lost by the fact that the CPU waits for both pipelines to finish before
   reading next instructions.  Also take care that both instructions together
   can not exceed 7 bytes.

   READY/E_READY bound the ready queue (E_READY is its last element);
   FIRST is the insn we are trying to pair with.  Returns a pointer into
   the queue, or NULL when no suitable partner exists.  */

static rtx *
ix86_pent_find_pair (e_ready, ready, type, first)
     rtx *e_ready;
     rtx *ready;
     enum attr_pent_pair type;
     rtx first;
{
  int mincycles, cycles;
  enum attr_pent_pair tmp;
  enum attr_memory memory;
  rtx *insnp, *bestinsnp = NULL;

  /* An insn longer than 7 bytes (plus prefixes) cannot pair at all.  */
  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
    return NULL;

  memory = ix86_safe_memory (first);
  cycles = result_ready_cost (first);
  mincycles = INT_MAX;

  /* Scan the queue for candidates of the requested pairability class,
     keeping the one that wastes the fewest cycles; stop early once a
     zero-cost pairing is found (mincycles == 0).  */
  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
      {
	enum attr_memory second_memory;
	int secondcycles, currentcycles;

	second_memory = ix86_safe_memory (*insnp);
	secondcycles = result_ready_cost (*insnp);
	/* Base penalty: the latency difference between the two pipes.  */
	currentcycles = abs (cycles - secondcycles);

	if (secondcycles >= 1 && cycles >= 1)
	  {
	    /* Two read/modify/write instructions together takes two
	       cycles longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
	      currentcycles += 2;

	    /* Read modify/write instruction followed by read/modify
	       takes one cycle longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
		&& tmp != PENT_PAIR_UV
		&& ix86_safe_pent_pair (first) != PENT_PAIR_UV)
	      currentcycles += 1;
	  }
	if (currentcycles < mincycles)
	  bestinsnp = insnp, mincycles = currentcycles;
      }

  return bestinsnp;
}
8555
/* Subroutines of ix86_sched_reorder.  */

/* Reorder the ready queue (head at E_READY) so the head insn and its
   best pairing partner end up adjacent, modelling the Pentium's U/V
   pipe pairing rules.  */

static void
ix86_sched_reorder_pentium (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  enum attr_pent_pair pair1, pair2;
  rtx *insnp;

  /* This wouldn't be necessary if Haifa knew that static insn ordering
     is important to which pipe an insn is issued to.  So we have to make
     some minor rearrangements.  */

  pair1 = ix86_safe_pent_pair (*e_ready);

  /* If the first insn is non-pairable, let it be.  */
  if (pair1 == PENT_PAIR_NP)
    return;

  pair2 = PENT_PAIR_NP;
  insnp = 0;

  /* If the first insn is UV or PV pairable, search for a PU
     insn to go with.  */
  if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PU, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PU;
    }

  /* If the first insn is PU or UV pairable, search for a PV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP
      && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PV;
    }

  /* If the first insn is pairable, search for a UV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP)
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_UV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_UV;
    }

  if (pair2 == PENT_PAIR_NP)
    return;

  /* Found something!  Decide if we need to swap the order.  */
  if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
      || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
	  && ix86_safe_memory (*e_ready) == MEMORY_BOTH
	  && ix86_safe_memory (*insnp) == MEMORY_LOAD))
    ix86_reorder_insn (insnp, e_ready);
  else
    ix86_reorder_insn (insnp, e_ready - 1);
}
e075ae69 8622
/* Reorder the ready queue (head at E_READY) to model the PPro/PII
   decoders: slot 0 accepts any insn, slots 1 and 2 only single-uop
   insns.  Also records how many insns were slotted this cycle.  */

static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready >= e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  /* Always report at least one issue so the scheduler makes progress.  */
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}
fb693d44 8707
0f290768 8708/* We are about to being issuing insns for this clock cycle.
78a0d70c
ZW
8709 Override the default sort algorithm to better slot instructions. */
8710int
8711ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
8712 FILE *dump ATTRIBUTE_UNUSED;
8713 int sched_verbose ATTRIBUTE_UNUSED;
8714 rtx *ready;
8715 int n_ready;
8716 int clock_var ATTRIBUTE_UNUSED;
8717{
8718 rtx *e_ready = ready + n_ready - 1;
fb693d44 8719
78a0d70c
ZW
8720 if (n_ready < 2)
8721 goto out;
e075ae69 8722
78a0d70c
ZW
8723 switch (ix86_cpu)
8724 {
8725 default:
8726 break;
e075ae69 8727
78a0d70c
ZW
8728 case PROCESSOR_PENTIUM:
8729 ix86_sched_reorder_pentium (ready, e_ready);
8730 break;
e075ae69 8731
78a0d70c
ZW
8732 case PROCESSOR_PENTIUMPRO:
8733 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 8734 break;
fb693d44
RH
8735 }
8736
e075ae69
RH
8737out:
8738 return ix86_issue_rate ();
8739}
fb693d44 8740
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      /* Simple model: each issued insn consumes one slot.  */
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    /* A many-uop insn occupies all decoders by itself: dump
	       the pending packet, then this insn as its own packet.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    /* A 2-4 uop insn must start a fresh packet in decoder 0.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    /* Single-uop insn: take the first free decoder slot.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		/* Packet full: dump it and start a new one.  */
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
	return --ix86_sched_data.ppro.issued_this_cycle;
      }
    }
}
a7180f70 8803\f
0e4970d7
RK
8804/* Walk through INSNS and look for MEM references whose address is DSTREG or
8805 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
8806 appropriate. */
8807
8808void
8809ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
8810 rtx insns;
8811 rtx dstref, srcref, dstreg, srcreg;
8812{
8813 rtx insn;
8814
8815 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
8816 if (INSN_P (insn))
8817 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
8818 dstreg, srcreg);
8819}
8820
/* Subroutine of above to actually do the updating by recursively walking
   the rtx.  */

static void
ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx x;
     rtx dstref, srcref, dstreg, srcreg;
{
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);
  int i, j;

  /* A MEM whose address is exactly DSTREG (resp. SRCREG) inherits the
     memory attributes of the corresponding reference.  */
  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  /* Recurse into each rtx ('e') and rtx-vector ('E') operand of X.  */
  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
    {
      if (*format_ptr == 'e')
	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
				   dstreg, srcreg);
      else if (*format_ptr == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
				     dstreg, srcreg);
    }
}
8849\f
a7180f70
BS
8850/* Compute the alignment given to a constant that is being placed in memory.
8851 EXP is the constant and ALIGN is the alignment that the object would
8852 ordinarily have.
8853 The value of this function is used instead of that alignment to align
8854 the object. */
8855
8856int
8857ix86_constant_alignment (exp, align)
8858 tree exp;
8859 int align;
8860{
8861 if (TREE_CODE (exp) == REAL_CST)
8862 {
8863 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
8864 return 64;
8865 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
8866 return 128;
8867 }
8868 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
8869 && align < 256)
8870 return 256;
8871
8872 return align;
8873}
8874
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (type, align)
     tree type;
     int align;
{
  /* Aggregates of 256 bits or more get 256-bit alignment.  */
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      /* Arrays align according to their element mode.  */
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Records/unions align according to their first field's mode.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
8940
8941/* Compute the alignment for a local variable.
8942 TYPE is the data type, and ALIGN is the alignment that
8943 the object would ordinarily have. The value of this macro is used
8944 instead of that alignment to align the object. */
8945
8946int
8947ix86_local_alignment (type, align)
8948 tree type;
8949 int align;
8950{
0d7d98ee
JH
8951 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
8952 to 16byte boundary. */
8953 if (TARGET_64BIT)
8954 {
8955 if (AGGREGATE_TYPE_P (type)
8956 && TYPE_SIZE (type)
8957 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
8958 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
8959 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
8960 return 128;
8961 }
a7180f70
BS
8962 if (TREE_CODE (type) == ARRAY_TYPE)
8963 {
8964 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
8965 return 64;
8966 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
8967 return 128;
8968 }
8969 else if (TREE_CODE (type) == COMPLEX_TYPE)
8970 {
8971 if (TYPE_MODE (type) == DCmode && align < 64)
8972 return 64;
8973 if (TYPE_MODE (type) == XCmode && align < 128)
8974 return 128;
8975 }
8976 else if ((TREE_CODE (type) == RECORD_TYPE
8977 || TREE_CODE (type) == UNION_TYPE
8978 || TREE_CODE (type) == QUAL_UNION_TYPE)
8979 && TYPE_FIELDS (type))
8980 {
8981 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
8982 return 64;
8983 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
8984 return 128;
8985 }
8986 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
8987 || TREE_CODE (type) == INTEGER_TYPE)
8988 {
0f290768 8989
a7180f70
BS
8990 if (TYPE_MODE (type) == DFmode && align < 64)
8991 return 64;
8992 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
8993 return 128;
8994 }
8995 return align;
8996}
0ed08620
JH
8997\f
8998/* Emit RTL insns to initialize the variable parts of a trampoline.
8999 FNADDR is an RTX for the address of the function's pure code.
9000 CXT is an RTX for the static chain value for the function. */
9001void
9002x86_initialize_trampoline (tramp, fnaddr, cxt)
9003 rtx tramp, fnaddr, cxt;
9004{
9005 if (!TARGET_64BIT)
9006 {
9007 /* Compute offset from the end of the jmp to the target function. */
9008 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
9009 plus_constant (tramp, 10),
9010 NULL_RTX, 1, OPTAB_DIRECT);
9011 emit_move_insn (gen_rtx_MEM (QImode, tramp),
9012 GEN_INT (trunc_int_for_mode (0xb9, QImode)));
9013 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
9014 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
9015 GEN_INT (trunc_int_for_mode (0xe9, QImode)));
9016 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
9017 }
9018 else
9019 {
9020 int offset = 0;
9021 /* Try to load address using shorter movl instead of movabs.
9022 We may want to support movq for kernel mode, but kernel does not use
9023 trampolines at the moment. */
9024 if (x86_64_zero_extended_value (fnaddr))
9025 {
9026 fnaddr = copy_to_mode_reg (DImode, fnaddr);
9027 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9028 GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
9029 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
9030 gen_lowpart (SImode, fnaddr));
9031 offset += 6;
9032 }
9033 else
9034 {
9035 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9036 GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
9037 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
9038 fnaddr);
9039 offset += 10;
9040 }
9041 /* Load static chain using movabs to r10. */
9042 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9043 GEN_INT (trunc_int_for_mode (0xba49, HImode)));
9044 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
9045 cxt);
9046 offset += 10;
9047 /* Jump to the r11 */
9048 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9049 GEN_INT (trunc_int_for_mode (0xff49, HImode)));
9050 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
9051 GEN_INT (trunc_int_for_mode (0xe3, HImode)));
9052 offset += 3;
9053 if (offset > TRAMPOLINE_SIZE)
9054 abort();
9055 }
9056}
bd793c65
BS
9057
/* Shorthand for registering a target-specific (BUILT_IN_MD) builtin.  */
#define def_builtin(NAME, TYPE, CODE) \
  builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL)

/* Table entry describing a builtin that maps onto a single insn
   pattern.  */
struct builtin_description
{
  enum insn_code icode;		/* Insn pattern implementing the builtin.  */
  const char * name;		/* User-visible name, or 0 if the builtin
				   is registered separately by hand.  */
  enum ix86_builtins code;	/* Builtin function code.  */
  enum rtx_code comparison;	/* For compare builtins, the rtx comparison
				   to emit (0 otherwise).  */
  unsigned int flag;		/* Set on the GT/GE compare entries below —
				   presumably tells the expander to swap the
				   operands; confirm in ix86_expand_builtin.  */
};
9068
/* comi/ucomi builtins: scalar SSE compares whose result is an integer.
   The GT/GE variants reuse the LT/LE patterns with FLAG set.  */
static struct builtin_description bdesc_comi[] =
{
  { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
};
9084
/* Builtins that take two vector operands.  Entries whose NAME is 0 are
   not registered by the generic loop in ix86_init_builtins; they are
   registered by hand with more specific types.  */
static struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  /* SSE compares; GT/GE reuse the LT/LE patterns with FLAG set.  */
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },

  { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },

  { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  These are registered by hand in ix86_init_builtins.  */
  { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },

  { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }

};
9217
/* Builtins that take a single vector operand.  All are registered by
   hand (NAME is 0) with specific types in ix86_init_builtins.  */
static struct builtin_description bdesc_1arg[] =
{
  { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }

};
9233
9234/* Expand all the target specific builtins. This is not called if TARGET_MMX
9235 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
9236 builtins. */
9237void
9238ix86_init_builtins ()
9239{
9240 struct builtin_description * d;
77ebd435 9241 size_t i;
cbd5937a 9242 tree endlink = void_list_node;
bd793c65
BS
9243
9244 tree pchar_type_node = build_pointer_type (char_type_node);
9245 tree pfloat_type_node = build_pointer_type (float_type_node);
9246 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
9247 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
9248
9249 /* Comparisons. */
9250 tree int_ftype_v4sf_v4sf
9251 = build_function_type (integer_type_node,
9252 tree_cons (NULL_TREE, V4SF_type_node,
9253 tree_cons (NULL_TREE,
9254 V4SF_type_node,
9255 endlink)));
9256 tree v4si_ftype_v4sf_v4sf
9257 = build_function_type (V4SI_type_node,
9258 tree_cons (NULL_TREE, V4SF_type_node,
9259 tree_cons (NULL_TREE,
9260 V4SF_type_node,
9261 endlink)));
9262 /* MMX/SSE/integer conversions. */
9263 tree int_ftype_v4sf_int
9264 = build_function_type (integer_type_node,
9265 tree_cons (NULL_TREE, V4SF_type_node,
9266 tree_cons (NULL_TREE,
9267 integer_type_node,
9268 endlink)));
9269 tree int_ftype_v4sf
9270 = build_function_type (integer_type_node,
9271 tree_cons (NULL_TREE, V4SF_type_node,
9272 endlink));
9273 tree int_ftype_v8qi
9274 = build_function_type (integer_type_node,
9275 tree_cons (NULL_TREE, V8QI_type_node,
9276 endlink));
9277 tree int_ftype_v2si
9278 = build_function_type (integer_type_node,
9279 tree_cons (NULL_TREE, V2SI_type_node,
9280 endlink));
9281 tree v2si_ftype_int
9282 = build_function_type (V2SI_type_node,
9283 tree_cons (NULL_TREE, integer_type_node,
9284 endlink));
9285 tree v4sf_ftype_v4sf_int
9286 = build_function_type (integer_type_node,
9287 tree_cons (NULL_TREE, V4SF_type_node,
9288 tree_cons (NULL_TREE, integer_type_node,
9289 endlink)));
9290 tree v4sf_ftype_v4sf_v2si
9291 = build_function_type (V4SF_type_node,
9292 tree_cons (NULL_TREE, V4SF_type_node,
9293 tree_cons (NULL_TREE, V2SI_type_node,
9294 endlink)));
9295 tree int_ftype_v4hi_int
9296 = build_function_type (integer_type_node,
9297 tree_cons (NULL_TREE, V4HI_type_node,
9298 tree_cons (NULL_TREE, integer_type_node,
9299 endlink)));
9300 tree v4hi_ftype_v4hi_int_int
332316cd 9301 = build_function_type (V4HI_type_node,
bd793c65
BS
9302 tree_cons (NULL_TREE, V4HI_type_node,
9303 tree_cons (NULL_TREE, integer_type_node,
9304 tree_cons (NULL_TREE,
9305 integer_type_node,
9306 endlink))));
9307 /* Miscellaneous. */
9308 tree v8qi_ftype_v4hi_v4hi
9309 = build_function_type (V8QI_type_node,
9310 tree_cons (NULL_TREE, V4HI_type_node,
9311 tree_cons (NULL_TREE, V4HI_type_node,
9312 endlink)));
9313 tree v4hi_ftype_v2si_v2si
9314 = build_function_type (V4HI_type_node,
9315 tree_cons (NULL_TREE, V2SI_type_node,
9316 tree_cons (NULL_TREE, V2SI_type_node,
9317 endlink)));
9318 tree v4sf_ftype_v4sf_v4sf_int
9319 = build_function_type (V4SF_type_node,
9320 tree_cons (NULL_TREE, V4SF_type_node,
9321 tree_cons (NULL_TREE, V4SF_type_node,
9322 tree_cons (NULL_TREE,
9323 integer_type_node,
9324 endlink))));
9325 tree v4hi_ftype_v8qi_v8qi
9326 = build_function_type (V4HI_type_node,
9327 tree_cons (NULL_TREE, V8QI_type_node,
9328 tree_cons (NULL_TREE, V8QI_type_node,
9329 endlink)));
9330 tree v2si_ftype_v4hi_v4hi
9331 = build_function_type (V2SI_type_node,
9332 tree_cons (NULL_TREE, V4HI_type_node,
9333 tree_cons (NULL_TREE, V4HI_type_node,
9334 endlink)));
9335 tree v4hi_ftype_v4hi_int
9336 = build_function_type (V4HI_type_node,
9337 tree_cons (NULL_TREE, V4HI_type_node,
9338 tree_cons (NULL_TREE, integer_type_node,
9339 endlink)));
9340 tree di_ftype_di_int
9341 = build_function_type (long_long_unsigned_type_node,
9342 tree_cons (NULL_TREE, long_long_unsigned_type_node,
9343 tree_cons (NULL_TREE, integer_type_node,
9344 endlink)));
9345 tree v8qi_ftype_v8qi_di
9346 = build_function_type (V8QI_type_node,
9347 tree_cons (NULL_TREE, V8QI_type_node,
9348 tree_cons (NULL_TREE,
9349 long_long_integer_type_node,
9350 endlink)));
9351 tree v4hi_ftype_v4hi_di
9352 = build_function_type (V4HI_type_node,
9353 tree_cons (NULL_TREE, V4HI_type_node,
9354 tree_cons (NULL_TREE,
9355 long_long_integer_type_node,
9356 endlink)));
9357 tree v2si_ftype_v2si_di
9358 = build_function_type (V2SI_type_node,
9359 tree_cons (NULL_TREE, V2SI_type_node,
9360 tree_cons (NULL_TREE,
9361 long_long_integer_type_node,
9362 endlink)));
9363 tree void_ftype_void
9364 = build_function_type (void_type_node, endlink);
9365 tree void_ftype_pchar_int
9366 = build_function_type (void_type_node,
9367 tree_cons (NULL_TREE, pchar_type_node,
9368 tree_cons (NULL_TREE, integer_type_node,
9369 endlink)));
9370 tree void_ftype_unsigned
9371 = build_function_type (void_type_node,
9372 tree_cons (NULL_TREE, unsigned_type_node,
9373 endlink));
9374 tree unsigned_ftype_void
9375 = build_function_type (unsigned_type_node, endlink);
9376 tree di_ftype_void
9377 = build_function_type (long_long_unsigned_type_node, endlink);
9378 tree ti_ftype_void
9379 = build_function_type (intTI_type_node, endlink);
9380 tree v2si_ftype_v4sf
9381 = build_function_type (V2SI_type_node,
9382 tree_cons (NULL_TREE, V4SF_type_node,
9383 endlink));
9384 /* Loads/stores. */
9385 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
9386 tree_cons (NULL_TREE, V8QI_type_node,
9387 tree_cons (NULL_TREE,
9388 pchar_type_node,
9389 endlink)));
9390 tree void_ftype_v8qi_v8qi_pchar
9391 = build_function_type (void_type_node, maskmovq_args);
9392 tree v4sf_ftype_pfloat
9393 = build_function_type (V4SF_type_node,
9394 tree_cons (NULL_TREE, pfloat_type_node,
9395 endlink));
9396 tree v4sf_ftype_float
9397 = build_function_type (V4SF_type_node,
9398 tree_cons (NULL_TREE, float_type_node,
9399 endlink));
9400 tree v4sf_ftype_float_float_float_float
9401 = build_function_type (V4SF_type_node,
9402 tree_cons (NULL_TREE, float_type_node,
9403 tree_cons (NULL_TREE, float_type_node,
9404 tree_cons (NULL_TREE,
9405 float_type_node,
9406 tree_cons (NULL_TREE,
9407 float_type_node,
9408 endlink)))));
9409 /* @@@ the type is bogus */
9410 tree v4sf_ftype_v4sf_pv2si
9411 = build_function_type (V4SF_type_node,
9412 tree_cons (NULL_TREE, V4SF_type_node,
9413 tree_cons (NULL_TREE, pv2si_type_node,
9414 endlink)));
9415 tree v4sf_ftype_pv2si_v4sf
9416 = build_function_type (V4SF_type_node,
9417 tree_cons (NULL_TREE, V4SF_type_node,
9418 tree_cons (NULL_TREE, pv2si_type_node,
9419 endlink)));
9420 tree void_ftype_pfloat_v4sf
9421 = build_function_type (void_type_node,
9422 tree_cons (NULL_TREE, pfloat_type_node,
9423 tree_cons (NULL_TREE, V4SF_type_node,
9424 endlink)));
9425 tree void_ftype_pdi_di
9426 = build_function_type (void_type_node,
9427 tree_cons (NULL_TREE, pdi_type_node,
9428 tree_cons (NULL_TREE,
9429 long_long_unsigned_type_node,
9430 endlink)));
9431 /* Normal vector unops. */
9432 tree v4sf_ftype_v4sf
9433 = build_function_type (V4SF_type_node,
9434 tree_cons (NULL_TREE, V4SF_type_node,
9435 endlink));
0f290768 9436
bd793c65
BS
9437 /* Normal vector binops. */
9438 tree v4sf_ftype_v4sf_v4sf
9439 = build_function_type (V4SF_type_node,
9440 tree_cons (NULL_TREE, V4SF_type_node,
9441 tree_cons (NULL_TREE, V4SF_type_node,
9442 endlink)));
9443 tree v8qi_ftype_v8qi_v8qi
9444 = build_function_type (V8QI_type_node,
9445 tree_cons (NULL_TREE, V8QI_type_node,
9446 tree_cons (NULL_TREE, V8QI_type_node,
9447 endlink)));
9448 tree v4hi_ftype_v4hi_v4hi
9449 = build_function_type (V4HI_type_node,
9450 tree_cons (NULL_TREE, V4HI_type_node,
9451 tree_cons (NULL_TREE, V4HI_type_node,
9452 endlink)));
9453 tree v2si_ftype_v2si_v2si
9454 = build_function_type (V2SI_type_node,
9455 tree_cons (NULL_TREE, V2SI_type_node,
9456 tree_cons (NULL_TREE, V2SI_type_node,
9457 endlink)));
9458 tree ti_ftype_ti_ti
9459 = build_function_type (intTI_type_node,
9460 tree_cons (NULL_TREE, intTI_type_node,
9461 tree_cons (NULL_TREE, intTI_type_node,
9462 endlink)));
9463 tree di_ftype_di_di
9464 = build_function_type (long_long_unsigned_type_node,
9465 tree_cons (NULL_TREE, long_long_unsigned_type_node,
9466 tree_cons (NULL_TREE,
9467 long_long_unsigned_type_node,
9468 endlink)));
9469
9470 /* Add all builtins that are more or less simple operations on two
9471 operands. */
9472 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
9473 {
9474 /* Use one of the operands; the target can have a different mode for
9475 mask-generating compares. */
9476 enum machine_mode mode;
9477 tree type;
9478
9479 if (d->name == 0)
9480 continue;
9481 mode = insn_data[d->icode].operand[1].mode;
9482
9483 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
9484 continue;
9485
9486 switch (mode)
9487 {
9488 case V4SFmode:
9489 type = v4sf_ftype_v4sf_v4sf;
9490 break;
9491 case V8QImode:
9492 type = v8qi_ftype_v8qi_v8qi;
9493 break;
9494 case V4HImode:
9495 type = v4hi_ftype_v4hi_v4hi;
9496 break;
9497 case V2SImode:
9498 type = v2si_ftype_v2si_v2si;
9499 break;
9500 case TImode:
9501 type = ti_ftype_ti_ti;
9502 break;
9503 case DImode:
9504 type = di_ftype_di_di;
9505 break;
9506
9507 default:
9508 abort ();
9509 }
0f290768 9510
bd793c65
BS
9511 /* Override for comparisons. */
9512 if (d->icode == CODE_FOR_maskcmpv4sf3
9513 || d->icode == CODE_FOR_maskncmpv4sf3
9514 || d->icode == CODE_FOR_vmmaskcmpv4sf3
9515 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
9516 type = v4si_ftype_v4sf_v4sf;
9517
9518 def_builtin (d->name, type, d->code);
9519 }
9520
9521 /* Add the remaining MMX insns with somewhat more complicated types. */
9522 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
9523 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
9524 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
9525 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
9526 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
9527 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
9528 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
9529 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
9530 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
9531
9532 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
9533 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
9534 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
9535
9536 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
9537 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
9538
9539 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
9540 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
9541
9542 /* Everything beyond this point is SSE only. */
9543 if (! TARGET_SSE)
9544 return;
0f290768 9545
bd793c65
BS
9546 /* comi/ucomi insns. */
9547 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
9548 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
9549
9550 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
9551 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
9552 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
9553
9554 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
9555 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
9556 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
9557 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
9558 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
9559 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
9560
9561 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
9562 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
9563
9564 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
9565
9566 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
9567 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
9568 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
9569 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
9570 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
9571 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
9572
9573 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
9574 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
9575 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
9576 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
9577
9578 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
9579 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
9580 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
9581 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
9582
9583 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
9584 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
9585
9586 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
9587
9588 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
9589 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
9590 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
9591 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
9592 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
9593 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
9594
9595 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
9596
9597 /* Composite intrinsics. */
9598 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
9599 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
9600 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
9601 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
9602 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
9603 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
9604 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
9605}
9606
9607/* Errors in the source file can cause expand_expr to return const0_rtx
9608 where we expect a vector. To avoid crashing, use one of the vector
9609 clear instructions. */
9610static rtx
9611safe_vector_operand (x, mode)
9612 rtx x;
9613 enum machine_mode mode;
9614{
9615 if (x != const0_rtx)
9616 return x;
9617 x = gen_reg_rtx (mode);
9618
9619 if (VALID_MMX_REG_MODE (mode))
9620 emit_insn (gen_mmx_clrdi (mode == DImode ? x
9621 : gen_rtx_SUBREG (DImode, x, 0)));
9622 else
9623 emit_insn (gen_sse_clrti (mode == TImode ? x
9624 : gen_rtx_SUBREG (TImode, x, 0)));
9625 return x;
9626}
9627
9628/* Subroutine of ix86_expand_builtin to take care of binop insns. */
9629
9630static rtx
9631ix86_expand_binop_builtin (icode, arglist, target)
9632 enum insn_code icode;
9633 tree arglist;
9634 rtx target;
9635{
9636 rtx pat;
9637 tree arg0 = TREE_VALUE (arglist);
9638 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9639 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9640 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9641 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9642 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
9643 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
9644
9645 if (VECTOR_MODE_P (mode0))
9646 op0 = safe_vector_operand (op0, mode0);
9647 if (VECTOR_MODE_P (mode1))
9648 op1 = safe_vector_operand (op1, mode1);
9649
9650 if (! target
9651 || GET_MODE (target) != tmode
9652 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9653 target = gen_reg_rtx (tmode);
9654
9655 /* In case the insn wants input operands in modes different from
9656 the result, abort. */
9657 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
9658 abort ();
9659
9660 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9661 op0 = copy_to_mode_reg (mode0, op0);
9662 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
9663 op1 = copy_to_mode_reg (mode1, op1);
9664
9665 pat = GEN_FCN (icode) (target, op0, op1);
9666 if (! pat)
9667 return 0;
9668 emit_insn (pat);
9669 return target;
9670}
9671
9672/* Subroutine of ix86_expand_builtin to take care of stores. */
9673
9674static rtx
9675ix86_expand_store_builtin (icode, arglist, shuffle)
9676 enum insn_code icode;
9677 tree arglist;
9678 int shuffle;
9679{
9680 rtx pat;
9681 tree arg0 = TREE_VALUE (arglist);
9682 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9683 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9684 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9685 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
9686 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
9687
9688 if (VECTOR_MODE_P (mode1))
9689 op1 = safe_vector_operand (op1, mode1);
9690
9691 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
9692 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
9693 op1 = copy_to_mode_reg (mode1, op1);
9694 if (shuffle >= 0)
9695 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
9696 pat = GEN_FCN (icode) (op0, op1);
9697 if (pat)
9698 emit_insn (pat);
9699 return 0;
9700}
9701
9702/* Subroutine of ix86_expand_builtin to take care of unop insns. */
9703
9704static rtx
9705ix86_expand_unop_builtin (icode, arglist, target, do_load)
9706 enum insn_code icode;
9707 tree arglist;
9708 rtx target;
9709 int do_load;
9710{
9711 rtx pat;
9712 tree arg0 = TREE_VALUE (arglist);
9713 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9714 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9715 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
9716
9717 if (! target
9718 || GET_MODE (target) != tmode
9719 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9720 target = gen_reg_rtx (tmode);
9721 if (do_load)
9722 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
9723 else
9724 {
9725 if (VECTOR_MODE_P (mode0))
9726 op0 = safe_vector_operand (op0, mode0);
9727
9728 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9729 op0 = copy_to_mode_reg (mode0, op0);
9730 }
9731
9732 pat = GEN_FCN (icode) (target, op0);
9733 if (! pat)
9734 return 0;
9735 emit_insn (pat);
9736 return target;
9737}
9738
9739/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
9740 sqrtss, rsqrtss, rcpss. */
9741
9742static rtx
9743ix86_expand_unop1_builtin (icode, arglist, target)
9744 enum insn_code icode;
9745 tree arglist;
9746 rtx target;
9747{
9748 rtx pat;
9749 tree arg0 = TREE_VALUE (arglist);
9750 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9751 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9752 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
9753
9754 if (! target
9755 || GET_MODE (target) != tmode
9756 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9757 target = gen_reg_rtx (tmode);
9758
9759 if (VECTOR_MODE_P (mode0))
9760 op0 = safe_vector_operand (op0, mode0);
9761
9762 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9763 op0 = copy_to_mode_reg (mode0, op0);
9764
9765 pat = GEN_FCN (icode) (target, op0, op0);
9766 if (! pat)
9767 return 0;
9768 emit_insn (pat);
9769 return target;
9770}
9771
9772/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
9773
9774static rtx
9775ix86_expand_sse_compare (d, arglist, target)
9776 struct builtin_description *d;
9777 tree arglist;
9778 rtx target;
9779{
9780 rtx pat;
9781 tree arg0 = TREE_VALUE (arglist);
9782 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9783 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9784 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9785 rtx op2;
9786 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
9787 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
9788 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
9789 enum rtx_code comparison = d->comparison;
9790
9791 if (VECTOR_MODE_P (mode0))
9792 op0 = safe_vector_operand (op0, mode0);
9793 if (VECTOR_MODE_P (mode1))
9794 op1 = safe_vector_operand (op1, mode1);
9795
9796 /* Swap operands if we have a comparison that isn't available in
9797 hardware. */
9798 if (d->flag)
9799 {
9800 target = gen_reg_rtx (tmode);
9801 emit_move_insn (target, op1);
9802 op1 = op0;
9803 op0 = target;
9804 comparison = swap_condition (comparison);
9805 }
9806 else if (! target
9807 || GET_MODE (target) != tmode
9808 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
9809 target = gen_reg_rtx (tmode);
9810
9811 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
9812 op0 = copy_to_mode_reg (mode0, op0);
9813 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
9814 op1 = copy_to_mode_reg (mode1, op1);
9815
9816 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
9817 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
9818 if (! pat)
9819 return 0;
9820 emit_insn (pat);
9821 return target;
9822}
9823
/* Subroutine of ix86_expand_builtin to take care of comi insns.
   Emits the comparison described by D on the two vector arguments and
   materializes its flag result as a 0/1 value via gen_setcc_2.  The
   incoming TARGET is ignored; the result is always a fresh QImode
   subreg of a zeroed SImode register (presumably so the surrounding
   SImode bits are well defined -- the code zeroes them explicitly).  */

static rtx
ix86_expand_sse_comi (d, arglist, target)
     struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  /* Guard against error-produced const0_rtx operands.  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
      comparison = swap_condition (comparison);
    }

  /* Zero the whole SImode register, then use its low byte as the
     setcc destination.  */
  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* NOTE(review): the comparison rtx OP2 is built in MODE0 and passed
     as the third generator operand; confirm against the comi patterns
     in i386.md that this matches their operand list.  */
  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_setcc_2 (target, op2));

  return target;
}
9875
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.

   Builtins needing hand-written expansion are handled in the big
   switch; everything else is dispatched through the bdesc_2arg,
   bdesc_1arg and bdesc_comi tables at the bottom.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2, arg3;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_M_FROM_INT:
      /* Write an SImode value into the low half of a fresh DImode
	 (MMX) register.  */
      target = gen_reg_rtx (DImode);
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
      return target;

    case IX86_BUILTIN_M_TO_INT:
      /* Extract the low SImode half of a DImode (MMX) value.  */
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      op0 = copy_to_mode_reg (DImode, op0);
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
      return target;

    case IX86_BUILTIN_PEXTRW:
      /* pextrw: second argument must be an immediate selector.  */
      icode = CODE_FOR_mmx_pextrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
      /* pinsrw: third argument must be an immediate selector.  */
      icode = CODE_FOR_mmx_pinsrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
      icode = CODE_FOR_mmx_maskmovq;
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      /* NOTE(review): op0 is validated with operand[1]'s predicate but
	 operand[0]'s mode -- looks like a copy/paste slip; confirm
	 against the mmx_maskmovq pattern.  */
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
      /* Merge a 64-bit memory operand into the high/low half of a
	 V4SF register.  */
      icode = (fcode == IX86_BUILTIN_LOADHPS
	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      /* Store the high/low half of a V4SF register to memory.  The
	 memory operand is used as both destination and first source
	 of the mov[hl]ps pattern.  */
      icode = (fcode == IX86_BUILTIN_STOREHPS
	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);

    case IX86_BUILTIN_LDMXCSR:
      /* ldmxcsr reads from memory; bounce the value through a stack
	 slot.  */
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      /* stmxcsr writes to memory; read the result back from a stack
	 slot.  */
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_PREFETCH:
      /* prefetch: second argument is the (immediate) hint selector.  */
      icode = CODE_FOR_prefetch;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}

      op0 = copy_to_mode_reg (Pmode, op0);
      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      /* NOTE(review): returns the incoming TARGET although prefetch
	 produces no value; other void cases return 0 -- confirm this
	 is intentional.  */
      return target;

    case IX86_BUILTIN_SHUFPS:
      /* shufps: third argument must be an immediate mask.  */
      icode = CODE_FOR_sse_shufps;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
      /* pshufw: the pattern takes the destination twice (operands 0
	 and 1) plus the source and the immediate mask.  */
      icode = CODE_FOR_mmx_pshufw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[2].mode;
      mode1 = insn_data[icode].operand[3].mode;

      /* NOTE(review): op0 is checked with operand[1]'s predicate but
	 operand[2]'s mode -- confirm against the mmx_pshufw pattern.  */
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

      /* Composite intrinsics.  */
    case IX86_BUILTIN_SETPS1:
      /* Broadcast a scalar into all four lanes: spill to stack,
	 loadss, then shufps with mask 0.  */
      target = assign_386_stack_local (SFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (change_address (target, SFmode, XEXP (target, 0)),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_loadss (op0, change_address (target, V4SFmode,
						      XEXP (target, 0))));
      emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
      return op0;

    case IX86_BUILTIN_SETPS:
      /* Build a V4SF value from four scalars through a stack slot.  */
      target = assign_386_stack_local (V4SFmode, 0);
      op0 = change_address (target, SFmode, XEXP (target, 0));
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
      emit_move_insn (op0,
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adj_offsettable_operand (op0, 4),
		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adj_offsettable_operand (op0, 8),
		      expand_expr (arg2, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adj_offsettable_operand (op0, 12),
		      expand_expr (arg3, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_movaps (op0, target));
      return op0;

    case IX86_BUILTIN_CLRPS:
      target = gen_reg_rtx (TImode);
      emit_insn (gen_sse_clrti (target));
      return target;

    case IX86_BUILTIN_LOADRPS:
      /* Load and reverse lane order (shufps mask 0x1b).  */
      target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
					 gen_reg_rtx (V4SFmode), 1);
      emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
      return target;

    case IX86_BUILTIN_LOADPS1:
      /* Load one scalar and broadcast it to all lanes.  */
      target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
					 gen_reg_rtx (V4SFmode), 1);
      emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPS1:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
    case IX86_BUILTIN_STORERPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    default:
      break;
    }

  /* Table-driven two-operand builtins; SSE compares get special
     expansion.  */
  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}
4211a8fb
JH
10266
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  Returns a MEM rtx
   through which the stored value can be read back; the caller must
   release the space with ix86_free_from_memory.  Three strategies:
   64-bit with a red zone writes below the stack pointer; otherwise
   the value is pushed with PRE_DEC stores.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      /* The red zone below the stack pointer is ours to scribble in;
	 no stack adjustment needed.  */
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      /* Push a full DImode word; narrower values are widened first.  */
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (DImode,
				      gen_rtx_PRE_DEC (DImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      /* 32-bit: push SImode words.  */
      switch (mode)
	{
	case DImode:
	  {
	    /* Push the two halves, high word first, so the value sits
	       in memory in the right order.  */
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[1]));
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (GET_MODE (operand),
				      gen_rtx_PRE_DEC (SImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
10348
10349/* Free operand from the memory. */
10350void
10351ix86_free_from_memory (mode)
10352 enum machine_mode mode;
10353{
898d374d
JH
10354 if (!TARGET_64BIT || !TARGET_RED_ZONE)
10355 {
10356 int size;
10357
10358 if (mode == DImode || TARGET_64BIT)
10359 size = 8;
10360 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
10361 size = 2;
10362 else
10363 size = 4;
10364 /* Use LEA to deallocate stack space. In peephole2 it will be converted
10365 to pop or add instruction if registers are available. */
10366 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10367 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10368 GEN_INT (size))));
10369 }
4211a8fb 10370}
a946dd00 10371
f84aa48a
JH
/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.

   The checks are ordered from most to least specific; each may force
   X into a narrower class (or NO_REGS, sending it to memory).  */
enum reg_class
ix86_preferred_reload_class (x, class)
     rtx x;
     enum reg_class class;
{
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
	return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
	{
	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
	  if (MAYBE_SSE_CLASS_P (class))
	    return (reg_class_subset_p (class, GENERAL_REGS)
		    ? GENERAL_REGS : FLOAT_REGS);
	  else
	    return class;
	}
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
	return NO_REGS;
    }
  /* MMX registers cannot load constants either.  */
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  /* Only %eax/%ebx/%ecx/%edx have QImode subregisters on ia32.  */
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}
10409
10410/* If we are copying between general and FP registers, we need a memory
10411 location. The same is true for SSE and MMX registers.
10412
10413 The macro can't work reliably when one of the CLASSES is class containing
10414 registers from multiple units (SSE, MMX, integer). We avoid this by never
10415 combining those units in single alternative in the machine description.
10416 Ensure that this constraint holds to avoid unexpected surprises.
10417
10418 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
10419 enforce these sanity checks. */
10420int
10421ix86_secondary_memory_needed (class1, class2, mode, strict)
10422 enum reg_class class1, class2;
10423 enum machine_mode mode;
10424 int strict;
10425{
10426 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
10427 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
10428 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
10429 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
10430 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
10431 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
10432 {
10433 if (strict)
10434 abort ();
10435 else
10436 return 1;
10437 }
10438 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
10439 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
10440 && (mode) != SImode)
10441 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
10442 && (mode) != SImode));
10443}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */
int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In case of copying from general_purpose_register we may emit
     multiple stores followed by single load causing memory size mismatch
     stall.  Count this as arbitrarily high cost of 20.  */
  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int add_cost = 0;
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	add_cost = 20;
      return (MEMORY_MOVE_COST (mode, class1, 0)
	      + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
    }
  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  /* Within a single unit, use the per-unit cost from the processor
     cost table.  */
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
10479
a946dd00
JH
10480/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
10481int
10482ix86_hard_regno_mode_ok (regno, mode)
10483 int regno;
10484 enum machine_mode mode;
10485{
10486 /* Flags and only flags can only hold CCmode values. */
10487 if (CC_REGNO_P (regno))
10488 return GET_MODE_CLASS (mode) == MODE_CC;
10489 if (GET_MODE_CLASS (mode) == MODE_CC
10490 || GET_MODE_CLASS (mode) == MODE_RANDOM
10491 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
10492 return 0;
10493 if (FP_REGNO_P (regno))
10494 return VALID_FP_MODE_P (mode);
10495 if (SSE_REGNO_P (regno))
10496 return VALID_SSE_REG_MODE (mode);
10497 if (MMX_REGNO_P (regno))
10498 return VALID_MMX_REG_MODE (mode);
10499 /* We handle both integer and floats in the general purpose registers.
10500 In future we should be able to handle vector modes as well. */
10501 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
10502 return 0;
10503 /* Take care for QImode values - they can be in non-QI regs, but then
10504 they do cause partial register stalls. */
d2836273 10505 if (regno < 4 || mode != QImode || TARGET_64BIT)
a946dd00
JH
10506 return 1;
10507 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
10508}
fa79946e
JH
10509
10510/* Return the cost of moving data of mode M between a
10511 register and memory. A value of 2 is the default; this cost is
10512 relative to those in `REGISTER_MOVE_COST'.
10513
10514 If moving between registers and memory is more expensive than
10515 between two registers, you should define this macro to express the
10516 relative cost.
10517
10518 Model also increased moving costs of QImode registers in non
10519 Q_REGS classes.
10520 */
10521int
10522ix86_memory_move_cost (mode, class, in)
10523 enum machine_mode mode;
10524 enum reg_class class;
10525 int in;
10526{
10527 if (FLOAT_CLASS_P (class))
10528 {
10529 int index;
10530 switch (mode)
10531 {
10532 case SFmode:
10533 index = 0;
10534 break;
10535 case DFmode:
10536 index = 1;
10537 break;
10538 case XFmode:
10539 case TFmode:
10540 index = 2;
10541 break;
10542 default:
10543 return 100;
10544 }
10545 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
10546 }
10547 if (SSE_CLASS_P (class))
10548 {
10549 int index;
10550 switch (GET_MODE_SIZE (mode))
10551 {
10552 case 4:
10553 index = 0;
10554 break;
10555 case 8:
10556 index = 1;
10557 break;
10558 case 16:
10559 index = 2;
10560 break;
10561 default:
10562 return 100;
10563 }
10564 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
10565 }
10566 if (MMX_CLASS_P (class))
10567 {
10568 int index;
10569 switch (GET_MODE_SIZE (mode))
10570 {
10571 case 4:
10572 index = 0;
10573 break;
10574 case 8:
10575 index = 1;
10576 break;
10577 default:
10578 return 100;
10579 }
10580 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
10581 }
10582 switch (GET_MODE_SIZE (mode))
10583 {
10584 case 1:
10585 if (in)
10586 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
10587 : ix86_cost->movzbl_load);
10588 else
10589 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
10590 : ix86_cost->int_store[0] + 4);
10591 break;
10592 case 2:
10593 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
10594 default:
10595 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
10596 if (mode == TFmode)
10597 mode = XFmode;
3bb7e126 10598 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
fa79946e
JH
10599 * (int) GET_MODE_SIZE (mode) / 4);
10600 }
10601}
This page took 2.56283 seconds and 5 git commands to generate.