]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
crtbegin.asm (__JCR_LIST__): New.
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
e075ae69 1/* Subroutines used for code generation on IA-32.
8752c357 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
4592bdcb 3 Free Software Foundation, Inc.
2a2ab3f9
JVA
4
5This file is part of GNU CC.
6
7GNU CC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU CC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU CC; see the file COPYING. If not, write to
97aadbb9 19the Free Software Foundation, 59 Temple Place - Suite 330,
0f290768 20Boston, MA 02111-1307, USA. */
2a2ab3f9 21
2a2ab3f9 22#include "config.h"
1fba7553 23#include <setjmp.h>
bb5177ac 24#include "system.h"
2a2ab3f9 25#include "rtl.h"
6baf1cc8
BS
26#include "tree.h"
27#include "tm_p.h"
2a2ab3f9
JVA
28#include "regs.h"
29#include "hard-reg-set.h"
30#include "real.h"
31#include "insn-config.h"
32#include "conditions.h"
2a2ab3f9
JVA
33#include "output.h"
34#include "insn-attr.h"
2a2ab3f9 35#include "flags.h"
a8ffcc81 36#include "except.h"
ecbc4695 37#include "function.h"
00c79232 38#include "recog.h"
ced8dd8c 39#include "expr.h"
f103890b 40#include "toplev.h"
e075ae69 41#include "basic-block.h"
1526a060 42#include "ggc.h"
672a6f42
NB
43#include "target.h"
44#include "target-def.h"
2a2ab3f9 45
8dfe5673
RK
46#ifndef CHECK_STACK_LIMIT
47#define CHECK_STACK_LIMIT -1
48#endif
49
32b5b1aa
SC
50/* Processor costs (relative to an add) */
51struct processor_costs i386_cost = { /* 386 specific costs; referenced from processor_target_table in override_options */
e9a25f70 52 1, /* cost of an add instruction */
32b5b1aa
SC
53 1, /* cost of a lea instruction */
54 3, /* variable shift costs */
55 2, /* constant shift costs */
56 6, /* cost of starting a multiply */
57 1, /* cost of multiply per each bit set */
e075ae69 58 23, /* cost of a divide/mod */
96e7ae40 59 15, /* "large" insn */
e2e52e1b 60 3, /* MOVE_RATIO */
7c6b971d 61 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
62 {2, 4, 2}, /* cost of loading integer registers
63 in QImode, HImode and SImode.
0f290768 64 Relative to reg-reg move (2). */
96e7ae40
JH
65 {2, 4, 2}, /* cost of storing integer registers */
66 2, /* cost of reg,reg fld/fst */
67 {8, 8, 8}, /* cost of loading fp registers
68 in SFmode, DFmode and XFmode */
fa79946e
JH
69 {8, 8, 8}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode (this slot follows the fp load
 costs, mirroring the integer load/store pair above) */
70 2, /* cost of moving MMX register */
71 {4, 8}, /* cost of loading MMX registers
72 in SImode and DImode */
73 {4, 8}, /* cost of storing MMX registers
74 in SImode and DImode */
75 2, /* cost of moving SSE register */
76 {4, 8, 16}, /* cost of loading SSE registers
77 in SImode, DImode and TImode */
78 {4, 8, 16}, /* cost of storing SSE registers
79 in SImode, DImode and TImode */
80 3, /* MMX or SSE register to integer */
32b5b1aa
SC
81};
82
83struct processor_costs i486_cost = { /* 486 specific costs; referenced from processor_target_table in override_options */
84 1, /* cost of an add instruction */
85 1, /* cost of a lea instruction */
86 3, /* variable shift costs */
87 2, /* constant shift costs */
88 12, /* cost of starting a multiply */
89 1, /* cost of multiply per each bit set */
e075ae69 90 40, /* cost of a divide/mod */
96e7ae40 91 15, /* "large" insn */
e2e52e1b 92 3, /* MOVE_RATIO */
7c6b971d 93 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
94 {2, 4, 2}, /* cost of loading integer registers
95 in QImode, HImode and SImode.
0f290768 96 Relative to reg-reg move (2). */
96e7ae40
JH
97 {2, 4, 2}, /* cost of storing integer registers */
98 2, /* cost of reg,reg fld/fst */
99 {8, 8, 8}, /* cost of loading fp registers
100 in SFmode, DFmode and XFmode */
fa79946e
JH
101 {8, 8, 8}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode (this slot follows the fp load
 costs, mirroring the integer load/store pair above) */
102 2, /* cost of moving MMX register */
103 {4, 8}, /* cost of loading MMX registers
104 in SImode and DImode */
105 {4, 8}, /* cost of storing MMX registers
106 in SImode and DImode */
107 2, /* cost of moving SSE register */
108 {4, 8, 16}, /* cost of loading SSE registers
109 in SImode, DImode and TImode */
110 {4, 8, 16}, /* cost of storing SSE registers
111 in SImode, DImode and TImode */
112 3 /* MMX or SSE register to integer */
32b5b1aa
SC
113};
114
e5cb57e8 115struct processor_costs pentium_cost = { /* Pentium specific costs; also the initial value of ix86_cost */
32b5b1aa
SC
116 1, /* cost of an add instruction */
117 1, /* cost of a lea instruction */
856b07a1 118 4, /* variable shift costs */
e5cb57e8 119 1, /* constant shift costs */
856b07a1
SC
120 11, /* cost of starting a multiply */
121 0, /* cost of multiply per each bit set */
e075ae69 122 25, /* cost of a divide/mod */
96e7ae40 123 8, /* "large" insn */
e2e52e1b 124 6, /* MOVE_RATIO */
7c6b971d 125 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
126 {2, 4, 2}, /* cost of loading integer registers
127 in QImode, HImode and SImode.
0f290768 128 Relative to reg-reg move (2). */
96e7ae40
JH
129 {2, 4, 2}, /* cost of storing integer registers */
130 2, /* cost of reg,reg fld/fst */
131 {2, 2, 6}, /* cost of loading fp registers
132 in SFmode, DFmode and XFmode */
fa79946e
JH
133 {4, 4, 6}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode (this slot follows the fp load
 costs, mirroring the integer load/store pair above) */
134 8, /* cost of moving MMX register */
135 {8, 8}, /* cost of loading MMX registers
136 in SImode and DImode */
137 {8, 8}, /* cost of storing MMX registers
138 in SImode and DImode */
139 2, /* cost of moving SSE register */
140 {4, 8, 16}, /* cost of loading SSE registers
141 in SImode, DImode and TImode */
142 {4, 8, 16}, /* cost of storing SSE registers
143 in SImode, DImode and TImode */
144 3 /* MMX or SSE register to integer */
32b5b1aa
SC
145};
146
856b07a1
SC
147struct processor_costs pentiumpro_cost = { /* PentiumPro specific costs; referenced from processor_target_table in override_options */
148 1, /* cost of an add instruction */
149 1, /* cost of a lea instruction */
e075ae69 150 1, /* variable shift costs */
856b07a1 151 1, /* constant shift costs */
369e59b1 152 4, /* cost of starting a multiply */
856b07a1 153 0, /* cost of multiply per each bit set */
e075ae69 154 17, /* cost of a divide/mod */
96e7ae40 155 8, /* "large" insn */
e2e52e1b 156 6, /* MOVE_RATIO */
7c6b971d 157 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
158 {4, 4, 4}, /* cost of loading integer registers
159 in QImode, HImode and SImode.
0f290768 160 Relative to reg-reg move (2). */
96e7ae40
JH
161 {2, 2, 2}, /* cost of storing integer registers */
162 2, /* cost of reg,reg fld/fst */
163 {2, 2, 6}, /* cost of loading fp registers
164 in SFmode, DFmode and XFmode */
fa79946e
JH
165 {4, 4, 6}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode (this slot follows the fp load
 costs, mirroring the integer load/store pair above) */
166 2, /* cost of moving MMX register */
167 {2, 2}, /* cost of loading MMX registers
168 in SImode and DImode */
169 {2, 2}, /* cost of storing MMX registers
170 in SImode and DImode */
171 2, /* cost of moving SSE register */
172 {2, 2, 8}, /* cost of loading SSE registers
173 in SImode, DImode and TImode */
174 {2, 2, 8}, /* cost of storing SSE registers
175 in SImode, DImode and TImode */
176 3 /* MMX or SSE register to integer */
856b07a1
SC
177};
178
a269a03c
JC
179struct processor_costs k6_cost = { /* K6 specific costs; referenced from processor_target_table in override_options */
180 1, /* cost of an add instruction */
e075ae69 181 2, /* cost of a lea instruction */
a269a03c
JC
182 1, /* variable shift costs */
183 1, /* constant shift costs */
73fe76e4 184 3, /* cost of starting a multiply */
a269a03c 185 0, /* cost of multiply per each bit set */
e075ae69 186 18, /* cost of a divide/mod */
96e7ae40 187 8, /* "large" insn */
e2e52e1b 188 4, /* MOVE_RATIO */
7c6b971d 189 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
190 {4, 5, 4}, /* cost of loading integer registers
191 in QImode, HImode and SImode.
0f290768 192 Relative to reg-reg move (2). */
96e7ae40
JH
193 {2, 3, 2}, /* cost of storing integer registers */
194 4, /* cost of reg,reg fld/fst */
195 {6, 6, 6}, /* cost of loading fp registers
196 in SFmode, DFmode and XFmode */
fa79946e
JH
197 {4, 4, 4}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode (this slot follows the fp load
 costs, mirroring the integer load/store pair above) */
198 2, /* cost of moving MMX register */
199 {2, 2}, /* cost of loading MMX registers
200 in SImode and DImode */
201 {2, 2}, /* cost of storing MMX registers
202 in SImode and DImode */
203 2, /* cost of moving SSE register */
204 {2, 2, 8}, /* cost of loading SSE registers
205 in SImode, DImode and TImode */
206 {2, 2, 8}, /* cost of storing SSE registers
207 in SImode, DImode and TImode */
208 6 /* MMX or SSE register to integer */
a269a03c
JC
209};
210
309ada50
JH
211struct processor_costs athlon_cost = { /* Athlon specific costs; referenced from processor_target_table in override_options */
212 1, /* cost of an add instruction */
0b5107cf 213 2, /* cost of a lea instruction */
309ada50
JH
214 1, /* variable shift costs */
215 1, /* constant shift costs */
216 5, /* cost of starting a multiply */
217 0, /* cost of multiply per each bit set */
0b5107cf 218 42, /* cost of a divide/mod */
309ada50 219 8, /* "large" insn */
e2e52e1b 220 9, /* MOVE_RATIO */
309ada50
JH
221 4, /* cost for loading QImode using movzbl */
222 {4, 5, 4}, /* cost of loading integer registers
223 in QImode, HImode and SImode.
0f290768 224 Relative to reg-reg move (2). */
309ada50
JH
225 {2, 3, 2}, /* cost of storing integer registers */
226 4, /* cost of reg,reg fld/fst */
0b5107cf 227 {6, 6, 20}, /* cost of loading fp registers
309ada50 228 in SFmode, DFmode and XFmode */
fa79946e
JH
229 {4, 4, 16}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode (this slot follows the fp load
 costs, mirroring the integer load/store pair above) */
230 2, /* cost of moving MMX register */
231 {2, 2}, /* cost of loading MMX registers
232 in SImode and DImode */
233 {2, 2}, /* cost of storing MMX registers
234 in SImode and DImode */
235 2, /* cost of moving SSE register */
236 {2, 2, 8}, /* cost of loading SSE registers
237 in SImode, DImode and TImode */
238 {2, 2, 8}, /* cost of storing SSE registers
239 in SImode, DImode and TImode */
240 6 /* MMX or SSE register to integer */
309ada50
JH
241};
242
b4e89e2d
JH
243struct processor_costs pentium4_cost = { /* Pentium 4 specific costs; referenced from processor_target_table in override_options */
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 8, /* variable shift costs */
247 8, /* constant shift costs */
248 30, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 112, /* cost of a divide/mod */
251 16, /* "large" insn */
252 6, /* MOVE_RATIO */
253 2, /* cost for loading QImode using movzbl */
254 {4, 5, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 3, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode (this slot follows the fp load
 costs, mirroring the integer load/store pair above) */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 12, /* cost of moving SSE register */
268 {12, 12, 12}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 10, /* MMX or SSE register to integer */
273};
274
32b5b1aa
SC
275struct processor_costs *ix86_cost = &pentium_cost;
276
a269a03c
JC
277/* Processor feature/optimization bitmasks.
 Each m_* macro is a single bit, 1 shifted left by the matching
 PROCESSOR_* value.  Each x86_* constant below is the OR of the m_* bits
 of the processors it lists; a leading ~ selects every processor except
 the ones listed, ~(0) selects all of them and 0 selects none.
 NOTE(review): the code that tests these masks is not in this file
 section -- presumably they are checked against the bit for ix86_cpu;
 confirm in i386.h. */
278#define m_386 (1<<PROCESSOR_I386)
279#define m_486 (1<<PROCESSOR_I486)
280#define m_PENT (1<<PROCESSOR_PENTIUM)
281#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
282#define m_K6 (1<<PROCESSOR_K6)
309ada50 283#define m_ATHLON (1<<PROCESSOR_ATHLON)
b4e89e2d 284#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
a269a03c 285
309ada50 286const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
b4e89e2d 287const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
a269a03c 288const int x86_zero_extend_with_and = m_486 | m_PENT;
b4e89e2d 289const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
e075ae69 290const int x86_double_with_add = ~m_386;
a269a03c 291const int x86_use_bit_test = m_386;
e2e52e1b 292const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
b4e89e2d
JH
293const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
294const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
ef6257cd 295const int x86_branch_hints = m_PENT4;
b4e89e2d 296const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
e075ae69
RH
297const int x86_partial_reg_stall = m_PPRO;
298const int x86_use_loop = m_K6;
309ada50 299const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
e075ae69
RH
300const int x86_use_mov0 = m_K6;
301const int x86_use_cltd = ~(m_PENT | m_K6);
302const int x86_read_modify_write = ~m_PENT;
303const int x86_read_modify = ~(m_PENT | m_PPRO);
304const int x86_split_long_moves = m_PPRO;
e9e80858 305const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
b4e89e2d 306const int x86_single_stringop = m_386 | m_PENT4;
d9f32422
JH
307const int x86_qimode_math = ~(0);
308const int x86_promote_qi_regs = 0;
309const int x86_himode_math = ~(m_PPRO);
310const int x86_promote_hi_regs = m_PPRO;
b4e89e2d
JH
311const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
312const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
313const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
314const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
315const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
316const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
317const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
c6036a37
JH
318const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
319const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
320const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
a269a03c 321
564d80f4 322#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
2a2ab3f9 323
e075ae69
RH
324const char * const hi_reg_name[] = HI_REGISTER_NAMES;
325const char * const qi_reg_name[] = QI_REGISTER_NAMES;
326const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
4c0d89b5
RS
327
328/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 329 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 330
e075ae69 331enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
332{
333 /* ax, dx, cx, bx */
ab408a86 334 AREG, DREG, CREG, BREG,
4c0d89b5 335 /* si, di, bp, sp */
e075ae69 336 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
337 /* FP registers */
338 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 339 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 340 /* arg pointer */
83774849 341 NON_Q_REGS,
564d80f4 342 /* flags, fpsr, dirflag, frame */
a7180f70
BS
343 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
/* SSE registers (same register order as the dbx_register_map rows) */
344 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
345 SSE_REGS, SSE_REGS,
/* MMX registers */
346 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
3d117b30
JH
347 MMX_REGS, MMX_REGS,
/* extended integer registers */
348 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
349 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
/* extended SSE registers */
350 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
351 SSE_REGS, SSE_REGS,
4c0d89b5 352};
c572e5ba 353
3d117b30 354/* The "default" register map used in 32bit mode. */
83774849 355
/* One entry per hard register, in the same register order as
 regclass_map (ax, dx, cx, bx, si, di, bp, sp, then the fp stack, ...).
 NOTE(review): -1 presumably means "no debugger number for this
 register" -- confirm against the users of this table. */
0f290768 356int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
357{
358 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
359 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
3d117b30 360 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
361 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
362 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
3d117b30
JH
363 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
364 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
365};
366
0f7fa3d0
JH
367/* The "default" register map used in 64bit mode. */
368int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
369{
370 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
371 33, 34, 35, 36, 37, 38, 39, 40 /* fp regs */
372 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
373 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
374 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
375 8,9,10,11,12,13,14,15, /* extended integer registers */
376 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
377};
378
83774849
RH
379/* Define the register numbers to be used in Dwarf debugging information.
380 The SVR4 reference port C compiler uses the following register numbers
381 in its Dwarf output code:
382 0 for %eax (gcc regno = 0)
383 1 for %ecx (gcc regno = 2)
384 2 for %edx (gcc regno = 1)
385 3 for %ebx (gcc regno = 3)
386 4 for %esp (gcc regno = 7)
387 5 for %ebp (gcc regno = 6)
388 6 for %esi (gcc regno = 4)
389 7 for %edi (gcc regno = 5)
390 The following three DWARF register numbers are never generated by
391 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
392 believes these numbers have these meanings.
393 8 for %eip (no gcc equivalent)
394 9 for %eflags (gcc regno = 17)
395 10 for %trapno (no gcc equivalent)
396 It is not at all clear how we should number the FP stack registers
397 for the x86 architecture. If the version of SDB on x86/svr4 were
398 a bit less brain dead with respect to floating-point then we would
399 have a precedent to follow with respect to DWARF register numbers
400 for x86 FP registers, but the SDB on x86/svr4 is so completely
401 broken with respect to FP registers that it is hardly worth thinking
402 of it as something to strive for compatibility with.
403 The version of x86/svr4 SDB I have at the moment does (partially)
404 seem to believe that DWARF register number 11 is associated with
405 the x86 register %st(0), but that's about all. Higher DWARF
406 register numbers don't seem to be associated with anything in
407 particular, and even for DWARF regno 11, SDB only seems to under-
408 stand that it should say that a variable lives in %st(0) (when
409 asked via an `=' command) if we said it was in DWARF regno 11,
410 but SDB still prints garbage when asked for the value of the
411 variable in question (via a `/' command).
412 (Also note that the labels SDB prints for various FP stack regs
413 when doing an `x' command are all wrong.)
414 Note that these problems generally don't affect the native SVR4
415 C compiler because it doesn't allow the use of -O with -g and
416 because when it is *not* optimizing, it allocates a memory
417 location for each floating-point variable, and the memory
418 location is what gets described in the DWARF AT_location
419 attribute for the variable in question.
420 Regardless of the severe mental illness of the x86/svr4 SDB, we
421 do something sensible here and we use the following DWARF
422 register numbers. Note that these are all stack-top-relative
423 numbers.
424 11 for %st(0) (gcc regno = 8)
425 12 for %st(1) (gcc regno = 9)
426 13 for %st(2) (gcc regno = 10)
427 14 for %st(3) (gcc regno = 11)
428 15 for %st(4) (gcc regno = 12)
429 16 for %st(5) (gcc regno = 13)
430 17 for %st(6) (gcc regno = 14)
431 18 for %st(7) (gcc regno = 15)
432*/
0f290768 433int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
434{
435 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
436 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
3f3f2124 437 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
438 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
439 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
3f3f2124
JH
440 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
441 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
442};
443
c572e5ba
JVA
444/* Test and compare insns in i386.md store the information needed to
445 generate branch and scc insns here. */
446
e075ae69
RH
447struct rtx_def *ix86_compare_op0 = NULL_RTX;
448struct rtx_def *ix86_compare_op1 = NULL_RTX;
f5316dfe 449
7a2e09f4 450#define MAX_386_STACK_LOCALS 3
8362f420
JH
451/* Size of the register save area. */
452#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
36edd3cc
BS
453
454/* Define the structure for the machine field in struct function. */
455struct machine_function
456{
/* Indexed by machine mode, then by slot number (0 .. MAX_386_STACK_LOCALS-1);
 accessed through the ix86_stack_locals macro. */
457 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
/* Accessed through the ix86_save_varrargs_registers macro.  The
 "varrargs" spelling (sic) is part of the identifier; do not correct it
 here without updating every user. */
8362f420 458 int save_varrargs_registers;
/* NOTE(review): not referenced in this part of the file; presumably set
 when the function reaches into a caller's frame -- confirm at its users. */
6fca22eb 459 int accesses_prev_frame;
36edd3cc
BS
460};
461
01d939e8 462#define ix86_stack_locals (cfun->machine->stack_locals)
8362f420 463#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
36edd3cc 464
4dd2ac2c
JH
465/* Structure describing stack frame layout.
466 Stack grows downward:
467
468 [arguments]
469 <- ARG_POINTER
470 saved pc
471
472 saved frame pointer if frame_pointer_needed
473 <- HARD_FRAME_POINTER
474 [saved regs]
475
476 [padding1] \
477 )
478 [va_arg registers] (
479 > to_allocate <- FRAME_POINTER
480 [frame] (
481 )
482 [padding2] /
483 */
/* Sizes and offsets of one function's stack frame; filled in through the
 pointer passed to ix86_compute_frame_layout.  Field names follow the
 regions in the layout diagram above. */
484struct ix86_frame
485{
486 int nregs; /* presumably the number of saved registers ([saved regs]) -- TODO confirm */
487 int padding1; /* [padding1] in the diagram */
8362f420 488 int va_arg_size; /* [va_arg registers] in the diagram */
4dd2ac2c
JH
489 HOST_WIDE_INT frame; /* [frame] in the diagram */
490 int padding2; /* [padding2] in the diagram */
491 int outgoing_arguments_size;
8362f420 492 int red_zone_size;
4dd2ac2c
JH
493
/* The diagram brackets [padding1] through [padding2] as "to_allocate". */
494 HOST_WIDE_INT to_allocate;
495 /* The offsets relative to ARG_POINTER. */
496 HOST_WIDE_INT frame_pointer_offset;
497 HOST_WIDE_INT hard_frame_pointer_offset;
498 HOST_WIDE_INT stack_pointer_offset;
499};
500
6189a572
JH
501/* Code model option as passed by user. */
502const char *ix86_cmodel_string;
503/* Parsed value. */
504enum cmodel ix86_cmodel;
505
c8c5cb99 506/* Which CPU we are scheduling for. */
e42ea7f9 507enum processor_type ix86_cpu;
c8c5cb99
SC
508
509/* Which instruction set architecture to use. */
c942177e 510int ix86_arch;
c8c5cb99
SC
511
512/* Strings to hold which cpu and instruction set architecture to use. */
9c23aa47
ZW
513const char *ix86_cpu_string; /* for -mcpu=<xxx> */
514const char *ix86_arch_string; /* for -march=<xxx> */
c8c5cb99 515
0f290768 516/* # of registers to use to pass arguments. */
e075ae69 517const char *ix86_regparm_string;
e9a25f70 518
e075ae69
RH
519/* ix86_regparm_string as a number */
520int ix86_regparm;
e9a25f70
JL
521
522/* Alignment to use for loops and jumps: */
523
0f290768 524/* Power of two alignment for loops. */
e075ae69 525const char *ix86_align_loops_string;
e9a25f70 526
0f290768 527/* Power of two alignment for non-loop jumps. */
e075ae69 528const char *ix86_align_jumps_string;
e9a25f70 529
3af4bd89 530/* Power of two alignment for stack boundary in bytes. */
e075ae69 531const char *ix86_preferred_stack_boundary_string;
3af4bd89
JH
532
533/* Preferred alignment for stack boundary in bits. */
e075ae69 534int ix86_preferred_stack_boundary;
3af4bd89 535
e9a25f70 536/* Values 1-5: see jump.c */
e075ae69
RH
537int ix86_branch_cost;
538const char *ix86_branch_cost_string;
e9a25f70 539
0f290768 540/* Power of two alignment for functions. */
e075ae69 541const char *ix86_align_funcs_string;
e075ae69 542\f
f6da8bc3
KG
543static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
544static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
e075ae69 545 int, int, FILE *));
f6da8bc3 546static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
3a3677ff
RH
547static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
548 rtx *, rtx *));
f6da8bc3
KG
549static rtx gen_push PARAMS ((rtx));
550static int memory_address_length PARAMS ((rtx addr));
551static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
552static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
553static int ix86_safe_length PARAMS ((rtx));
554static enum attr_memory ix86_safe_memory PARAMS ((rtx));
555static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
556static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
557static void ix86_dump_ppro_packet PARAMS ((FILE *));
558static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
559static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
e075ae69 560 rtx));
f6da8bc3
KG
561static void ix86_init_machine_status PARAMS ((struct function *));
562static void ix86_mark_machine_status PARAMS ((struct function *));
37b15744 563static void ix86_free_machine_status PARAMS ((struct function *));
2b589241 564static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
f6da8bc3 565static int ix86_safe_length_prefix PARAMS ((rtx));
0903fcab
JH
566static int ix86_nsaved_regs PARAMS((void));
567static void ix86_emit_save_regs PARAMS((void));
c6036a37 568static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
37a58036 569static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
0e4970d7 570static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
c6991660
KG
571static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
572static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
55efb413 573static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
0945b39d 574static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
0945b39d
JH
575static rtx ix86_expand_aligntest PARAMS ((rtx, int));
576static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
e075ae69
RH
577
/* The parts of an address: base and index expressions, a displacement
 and a scale.  A pointer to this structure is the second argument of
 ix86_decompose_address; presumably that function fills it in from the
 rtx it is given (its body is outside this section -- confirm there). */
578struct ix86_address
579{
580 rtx base, index, disp;
581 HOST_WIDE_INT scale;
582};
b08de47e 583
e075ae69 584static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
bd793c65
BS
585
586struct builtin_description;
587static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
588 rtx));
589static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
590 rtx));
591static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
592static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
593static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
594static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
595static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
c0c102a9
JH
596static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
597static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
598 enum rtx_code *,
599 enum rtx_code *,
600 enum rtx_code *));
9e7adcb3
JH
601static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
602 rtx *, rtx *));
603static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
604static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
605static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
606static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
37a58036 607static int ix86_save_reg PARAMS ((int, int));
4dd2ac2c 608static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
8d8e52be 609static int ix86_comp_type_attributes PARAMS ((tree, tree));
7c262518
RH
610
611#if defined(TARGET_ELF) && defined(TARGET_COFF)
612static void sco_asm_named_section PARAMS ((const char *, unsigned int,
613 unsigned int));
614#endif
672a6f42
NB
615\f
616/* Initialize the GCC target structure. */
f5f4be42 617#undef TARGET_VALID_TYPE_ATTRIBUTE
672a6f42 618#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
f5f4be42
NB
619# define TARGET_VALID_TYPE_ATTRIBUTE i386_pe_valid_type_attribute_p
620# undef TARGET_VALID_DECL_ATTRIBUTE
621# define TARGET_VALID_DECL_ATTRIBUTE i386_pe_valid_decl_attribute_p
622# undef TARGET_MERGE_DECL_ATTRIBUTES
623# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
624#else
625# define TARGET_VALID_TYPE_ATTRIBUTE ix86_valid_type_attribute_p
672a6f42
NB
626#endif
627
8d8e52be
JM
628#undef TARGET_COMP_TYPE_ATTRIBUTES
629#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
630
f6155fda
SS
631#undef TARGET_INIT_BUILTINS
632#define TARGET_INIT_BUILTINS ix86_init_builtins
633
634#undef TARGET_EXPAND_BUILTIN
635#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
636
08c148a8
NB
637#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
638 static void ix86_osf_output_function_prologue PARAMS ((FILE *,
639 HOST_WIDE_INT));
640# undef TARGET_ASM_FUNCTION_PROLOGUE
641# define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
642#endif
643
17b53c33
NB
644#undef TARGET_ASM_OPEN_PAREN
645#define TARGET_ASM_OPEN_PAREN ""
646#undef TARGET_ASM_CLOSE_PAREN
647#define TARGET_ASM_CLOSE_PAREN ""
648
f6897b10 649struct gcc_target targetm = TARGET_INITIALIZER;
e075ae69 650\f
f5316dfe
MM
651/* Sometimes certain combinations of command options do not make
652 sense on a particular target machine. You can define a macro
653 `OVERRIDE_OPTIONS' to take account of this. This macro, if
654 defined, is executed once just after all the command options have
655 been parsed.
656
657 Don't use this macro to turn on various extra optimizations for
658 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
659
660void
661override_options ()
662{
400500c4 663 int i;
e075ae69
RH
664 /* Comes from final.c -- no real reason to change it. */
665#define MAX_CODE_ALIGN 16
f5316dfe 666
c8c5cb99
SC
667 static struct ptt
668 {
e075ae69
RH
669 struct processor_costs *cost; /* Processor costs */
670 int target_enable; /* Target flags to enable. */
671 int target_disable; /* Target flags to disable. */
672 int align_loop; /* Default alignments. */
673 int align_jump;
674 int align_func;
675 int branch_cost;
676 }
0f290768 677 const processor_target_table[PROCESSOR_max] =
e075ae69
RH
678 {
679 {&i386_cost, 0, 0, 2, 2, 2, 1},
680 {&i486_cost, 0, 0, 4, 4, 4, 1},
681 {&pentium_cost, 0, 0, -4, -4, -4, 1},
682 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
309ada50 683 {&k6_cost, 0, 0, -5, -5, 4, 1},
b4e89e2d
JH
684 {&athlon_cost, 0, 0, 4, -4, 4, 1},
685 {&pentium4_cost, 0, 0, 2, 2, 2, 1}
e075ae69
RH
686 };
687
688 static struct pta
689 {
0f290768 690 const char *name; /* processor name or nickname. */
e075ae69
RH
691 enum processor_type processor;
692 }
0f290768 693 const processor_alias_table[] =
e075ae69
RH
694 {
695 {"i386", PROCESSOR_I386},
696 {"i486", PROCESSOR_I486},
697 {"i586", PROCESSOR_PENTIUM},
698 {"pentium", PROCESSOR_PENTIUM},
699 {"i686", PROCESSOR_PENTIUMPRO},
700 {"pentiumpro", PROCESSOR_PENTIUMPRO},
e075ae69 701 {"k6", PROCESSOR_K6},
309ada50 702 {"athlon", PROCESSOR_ATHLON},
b4e89e2d 703 {"pentium4", PROCESSOR_PENTIUM4},
3af4bd89 704 };
c8c5cb99 705
0f290768 706 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
c8c5cb99 707
f5316dfe
MM
708#ifdef SUBTARGET_OVERRIDE_OPTIONS
709 SUBTARGET_OVERRIDE_OPTIONS;
710#endif
711
5a6ee819 712 ix86_arch = PROCESSOR_I386;
e075ae69
RH
713 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
714
6189a572
JH
715 if (ix86_cmodel_string != 0)
716 {
717 if (!strcmp (ix86_cmodel_string, "small"))
718 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
719 else if (flag_pic)
720 sorry ("Code model %s not supported in PIC mode", ix86_cmodel_string);
721 else if (!strcmp (ix86_cmodel_string, "32"))
722 ix86_cmodel = CM_32;
723 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
724 ix86_cmodel = CM_KERNEL;
725 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
726 ix86_cmodel = CM_MEDIUM;
727 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
728 ix86_cmodel = CM_LARGE;
729 else
730 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
731 }
732 else
733 {
734 ix86_cmodel = CM_32;
735 if (TARGET_64BIT)
736 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
737 }
738 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
739 error ("Code model `%s' not supported in the %s bit mode.",
740 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
741 if (ix86_cmodel == CM_LARGE)
742 sorry ("Code model `large' not supported yet.");
0c2dc519
JH
743 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
744 sorry ("%i-bit mode not compiled in.",
745 (target_flags & MASK_64BIT) ? 64 : 32);
6189a572 746
e075ae69
RH
747 if (ix86_arch_string != 0)
748 {
e075ae69
RH
749 for (i = 0; i < pta_size; i++)
750 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
751 {
752 ix86_arch = processor_alias_table[i].processor;
753 /* Default cpu tuning to the architecture. */
754 ix86_cpu = ix86_arch;
755 break;
756 }
400500c4 757
e075ae69
RH
758 if (i == pta_size)
759 error ("bad value (%s) for -march= switch", ix86_arch_string);
760 }
761
762 if (ix86_cpu_string != 0)
763 {
e075ae69
RH
764 for (i = 0; i < pta_size; i++)
765 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
766 {
767 ix86_cpu = processor_alias_table[i].processor;
768 break;
769 }
770 if (i == pta_size)
771 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
772 }
773
774 ix86_cost = processor_target_table[ix86_cpu].cost;
775 target_flags |= processor_target_table[ix86_cpu].target_enable;
776 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
777
36edd3cc
BS
778 /* Arrange to set up i386_stack_locals for all functions. */
779 init_machine_status = ix86_init_machine_status;
1526a060 780 mark_machine_status = ix86_mark_machine_status;
37b15744 781 free_machine_status = ix86_free_machine_status;
36edd3cc 782
0f290768 783 /* Validate -mregparm= value. */
e075ae69 784 if (ix86_regparm_string)
b08de47e 785 {
400500c4
RK
786 i = atoi (ix86_regparm_string);
787 if (i < 0 || i > REGPARM_MAX)
788 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
789 else
790 ix86_regparm = i;
b08de47e 791 }
0d7d98ee
JH
792 else
793 if (TARGET_64BIT)
794 ix86_regparm = REGPARM_MAX;
b08de47e 795
3e18fdf6 796 /* If the user has provided any of the -malign-* options,
a4f31c00 797 warn and use that value only if -falign-* is not set.
3e18fdf6 798 Remove this code in GCC 3.2 or later. */
e075ae69 799 if (ix86_align_loops_string)
b08de47e 800 {
3e18fdf6
GK
801 warning ("-malign-loops is obsolete, use -falign-loops");
802 if (align_loops == 0)
803 {
804 i = atoi (ix86_align_loops_string);
805 if (i < 0 || i > MAX_CODE_ALIGN)
806 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
807 else
808 align_loops = 1 << i;
809 }
b08de47e 810 }
3af4bd89 811
e075ae69 812 if (ix86_align_jumps_string)
b08de47e 813 {
3e18fdf6
GK
814 warning ("-malign-jumps is obsolete, use -falign-jumps");
815 if (align_jumps == 0)
816 {
817 i = atoi (ix86_align_jumps_string);
818 if (i < 0 || i > MAX_CODE_ALIGN)
819 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
820 else
821 align_jumps = 1 << i;
822 }
b08de47e 823 }
b08de47e 824
e075ae69 825 if (ix86_align_funcs_string)
b08de47e 826 {
3e18fdf6
GK
827 warning ("-malign-functions is obsolete, use -falign-functions");
828 if (align_functions == 0)
829 {
830 i = atoi (ix86_align_funcs_string);
831 if (i < 0 || i > MAX_CODE_ALIGN)
832 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
833 else
834 align_functions = 1 << i;
835 }
b08de47e 836 }
3af4bd89 837
3e18fdf6
GK
838 /* Default align_* from the processor table. */
839#define abs(n) (n < 0 ? -n : n)
840 if (align_loops == 0)
841 align_loops = 1 << abs (processor_target_table[ix86_cpu].align_loop);
842 if (align_jumps == 0)
843 align_jumps = 1 << abs (processor_target_table[ix86_cpu].align_jump);
844 if (align_functions == 0)
845 align_functions = 1 << abs (processor_target_table[ix86_cpu].align_func);
846
e4c0478d 847 /* Validate -mpreferred-stack-boundary= value, or provide default.
3af4bd89 848 The default of 128 bits is for Pentium III's SSE __m128. */
e075ae69
RH
849 ix86_preferred_stack_boundary = 128;
850 if (ix86_preferred_stack_boundary_string)
3af4bd89 851 {
400500c4 852 i = atoi (ix86_preferred_stack_boundary_string);
0d7d98ee
JH
853 if (i < (TARGET_64BIT ? 3 : 2) || i > 31)
854 error ("-mpreferred-stack-boundary=%d is not between %d and 31", i,
855 TARGET_64BIT ? 3 : 2);
400500c4
RK
856 else
857 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 858 }
77a989d1 859
0f290768 860 /* Validate -mbranch-cost= value, or provide default. */
e075ae69
RH
861 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
862 if (ix86_branch_cost_string)
804a8ee0 863 {
400500c4
RK
864 i = atoi (ix86_branch_cost_string);
865 if (i < 0 || i > 5)
866 error ("-mbranch-cost=%d is not between 0 and 5", i);
867 else
868 ix86_branch_cost = i;
804a8ee0 869 }
804a8ee0 870
e9a25f70
JL
871 /* Keep nonleaf frame pointers. */
872 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 873 flag_omit_frame_pointer = 1;
e075ae69
RH
874
875 /* If we're doing fast math, we don't care about comparison order
876 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 877 if (flag_unsafe_math_optimizations)
e075ae69
RH
878 target_flags &= ~MASK_IEEE_FP;
879
a7180f70
BS
880 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
881 on by -msse. */
882 if (TARGET_SSE)
883 target_flags |= MASK_MMX;
c6036a37
JH
884
885 if ((x86_accumulate_outgoing_args & CPUMASK)
886 && !(target_flags & MASK_NO_ACCUMULATE_OUTGOING_ARGS)
887 && !optimize_size)
888 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
f5316dfe
MM
889}
890\f
32b5b1aa 891void
c6aded7c 892optimization_options (level, size)
32b5b1aa 893 int level;
bb5177ac 894 int size ATTRIBUTE_UNUSED;
32b5b1aa 895{
e9a25f70
JL
896 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
897 make the problem with not enough registers even worse. */
32b5b1aa
SC
898#ifdef INSN_SCHEDULING
899 if (level > 1)
900 flag_schedule_insns = 0;
901#endif
902}
b08de47e 903\f
b08de47e
MM
904/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
905 attribute for TYPE. The attributes in ATTRIBUTES have previously been
906 assigned to TYPE. */
907
9959db6d 908int
e075ae69 909ix86_valid_type_attribute_p (type, attributes, identifier, args)
b08de47e 910 tree type;
bb5177ac 911 tree attributes ATTRIBUTE_UNUSED;
b08de47e
MM
912 tree identifier;
913 tree args;
914{
915 if (TREE_CODE (type) != FUNCTION_TYPE
ac478ac0 916 && TREE_CODE (type) != METHOD_TYPE
b08de47e
MM
917 && TREE_CODE (type) != FIELD_DECL
918 && TREE_CODE (type) != TYPE_DECL)
919 return 0;
920
921 /* Stdcall attribute says callee is responsible for popping arguments
922 if they are not variable. */
0d7d98ee
JH
923 if (is_attribute_p ("stdcall", identifier)
924 && !TARGET_64BIT)
b08de47e
MM
925 return (args == NULL_TREE);
926
0f290768 927 /* Cdecl attribute says the callee is a normal C declaration. */
0d7d98ee
JH
928 if (is_attribute_p ("cdecl", identifier)
929 && !TARGET_64BIT)
b08de47e
MM
930 return (args == NULL_TREE);
931
932 /* Regparm attribute specifies how many integer arguments are to be
0f290768 933 passed in registers. */
b08de47e
MM
934 if (is_attribute_p ("regparm", identifier))
935 {
936 tree cst;
937
e9a25f70 938 if (! args || TREE_CODE (args) != TREE_LIST
b08de47e
MM
939 || TREE_CHAIN (args) != NULL_TREE
940 || TREE_VALUE (args) == NULL_TREE)
941 return 0;
942
943 cst = TREE_VALUE (args);
944 if (TREE_CODE (cst) != INTEGER_CST)
945 return 0;
946
cce097f1 947 if (compare_tree_int (cst, REGPARM_MAX) > 0)
b08de47e
MM
948 return 0;
949
950 return 1;
951 }
952
953 return 0;
954}
955
08c148a8
NB
956#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
957
958/* Generate the assembly code for function entry. FILE is a stdio
959 stream to output the code to. SIZE is an int: how many units of
960 temporary storage to allocate.
961
962 Refer to the array `regs_ever_live' to determine which registers to
963 save; `regs_ever_live[I]' is nonzero if register number I is ever
964 used in the function. This function is responsible for knowing
965 which registers should not be saved even if used.
966
967 We override it here to allow for the new profiling code to go before
968 the prologue and the old mcount code to go after the prologue (and
969 after %ebx has been set up for ELF shared library support). */
970
971static void
972ix86_osf_output_function_prologue (file, size)
973 FILE *file;
974 HOST_WIDE_INT size;
975{
976 char *prefix = "";
977 char *lprefix = LPREFIX;
978 int labelno = profile_label_no;
979
980#ifdef OSF_OS
981
982 if (TARGET_UNDERSCORES)
983 prefix = "_";
984
985 if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
986 {
987 if (!flag_pic && !HALF_PIC_P ())
988 {
989 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
990 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
991 }
992
993 else if (HALF_PIC_P ())
994 {
995 rtx symref;
996
997 HALF_PIC_EXTERNAL ("_mcount_ptr");
998 symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
999 "_mcount_ptr"));
1000
1001 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1002 fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
1003 XSTR (symref, 0));
1004 fprintf (file, "\tcall *(%%eax)\n");
1005 }
1006
1007 else
1008 {
1009 static int call_no = 0;
1010
1011 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1012 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1013 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1014 lprefix, call_no++);
1015 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1016 lprefix, labelno);
1017 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1018 prefix);
1019 fprintf (file, "\tcall *(%%eax)\n");
1020 }
1021 }
1022
1023#else /* !OSF_OS */
1024
1025 if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
1026 {
1027 if (!flag_pic)
1028 {
1029 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1030 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1031 }
1032
1033 else
1034 {
1035 static int call_no = 0;
1036
1037 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1038 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1039 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1040 lprefix, call_no++);
1041 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1042 lprefix, labelno);
1043 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1044 prefix);
1045 fprintf (file, "\tcall *(%%eax)\n");
1046 }
1047 }
1048#endif /* !OSF_OS */
1049
1050 function_prologue (file, size);
1051}
1052
1053#endif /* OSF_OS || TARGET_OSF1ELF */
1054
b08de47e
MM
1055/* Return 0 if the attributes for two types are incompatible, 1 if they
1056 are compatible, and 2 if they are nearly compatible (which causes a
1057 warning to be generated). */
1058
8d8e52be 1059static int
e075ae69 1060ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
1061 tree type1;
1062 tree type2;
b08de47e 1063{
0f290768 1064 /* Check for mismatch of non-default calling convention. */
69ddee61 1065 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1066
1067 if (TREE_CODE (type1) != FUNCTION_TYPE)
1068 return 1;
1069
1070 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1071 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1072 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 1073 return 0;
b08de47e
MM
1074 return 1;
1075}
b08de47e
MM
1076\f
1077/* Value is the number of bytes of arguments automatically
1078 popped when returning from a subroutine call.
1079 FUNDECL is the declaration node of the function (as a tree),
1080 FUNTYPE is the data type of the function (as a tree),
1081 or for a library call it is an identifier node for the subroutine name.
1082 SIZE is the number of bytes of arguments passed on the stack.
1083
1084 On the 80386, the RTD insn may be used to pop them if the number
1085 of args is fixed, but if the number is variable then the caller
1086 must pop them all. RTD can't be used for library calls now
1087 because the library is compiled with the Unix compiler.
1088 Use of RTD is a selectable option, since it is incompatible with
1089 standard Unix calling sequences. If the option is not selected,
1090 the caller must always pop the args.
1091
1092 The attribute stdcall is equivalent to RTD on a per module basis. */
1093
1094int
e075ae69 1095ix86_return_pops_args (fundecl, funtype, size)
b08de47e
MM
1096 tree fundecl;
1097 tree funtype;
1098 int size;
79325812 1099{
3345ee7d 1100 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1101
0f290768 1102 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1103 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1104
0f290768 1105 /* Stdcall functions will pop the stack if not variable args. */
698cdd84
SC
1106 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1107 rtd = 1;
79325812 1108
698cdd84
SC
1109 if (rtd
1110 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
1111 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1112 == void_type_node)))
698cdd84
SC
1113 return size;
1114 }
79325812 1115
e9a25f70 1116 /* Lose any fake structure return argument. */
0d7d98ee
JH
1117 if (aggregate_value_p (TREE_TYPE (funtype))
1118 && !TARGET_64BIT)
698cdd84 1119 return GET_MODE_SIZE (Pmode);
79325812 1120
2614aac6 1121 return 0;
b08de47e 1122}
b08de47e
MM
1123\f
1124/* Argument support functions. */
1125
1126/* Initialize a variable CUM of type CUMULATIVE_ARGS
1127 for a call to a function whose data type is FNTYPE.
1128 For a library call, FNTYPE is 0. */
1129
1130void
1131init_cumulative_args (cum, fntype, libname)
e9a25f70 1132 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
b08de47e
MM
1133 tree fntype; /* tree ptr for function decl */
1134 rtx libname; /* SYMBOL_REF of library name or 0 */
1135{
1136 static CUMULATIVE_ARGS zero_cum;
1137 tree param, next_param;
1138
1139 if (TARGET_DEBUG_ARG)
1140 {
1141 fprintf (stderr, "\ninit_cumulative_args (");
1142 if (fntype)
e9a25f70
JL
1143 fprintf (stderr, "fntype code = %s, ret code = %s",
1144 tree_code_name[(int) TREE_CODE (fntype)],
1145 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
1146 else
1147 fprintf (stderr, "no fntype");
1148
1149 if (libname)
1150 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1151 }
1152
1153 *cum = zero_cum;
1154
1155 /* Set up the number of registers to use for passing arguments. */
e075ae69 1156 cum->nregs = ix86_regparm;
b08de47e
MM
1157 if (fntype)
1158 {
1159 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 1160
b08de47e
MM
1161 if (attr)
1162 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1163 }
1164
1165 /* Determine if this function has variable arguments. This is
1166 indicated by the last argument being 'void_type_mode' if there
1167 are no variable arguments. If there are variable arguments, then
1168 we won't pass anything in registers */
1169
1170 if (cum->nregs)
1171 {
1172 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 1173 param != 0; param = next_param)
b08de47e
MM
1174 {
1175 next_param = TREE_CHAIN (param);
e9a25f70 1176 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
b08de47e
MM
1177 cum->nregs = 0;
1178 }
1179 }
1180
1181 if (TARGET_DEBUG_ARG)
1182 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1183
1184 return;
1185}
1186
1187/* Update the data in CUM to advance over an argument
1188 of mode MODE and data type TYPE.
1189 (TYPE is null for libcalls where that information may not be available.) */
1190
1191void
1192function_arg_advance (cum, mode, type, named)
1193 CUMULATIVE_ARGS *cum; /* current arg information */
1194 enum machine_mode mode; /* current arg mode */
1195 tree type; /* type of the argument or 0 if lib support */
1196 int named; /* whether or not the argument was named */
1197{
5ac9118e
KG
1198 int bytes =
1199 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
1200 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1201
1202 if (TARGET_DEBUG_ARG)
1203 fprintf (stderr,
e9a25f70 1204 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 1205 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
82a127a9 1206 if (TARGET_SSE && mode == TImode)
b08de47e 1207 {
82a127a9
CM
1208 cum->sse_words += words;
1209 cum->sse_nregs -= 1;
1210 cum->sse_regno += 1;
1211 if (cum->sse_nregs <= 0)
1212 {
1213 cum->sse_nregs = 0;
1214 cum->sse_regno = 0;
1215 }
b08de47e 1216 }
a4f31c00 1217 else
82a127a9
CM
1218 {
1219 cum->words += words;
1220 cum->nregs -= words;
1221 cum->regno += words;
b08de47e 1222
82a127a9
CM
1223 if (cum->nregs <= 0)
1224 {
1225 cum->nregs = 0;
1226 cum->regno = 0;
1227 }
1228 }
b08de47e
MM
1229 return;
1230}
1231
1232/* Define where to put the arguments to a function.
1233 Value is zero to push the argument on the stack,
1234 or a hard register in which to store the argument.
1235
1236 MODE is the argument's machine mode.
1237 TYPE is the data type of the argument (as a tree).
1238 This is null for libcalls where that information may
1239 not be available.
1240 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1241 the preceding args and about the function being called.
1242 NAMED is nonzero if this argument is a named parameter
1243 (otherwise it is an extra parameter matching an ellipsis). */
1244
1245struct rtx_def *
1246function_arg (cum, mode, type, named)
1247 CUMULATIVE_ARGS *cum; /* current arg information */
1248 enum machine_mode mode; /* current arg mode */
1249 tree type; /* type of the argument or 0 if lib support */
1250 int named; /* != 0 for normal args, == 0 for ... args */
1251{
1252 rtx ret = NULL_RTX;
5ac9118e
KG
1253 int bytes =
1254 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
1255 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1256
32ee7d1d
JH
1257 if (mode == VOIDmode)
1258 return constm1_rtx;
1259
b08de47e
MM
1260 switch (mode)
1261 {
0f290768 1262 /* For now, pass fp/complex values on the stack. */
e9a25f70 1263 default:
b08de47e
MM
1264 break;
1265
1266 case BLKmode:
1267 case DImode:
1268 case SImode:
1269 case HImode:
1270 case QImode:
1271 if (words <= cum->nregs)
f64cecad 1272 ret = gen_rtx_REG (mode, cum->regno);
b08de47e 1273 break;
82a127a9
CM
1274 case TImode:
1275 if (cum->sse_nregs)
1276 ret = gen_rtx_REG (mode, cum->sse_regno);
a4f31c00 1277 break;
b08de47e
MM
1278 }
1279
1280 if (TARGET_DEBUG_ARG)
1281 {
1282 fprintf (stderr,
e9a25f70 1283 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
b08de47e
MM
1284 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1285
1286 if (ret)
1287 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
1288 else
1289 fprintf (stderr, ", stack");
1290
1291 fprintf (stderr, " )\n");
1292 }
1293
1294 return ret;
1295}
e075ae69 1296\f
8bad7136 1297
7dd4b4a3
JH
1298/* Return nonzero if OP is general operand representable on x86_64. */
1299
1300int
1301x86_64_general_operand (op, mode)
1302 rtx op;
1303 enum machine_mode mode;
1304{
1305 if (!TARGET_64BIT)
1306 return general_operand (op, mode);
1307 if (nonimmediate_operand (op, mode))
1308 return 1;
1309 return x86_64_sign_extended_value (op);
1310}
1311
1312/* Return nonzero if OP is general operand representable on x86_64
1313 as eighter sign extended or zero extended constant. */
1314
1315int
1316x86_64_szext_general_operand (op, mode)
1317 rtx op;
1318 enum machine_mode mode;
1319{
1320 if (!TARGET_64BIT)
1321 return general_operand (op, mode);
1322 if (nonimmediate_operand (op, mode))
1323 return 1;
1324 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
1325}
1326
1327/* Return nonzero if OP is nonmemory operand representable on x86_64. */
1328
1329int
1330x86_64_nonmemory_operand (op, mode)
1331 rtx op;
1332 enum machine_mode mode;
1333{
1334 if (!TARGET_64BIT)
1335 return nonmemory_operand (op, mode);
1336 if (register_operand (op, mode))
1337 return 1;
1338 return x86_64_sign_extended_value (op);
1339}
1340
1341/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
1342
1343int
1344x86_64_movabs_operand (op, mode)
1345 rtx op;
1346 enum machine_mode mode;
1347{
1348 if (!TARGET_64BIT || !flag_pic)
1349 return nonmemory_operand (op, mode);
1350 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
1351 return 1;
1352 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
1353 return 1;
1354 return 0;
1355}
1356
1357/* Return nonzero if OP is nonmemory operand representable on x86_64. */
1358
1359int
1360x86_64_szext_nonmemory_operand (op, mode)
1361 rtx op;
1362 enum machine_mode mode;
1363{
1364 if (!TARGET_64BIT)
1365 return nonmemory_operand (op, mode);
1366 if (register_operand (op, mode))
1367 return 1;
1368 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
1369}
1370
1371/* Return nonzero if OP is immediate operand representable on x86_64. */
1372
1373int
1374x86_64_immediate_operand (op, mode)
1375 rtx op;
1376 enum machine_mode mode;
1377{
1378 if (!TARGET_64BIT)
1379 return immediate_operand (op, mode);
1380 return x86_64_sign_extended_value (op);
1381}
1382
1383/* Return nonzero if OP is immediate operand representable on x86_64. */
1384
1385int
1386x86_64_zext_immediate_operand (op, mode)
1387 rtx op;
1388 enum machine_mode mode ATTRIBUTE_UNUSED;
1389{
1390 return x86_64_zero_extended_value (op);
1391}
1392
8bad7136
JL
1393/* Return nonzero if OP is (const_int 1), else return zero. */
1394
1395int
1396const_int_1_operand (op, mode)
1397 rtx op;
1398 enum machine_mode mode ATTRIBUTE_UNUSED;
1399{
1400 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
1401}
1402
e075ae69
RH
1403/* Returns 1 if OP is either a symbol reference or a sum of a symbol
1404 reference and a constant. */
b08de47e
MM
1405
1406int
e075ae69
RH
1407symbolic_operand (op, mode)
1408 register rtx op;
1409 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1410{
e075ae69 1411 switch (GET_CODE (op))
2a2ab3f9 1412 {
e075ae69
RH
1413 case SYMBOL_REF:
1414 case LABEL_REF:
1415 return 1;
1416
1417 case CONST:
1418 op = XEXP (op, 0);
1419 if (GET_CODE (op) == SYMBOL_REF
1420 || GET_CODE (op) == LABEL_REF
1421 || (GET_CODE (op) == UNSPEC
1422 && XINT (op, 1) >= 6
1423 && XINT (op, 1) <= 7))
1424 return 1;
1425 if (GET_CODE (op) != PLUS
1426 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1427 return 0;
1428
1429 op = XEXP (op, 0);
1430 if (GET_CODE (op) == SYMBOL_REF
1431 || GET_CODE (op) == LABEL_REF)
1432 return 1;
1433 /* Only @GOTOFF gets offsets. */
1434 if (GET_CODE (op) != UNSPEC
1435 || XINT (op, 1) != 7)
1436 return 0;
1437
1438 op = XVECEXP (op, 0, 0);
1439 if (GET_CODE (op) == SYMBOL_REF
1440 || GET_CODE (op) == LABEL_REF)
1441 return 1;
1442 return 0;
1443
1444 default:
1445 return 0;
2a2ab3f9
JVA
1446 }
1447}
2a2ab3f9 1448
e075ae69 1449/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 1450
e075ae69
RH
1451int
1452pic_symbolic_operand (op, mode)
1453 register rtx op;
1454 enum machine_mode mode ATTRIBUTE_UNUSED;
1455{
1456 if (GET_CODE (op) == CONST)
2a2ab3f9 1457 {
e075ae69
RH
1458 op = XEXP (op, 0);
1459 if (GET_CODE (op) == UNSPEC)
1460 return 1;
1461 if (GET_CODE (op) != PLUS
1462 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1463 return 0;
1464 op = XEXP (op, 0);
1465 if (GET_CODE (op) == UNSPEC)
1466 return 1;
2a2ab3f9 1467 }
e075ae69 1468 return 0;
2a2ab3f9 1469}
2a2ab3f9 1470
28d52ffb
RH
1471/* Test for a valid operand for a call instruction. Don't allow the
1472 arg pointer register or virtual regs since they may decay into
1473 reg + const, which the patterns can't handle. */
2a2ab3f9 1474
e075ae69
RH
1475int
1476call_insn_operand (op, mode)
1477 rtx op;
1478 enum machine_mode mode ATTRIBUTE_UNUSED;
1479{
e075ae69
RH
1480 /* Disallow indirect through a virtual register. This leads to
1481 compiler aborts when trying to eliminate them. */
1482 if (GET_CODE (op) == REG
1483 && (op == arg_pointer_rtx
564d80f4 1484 || op == frame_pointer_rtx
e075ae69
RH
1485 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
1486 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
1487 return 0;
2a2ab3f9 1488
28d52ffb
RH
1489 /* Disallow `call 1234'. Due to varying assembler lameness this
1490 gets either rejected or translated to `call .+1234'. */
1491 if (GET_CODE (op) == CONST_INT)
1492 return 0;
1493
cbbf65e0
RH
1494 /* Explicitly allow SYMBOL_REF even if pic. */
1495 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 1496 return 1;
2a2ab3f9 1497
cbbf65e0
RH
1498 /* Half-pic doesn't allow anything but registers and constants.
1499 We've just taken care of the later. */
1500 if (HALF_PIC_P ())
1501 return register_operand (op, Pmode);
1502
1503 /* Otherwise we can allow any general_operand in the address. */
1504 return general_operand (op, Pmode);
e075ae69 1505}
79325812 1506
e075ae69
RH
1507int
1508constant_call_address_operand (op, mode)
1509 rtx op;
1510 enum machine_mode mode ATTRIBUTE_UNUSED;
1511{
eaf19aba
JJ
1512 if (GET_CODE (op) == CONST
1513 && GET_CODE (XEXP (op, 0)) == PLUS
1514 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1515 op = XEXP (XEXP (op, 0), 0);
e1ff012c 1516 return GET_CODE (op) == SYMBOL_REF;
e075ae69 1517}
2a2ab3f9 1518
e075ae69 1519/* Match exactly zero and one. */
e9a25f70 1520
0f290768 1521int
e075ae69
RH
1522const0_operand (op, mode)
1523 register rtx op;
1524 enum machine_mode mode;
1525{
1526 return op == CONST0_RTX (mode);
1527}
e9a25f70 1528
0f290768 1529int
e075ae69
RH
1530const1_operand (op, mode)
1531 register rtx op;
1532 enum machine_mode mode ATTRIBUTE_UNUSED;
1533{
1534 return op == const1_rtx;
1535}
2a2ab3f9 1536
e075ae69 1537/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 1538
e075ae69
RH
1539int
1540const248_operand (op, mode)
1541 register rtx op;
1542 enum machine_mode mode ATTRIBUTE_UNUSED;
1543{
1544 return (GET_CODE (op) == CONST_INT
1545 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1546}
e9a25f70 1547
e075ae69 1548/* True if this is a constant appropriate for an increment or decremenmt. */
81fd0956 1549
e075ae69
RH
1550int
1551incdec_operand (op, mode)
1552 register rtx op;
0631e0bf 1553 enum machine_mode mode ATTRIBUTE_UNUSED;
e075ae69 1554{
b4e89e2d
JH
1555 /* On Pentium4, the inc and dec operations causes extra dependancy on flag
1556 registers, since carry flag is not set. */
1557 if (TARGET_PENTIUM4 && !optimize_size)
1558 return 0;
2b1c08f5 1559 return op == const1_rtx || op == constm1_rtx;
e075ae69 1560}
2a2ab3f9 1561
371bc54b
JH
1562/* Return nonzero if OP is acceptable as operand of DImode shift
1563 expander. */
1564
1565int
1566shiftdi_operand (op, mode)
1567 rtx op;
1568 enum machine_mode mode ATTRIBUTE_UNUSED;
1569{
1570 if (TARGET_64BIT)
1571 return nonimmediate_operand (op, mode);
1572 else
1573 return register_operand (op, mode);
1574}
1575
0f290768 1576/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
1577 register eliminable to the stack pointer. Otherwise, this is
1578 a register operand.
2a2ab3f9 1579
e075ae69
RH
1580 This is used to prevent esp from being used as an index reg.
1581 Which would only happen in pathological cases. */
5f1ec3e6 1582
e075ae69
RH
1583int
1584reg_no_sp_operand (op, mode)
1585 register rtx op;
1586 enum machine_mode mode;
1587{
1588 rtx t = op;
1589 if (GET_CODE (t) == SUBREG)
1590 t = SUBREG_REG (t);
564d80f4 1591 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 1592 return 0;
2a2ab3f9 1593
e075ae69 1594 return register_operand (op, mode);
2a2ab3f9 1595}
b840bfb0 1596
915119a5
BS
1597int
1598mmx_reg_operand (op, mode)
1599 register rtx op;
bd793c65 1600 enum machine_mode mode ATTRIBUTE_UNUSED;
915119a5
BS
1601{
1602 return MMX_REG_P (op);
1603}
1604
2c5a510c
RH
1605/* Return false if this is any eliminable register. Otherwise
1606 general_operand. */
1607
1608int
1609general_no_elim_operand (op, mode)
1610 register rtx op;
1611 enum machine_mode mode;
1612{
1613 rtx t = op;
1614 if (GET_CODE (t) == SUBREG)
1615 t = SUBREG_REG (t);
1616 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1617 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1618 || t == virtual_stack_dynamic_rtx)
1619 return 0;
1020a5ab
RH
1620 if (REG_P (t)
1621 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
1622 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
1623 return 0;
2c5a510c
RH
1624
1625 return general_operand (op, mode);
1626}
1627
1628/* Return false if this is any eliminable register. Otherwise
1629 register_operand or const_int. */
1630
1631int
1632nonmemory_no_elim_operand (op, mode)
1633 register rtx op;
1634 enum machine_mode mode;
1635{
1636 rtx t = op;
1637 if (GET_CODE (t) == SUBREG)
1638 t = SUBREG_REG (t);
1639 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1640 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1641 || t == virtual_stack_dynamic_rtx)
1642 return 0;
1643
1644 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1645}
1646
e075ae69 1647/* Return true if op is a Q_REGS class register. */
b840bfb0 1648
e075ae69
RH
1649int
1650q_regs_operand (op, mode)
1651 register rtx op;
1652 enum machine_mode mode;
b840bfb0 1653{
e075ae69
RH
1654 if (mode != VOIDmode && GET_MODE (op) != mode)
1655 return 0;
1656 if (GET_CODE (op) == SUBREG)
1657 op = SUBREG_REG (op);
1658 return QI_REG_P (op);
0f290768 1659}
b840bfb0 1660
e075ae69 1661/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 1662
e075ae69
RH
1663int
1664non_q_regs_operand (op, mode)
1665 register rtx op;
1666 enum machine_mode mode;
1667{
1668 if (mode != VOIDmode && GET_MODE (op) != mode)
1669 return 0;
1670 if (GET_CODE (op) == SUBREG)
1671 op = SUBREG_REG (op);
1672 return NON_QI_REG_P (op);
0f290768 1673}
b840bfb0 1674
915119a5
BS
1675/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
1676 insns. */
1677int
1678sse_comparison_operator (op, mode)
1679 rtx op;
1680 enum machine_mode mode ATTRIBUTE_UNUSED;
1681{
1682 enum rtx_code code = GET_CODE (op);
a46d1d38
JH
1683 switch (code)
1684 {
1685 /* Operations supported directly. */
1686 case EQ:
1687 case LT:
1688 case LE:
1689 case UNORDERED:
1690 case NE:
1691 case UNGE:
1692 case UNGT:
1693 case ORDERED:
1694 return 1;
1695 /* These are equivalent to ones above in non-IEEE comparisons. */
1696 case UNEQ:
1697 case UNLT:
1698 case UNLE:
1699 case LTGT:
1700 case GE:
1701 case GT:
1702 return !TARGET_IEEE_FP;
1703 default:
1704 return 0;
1705 }
915119a5 1706}
9076b9c1 1707/* Return 1 if OP is a valid comparison operator in valid mode. */
e075ae69 1708int
9076b9c1
JH
1709ix86_comparison_operator (op, mode)
1710 register rtx op;
1711 enum machine_mode mode;
e075ae69 1712{
9076b9c1 1713 enum machine_mode inmode;
9a915772 1714 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
1715 if (mode != VOIDmode && GET_MODE (op) != mode)
1716 return 0;
9a915772
JH
1717 if (GET_RTX_CLASS (code) != '<')
1718 return 0;
1719 inmode = GET_MODE (XEXP (op, 0));
1720
1721 if (inmode == CCFPmode || inmode == CCFPUmode)
1722 {
1723 enum rtx_code second_code, bypass_code;
1724 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1725 return (bypass_code == NIL && second_code == NIL);
1726 }
1727 switch (code)
3a3677ff
RH
1728 {
1729 case EQ: case NE:
3a3677ff 1730 return 1;
9076b9c1 1731 case LT: case GE:
7e08e190 1732 if (inmode == CCmode || inmode == CCGCmode
9076b9c1
JH
1733 || inmode == CCGOCmode || inmode == CCNOmode)
1734 return 1;
1735 return 0;
7e08e190 1736 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
7e08e190 1737 if (inmode == CCmode)
9076b9c1
JH
1738 return 1;
1739 return 0;
1740 case GT: case LE:
7e08e190 1741 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
9076b9c1
JH
1742 return 1;
1743 return 0;
3a3677ff
RH
1744 default:
1745 return 0;
1746 }
1747}
1748
9076b9c1 1749/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3a3677ff 1750
9076b9c1
JH
1751int
1752fcmov_comparison_operator (op, mode)
3a3677ff
RH
1753 register rtx op;
1754 enum machine_mode mode;
1755{
b62d22a2 1756 enum machine_mode inmode;
9a915772 1757 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
1758 if (mode != VOIDmode && GET_MODE (op) != mode)
1759 return 0;
9a915772
JH
1760 if (GET_RTX_CLASS (code) != '<')
1761 return 0;
1762 inmode = GET_MODE (XEXP (op, 0));
1763 if (inmode == CCFPmode || inmode == CCFPUmode)
3a3677ff 1764 {
9a915772
JH
1765 enum rtx_code second_code, bypass_code;
1766 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1767 if (bypass_code != NIL || second_code != NIL)
1768 return 0;
1769 code = ix86_fp_compare_code_to_integer (code);
1770 }
1771 /* i387 supports just limited amount of conditional codes. */
1772 switch (code)
1773 {
1774 case LTU: case GTU: case LEU: case GEU:
1775 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
9076b9c1
JH
1776 return 1;
1777 return 0;
9a915772
JH
1778 case ORDERED: case UNORDERED:
1779 case EQ: case NE:
1780 return 1;
3a3677ff
RH
1781 default:
1782 return 0;
1783 }
e075ae69 1784}
b840bfb0 1785
e9e80858
JH
1786/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1787
1788int
1789promotable_binary_operator (op, mode)
1790 register rtx op;
1791 enum machine_mode mode ATTRIBUTE_UNUSED;
1792{
1793 switch (GET_CODE (op))
1794 {
1795 case MULT:
1796 /* Modern CPUs have same latency for HImode and SImode multiply,
1797 but 386 and 486 do HImode multiply faster. */
1798 return ix86_cpu > PROCESSOR_I486;
1799 case PLUS:
1800 case AND:
1801 case IOR:
1802 case XOR:
1803 case ASHIFT:
1804 return 1;
1805 default:
1806 return 0;
1807 }
1808}
1809
e075ae69
RH
1810/* Nearly general operand, but accept any const_double, since we wish
1811 to be able to drop them into memory rather than have them get pulled
1812 into registers. */
b840bfb0 1813
2a2ab3f9 1814int
e075ae69
RH
1815cmp_fp_expander_operand (op, mode)
1816 register rtx op;
1817 enum machine_mode mode;
2a2ab3f9 1818{
e075ae69 1819 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 1820 return 0;
e075ae69 1821 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 1822 return 1;
e075ae69 1823 return general_operand (op, mode);
2a2ab3f9
JVA
1824}
1825
e075ae69 1826/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
1827
1828int
e075ae69 1829ext_register_operand (op, mode)
2a2ab3f9 1830 register rtx op;
bb5177ac 1831 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1832{
3522082b 1833 int regno;
0d7d98ee
JH
1834 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
1835 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
e075ae69 1836 return 0;
3522082b
JH
1837
1838 if (!register_operand (op, VOIDmode))
1839 return 0;
1840
1841 /* Be curefull to accept only registers having upper parts. */
1842 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
1843 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
e075ae69
RH
1844}
1845
1846/* Return 1 if this is a valid binary floating-point operation.
0f290768 1847 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
1848
1849int
1850binary_fp_operator (op, mode)
1851 register rtx op;
1852 enum machine_mode mode;
1853{
1854 if (mode != VOIDmode && mode != GET_MODE (op))
1855 return 0;
1856
2a2ab3f9
JVA
1857 switch (GET_CODE (op))
1858 {
e075ae69
RH
1859 case PLUS:
1860 case MINUS:
1861 case MULT:
1862 case DIV:
1863 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 1864
2a2ab3f9
JVA
1865 default:
1866 return 0;
1867 }
1868}
fee2770d 1869
e075ae69
RH
1870int
1871mult_operator(op, mode)
1872 register rtx op;
1873 enum machine_mode mode ATTRIBUTE_UNUSED;
1874{
1875 return GET_CODE (op) == MULT;
1876}
1877
1878int
1879div_operator(op, mode)
1880 register rtx op;
1881 enum machine_mode mode ATTRIBUTE_UNUSED;
1882{
1883 return GET_CODE (op) == DIV;
1884}
0a726ef1
JL
1885
1886int
e075ae69
RH
1887arith_or_logical_operator (op, mode)
1888 rtx op;
1889 enum machine_mode mode;
0a726ef1 1890{
e075ae69
RH
1891 return ((mode == VOIDmode || GET_MODE (op) == mode)
1892 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1893 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
1894}
1895
e075ae69 1896/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
1897
1898int
e075ae69
RH
1899memory_displacement_operand (op, mode)
1900 register rtx op;
1901 enum machine_mode mode;
4f2c8ebb 1902{
e075ae69 1903 struct ix86_address parts;
e9a25f70 1904
e075ae69
RH
1905 if (! memory_operand (op, mode))
1906 return 0;
1907
1908 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1909 abort ();
1910
1911 return parts.disp != NULL_RTX;
4f2c8ebb
RS
1912}
1913
16189740 1914/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
e075ae69
RH
1915 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1916
1917 ??? It seems likely that this will only work because cmpsi is an
1918 expander, and no actual insns use this. */
4f2c8ebb
RS
1919
1920int
e075ae69
RH
1921cmpsi_operand (op, mode)
1922 rtx op;
1923 enum machine_mode mode;
fee2770d 1924{
b9b2c339 1925 if (nonimmediate_operand (op, mode))
e075ae69
RH
1926 return 1;
1927
1928 if (GET_CODE (op) == AND
1929 && GET_MODE (op) == SImode
1930 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
1931 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
1932 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
1933 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
1934 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
1935 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 1936 return 1;
e9a25f70 1937
fee2770d
RS
1938 return 0;
1939}
d784886d 1940
e075ae69
RH
1941/* Returns 1 if OP is memory operand that can not be represented by the
1942 modRM array. */
d784886d
RK
1943
1944int
e075ae69 1945long_memory_operand (op, mode)
d784886d
RK
1946 register rtx op;
1947 enum machine_mode mode;
1948{
e075ae69 1949 if (! memory_operand (op, mode))
d784886d
RK
1950 return 0;
1951
e075ae69 1952 return memory_address_length (op) != 0;
d784886d 1953}
2247f6ed
JH
1954
1955/* Return nonzero if the rtx is known aligned. */
1956
1957int
1958aligned_operand (op, mode)
1959 rtx op;
1960 enum machine_mode mode;
1961{
1962 struct ix86_address parts;
1963
1964 if (!general_operand (op, mode))
1965 return 0;
1966
0f290768 1967 /* Registers and immediate operands are always "aligned". */
2247f6ed
JH
1968 if (GET_CODE (op) != MEM)
1969 return 1;
1970
0f290768 1971 /* Don't even try to do any aligned optimizations with volatiles. */
2247f6ed
JH
1972 if (MEM_VOLATILE_P (op))
1973 return 0;
1974
1975 op = XEXP (op, 0);
1976
1977 /* Pushes and pops are only valid on the stack pointer. */
1978 if (GET_CODE (op) == PRE_DEC
1979 || GET_CODE (op) == POST_INC)
1980 return 1;
1981
1982 /* Decode the address. */
1983 if (! ix86_decompose_address (op, &parts))
1984 abort ();
1985
1986 /* Look for some component that isn't known to be aligned. */
1987 if (parts.index)
1988 {
1989 if (parts.scale < 4
bdb429a5 1990 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
2247f6ed
JH
1991 return 0;
1992 }
1993 if (parts.base)
1994 {
bdb429a5 1995 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
2247f6ed
JH
1996 return 0;
1997 }
1998 if (parts.disp)
1999 {
2000 if (GET_CODE (parts.disp) != CONST_INT
2001 || (INTVAL (parts.disp) & 3) != 0)
2002 return 0;
2003 }
2004
2005 /* Didn't find one -- this must be an aligned address. */
2006 return 1;
2007}
e075ae69
RH
2008\f
2009/* Return true if the constant is something that can be loaded with
2010 a special instruction. Only handle 0.0 and 1.0; others are less
2011 worthwhile. */
57dbca5e
BS
2012
2013int
e075ae69
RH
2014standard_80387_constant_p (x)
2015 rtx x;
57dbca5e 2016{
2b04e52b 2017 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 2018 return -1;
2b04e52b
JH
2019 /* Note that on the 80387, other constants, such as pi, that we should support
2020 too. On some machines, these are much slower to load as standard constant,
2021 than to load from doubles in memory. */
2022 if (x == CONST0_RTX (GET_MODE (x)))
2023 return 1;
2024 if (x == CONST1_RTX (GET_MODE (x)))
2025 return 2;
e075ae69 2026 return 0;
57dbca5e
BS
2027}
2028
2b04e52b
JH
2029/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
2030 */
2031int
2032standard_sse_constant_p (x)
2033 rtx x;
2034{
2035 if (GET_CODE (x) != CONST_DOUBLE)
2036 return -1;
2037 return (x == CONST0_RTX (GET_MODE (x)));
2038}
2039
2a2ab3f9
JVA
2040/* Returns 1 if OP contains a symbol reference */
2041
2042int
2043symbolic_reference_mentioned_p (op)
2044 rtx op;
2045{
6f7d635c 2046 register const char *fmt;
2a2ab3f9
JVA
2047 register int i;
2048
2049 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
2050 return 1;
2051
2052 fmt = GET_RTX_FORMAT (GET_CODE (op));
2053 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2054 {
2055 if (fmt[i] == 'E')
2056 {
2057 register int j;
2058
2059 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2060 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2061 return 1;
2062 }
e9a25f70 2063
2a2ab3f9
JVA
2064 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
2065 return 1;
2066 }
2067
2068 return 0;
2069}
e075ae69
RH
2070
2071/* Return 1 if it is appropriate to emit `ret' instructions in the
2072 body of a function. Do this only if the epilogue is simple, needing a
2073 couple of insns. Prior to reloading, we can't tell how many registers
2074 must be saved, so return 0 then. Return 0 if there is no frame
2075 marker to de-allocate.
2076
2077 If NON_SAVING_SETJMP is defined and true, then it is not possible
2078 for the epilogue to be simple, so return 0. This is a special case
2079 since NON_SAVING_SETJMP will not cause regs_ever_live to change
2080 until final, but jump_optimize may need to know sooner if a
2081 `return' is OK. */
32b5b1aa
SC
2082
2083int
e075ae69 2084ix86_can_use_return_insn_p ()
32b5b1aa 2085{
4dd2ac2c 2086 struct ix86_frame frame;
9a7372d6 2087
e075ae69
RH
2088#ifdef NON_SAVING_SETJMP
2089 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
2090 return 0;
2091#endif
9a7372d6
RH
2092#ifdef FUNCTION_BLOCK_PROFILER_EXIT
2093 if (profile_block_flag == 2)
2094 return 0;
2095#endif
2096
2097 if (! reload_completed || frame_pointer_needed)
2098 return 0;
32b5b1aa 2099
9a7372d6
RH
2100 /* Don't allow more than 32 pop, since that's all we can do
2101 with one instruction. */
2102 if (current_function_pops_args
2103 && current_function_args_size >= 32768)
e075ae69 2104 return 0;
32b5b1aa 2105
4dd2ac2c
JH
2106 ix86_compute_frame_layout (&frame);
2107 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 2108}
6189a572
JH
2109\f
2110/* Return 1 if VALUE can be stored in the sign extended immediate field. */
2111int
2112x86_64_sign_extended_value (value)
2113 rtx value;
2114{
2115 switch (GET_CODE (value))
2116 {
2117 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
2118 to be at least 32 and this all acceptable constants are
2119 represented as CONST_INT. */
2120 case CONST_INT:
2121 if (HOST_BITS_PER_WIDE_INT == 32)
2122 return 1;
2123 else
2124 {
2125 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
fa9f36a1 2126 return trunc_int_for_mode (val, SImode) == val;
6189a572
JH
2127 }
2128 break;
2129
2130 /* For certain code models, the symbolic references are known to fit. */
2131 case SYMBOL_REF:
2132 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
2133
2134 /* For certain code models, the code is near as well. */
2135 case LABEL_REF:
2136 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
2137
2138 /* We also may accept the offsetted memory references in certain special
2139 cases. */
2140 case CONST:
2141 if (GET_CODE (XEXP (value, 0)) == UNSPEC
2142 && XVECLEN (XEXP (value, 0), 0) == 1
2143 && XINT (XEXP (value, 0), 1) == 15)
2144 return 1;
2145 else if (GET_CODE (XEXP (value, 0)) == PLUS)
2146 {
2147 rtx op1 = XEXP (XEXP (value, 0), 0);
2148 rtx op2 = XEXP (XEXP (value, 0), 1);
2149 HOST_WIDE_INT offset;
2150
2151 if (ix86_cmodel == CM_LARGE)
2152 return 0;
2153 if (GET_CODE (op2) != CONST_INT)
2154 return 0;
2155 offset = trunc_int_for_mode (INTVAL (op2), DImode);
2156 switch (GET_CODE (op1))
2157 {
2158 case SYMBOL_REF:
2159 /* For CM_SMALL assume that latest object is 1MB before
2160 end of 31bits boundary. We may also accept pretty
2161 large negative constants knowing that all objects are
2162 in the positive half of address space. */
2163 if (ix86_cmodel == CM_SMALL
2164 && offset < 1024*1024*1024
2165 && trunc_int_for_mode (offset, SImode) == offset)
2166 return 1;
2167 /* For CM_KERNEL we know that all object resist in the
2168 negative half of 32bits address space. We may not
2169 accept negative offsets, since they may be just off
2170 and we may accept pretty large possitive ones. */
2171 if (ix86_cmodel == CM_KERNEL
2172 && offset > 0
2173 && trunc_int_for_mode (offset, SImode) == offset)
2174 return 1;
2175 break;
2176 case LABEL_REF:
2177 /* These conditions are similar to SYMBOL_REF ones, just the
2178 constraints for code models differ. */
2179 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
2180 && offset < 1024*1024*1024
2181 && trunc_int_for_mode (offset, SImode) == offset)
2182 return 1;
2183 if (ix86_cmodel == CM_KERNEL
2184 && offset > 0
2185 && trunc_int_for_mode (offset, SImode) == offset)
2186 return 1;
2187 break;
2188 default:
2189 return 0;
2190 }
2191 }
2192 return 0;
2193 default:
2194 return 0;
2195 }
2196}
2197
2198/* Return 1 if VALUE can be stored in the zero extended immediate field. */
2199int
2200x86_64_zero_extended_value (value)
2201 rtx value;
2202{
2203 switch (GET_CODE (value))
2204 {
2205 case CONST_DOUBLE:
2206 if (HOST_BITS_PER_WIDE_INT == 32)
2207 return (GET_MODE (value) == VOIDmode
2208 && !CONST_DOUBLE_HIGH (value));
2209 else
2210 return 0;
2211 case CONST_INT:
2212 if (HOST_BITS_PER_WIDE_INT == 32)
2213 return INTVAL (value) >= 0;
2214 else
2215 return !(INTVAL (value) & ~(HOST_WIDE_INT)0xffffffff);
2216 break;
2217
2218 /* For certain code models, the symbolic references are known to fit. */
2219 case SYMBOL_REF:
2220 return ix86_cmodel == CM_SMALL;
2221
2222 /* For certain code models, the code is near as well. */
2223 case LABEL_REF:
2224 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
2225
2226 /* We also may accept the offsetted memory references in certain special
2227 cases. */
2228 case CONST:
2229 if (GET_CODE (XEXP (value, 0)) == PLUS)
2230 {
2231 rtx op1 = XEXP (XEXP (value, 0), 0);
2232 rtx op2 = XEXP (XEXP (value, 0), 1);
2233
2234 if (ix86_cmodel == CM_LARGE)
2235 return 0;
2236 switch (GET_CODE (op1))
2237 {
2238 case SYMBOL_REF:
2239 return 0;
2240 /* For small code model we may accept pretty large possitive
2241 offsets, since one bit is available for free. Negative
2242 offsets are limited by the size of NULL pointer area
2243 specified by the ABI. */
2244 if (ix86_cmodel == CM_SMALL
2245 && GET_CODE (op2) == CONST_INT
2246 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
2247 && (trunc_int_for_mode (INTVAL (op2), SImode)
2248 == INTVAL (op2)))
2249 return 1;
2250 /* ??? For the kernel, we may accept adjustment of
2251 -0x10000000, since we know that it will just convert
2252 negative address space to possitive, but perhaps this
2253 is not worthwhile. */
2254 break;
2255 case LABEL_REF:
2256 /* These conditions are similar to SYMBOL_REF ones, just the
2257 constraints for code models differ. */
2258 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
2259 && GET_CODE (op2) == CONST_INT
2260 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
2261 && (trunc_int_for_mode (INTVAL (op2), SImode)
2262 == INTVAL (op2)))
2263 return 1;
2264 break;
2265 default:
2266 return 0;
2267 }
2268 }
2269 return 0;
2270 default:
2271 return 0;
2272 }
2273}
6fca22eb
RH
2274
2275/* Value should be nonzero if functions must have frame pointers.
2276 Zero means the frame pointer need not be set up (and parms may
2277 be accessed via the stack pointer) in functions that seem suitable. */
2278
2279int
2280ix86_frame_pointer_required ()
2281{
2282 /* If we accessed previous frames, then the generated code expects
2283 to be able to access the saved ebp value in our frame. */
2284 if (cfun->machine->accesses_prev_frame)
2285 return 1;
a4f31c00 2286
6fca22eb
RH
2287 /* Several x86 os'es need a frame pointer for other reasons,
2288 usually pertaining to setjmp. */
2289 if (SUBTARGET_FRAME_POINTER_REQUIRED)
2290 return 1;
2291
2292 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
2293 the frame pointer by default. Turn it back on now if we've not
2294 got a leaf function. */
2295 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
2296 return 1;
2297
2298 return 0;
2299}
2300
2301/* Record that the current function accesses previous call frames. */
2302
2303void
2304ix86_setup_frame_addresses ()
2305{
2306 cfun->machine->accesses_prev_frame = 1;
2307}
e075ae69 2308\f
4cf12e7e 2309static char pic_label_name[32];
e9a25f70 2310
e075ae69
RH
2311/* This function generates code for -fpic that loads %ebx with
2312 the return address of the caller and then returns. */
2313
2314void
4cf12e7e 2315ix86_asm_file_end (file)
e075ae69 2316 FILE *file;
e075ae69
RH
2317{
2318 rtx xops[2];
32b5b1aa 2319
4cf12e7e
RH
2320 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
2321 return;
32b5b1aa 2322
c7f0da1d
RH
2323 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
2324 to updating relocations to a section being discarded such that this
2325 doesn't work. Ought to detect this at configure time. */
7c262518 2326#if 0
4cf12e7e
RH
2327 /* The trick here is to create a linkonce section containing the
2328 pic label thunk, but to refer to it with an internal label.
2329 Because the label is internal, we don't have inter-dso name
2330 binding issues on hosts that don't support ".hidden".
e9a25f70 2331
4cf12e7e
RH
2332 In order to use these macros, however, we must create a fake
2333 function decl. */
7c262518
RH
2334 if (targetm.have_named_sections)
2335 {
2336 tree decl = build_decl (FUNCTION_DECL,
2337 get_identifier ("i686.get_pc_thunk"),
2338 error_mark_node);
2339 DECL_ONE_ONLY (decl) = 1;
2340 UNIQUE_SECTION (decl, 0);
2341 named_section (decl, NULL, 0);
2342 }
2343 else
4cf12e7e 2344#else
7c262518 2345 text_section ();
4cf12e7e 2346#endif
0afeb08a 2347
4cf12e7e
RH
2348 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
2349 internal (non-global) label that's being emitted, it didn't make
2350 sense to have .type information for local labels. This caused
2351 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
2352 me debug info for a label that you're declaring non-global?) this
2353 was changed to call ASM_OUTPUT_LABEL() instead. */
2354
2355 ASM_OUTPUT_LABEL (file, pic_label_name);
2356
2357 xops[0] = pic_offset_table_rtx;
2358 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
2359 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
2360 output_asm_insn ("ret", xops);
32b5b1aa 2361}
32b5b1aa 2362
e075ae69
RH
2363void
2364load_pic_register ()
32b5b1aa 2365{
e075ae69 2366 rtx gotsym, pclab;
32b5b1aa 2367
0d7d98ee
JH
2368 if (TARGET_64BIT)
2369 abort();
2370
a8a05998 2371 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
32b5b1aa 2372
e075ae69 2373 if (TARGET_DEEP_BRANCH_PREDICTION)
32b5b1aa 2374 {
4cf12e7e
RH
2375 if (! pic_label_name[0])
2376 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
e075ae69 2377 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
32b5b1aa 2378 }
e075ae69 2379 else
e5cb57e8 2380 {
e075ae69 2381 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
e5cb57e8 2382 }
e5cb57e8 2383
e075ae69 2384 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
2a2ab3f9 2385
e075ae69
RH
2386 if (! TARGET_DEEP_BRANCH_PREDICTION)
2387 emit_insn (gen_popsi1 (pic_offset_table_rtx));
79325812 2388
e075ae69 2389 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
e9a25f70 2390}
8dfe5673 2391
0d7d98ee 2392/* Generate an "push" pattern for input ARG. */
e9a25f70 2393
e075ae69
RH
2394static rtx
2395gen_push (arg)
2396 rtx arg;
e9a25f70 2397{
c5c76735 2398 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
2399 gen_rtx_MEM (Pmode,
2400 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
2401 stack_pointer_rtx)),
2402 arg);
e9a25f70
JL
2403}
2404
4dd2ac2c
JH
2405/* Return 1 if we need to save REGNO. */
2406static int
1020a5ab
RH
2407ix86_save_reg (regno, maybe_eh_return)
2408 int regno;
37a58036 2409 int maybe_eh_return;
1020a5ab
RH
2410{
2411 if (flag_pic
2412 && ! TARGET_64BIT
2413 && regno == PIC_OFFSET_TABLE_REGNUM
2414 && (current_function_uses_pic_offset_table
2415 || current_function_uses_const_pool
2416 || current_function_calls_eh_return))
2417 return 1;
2418
2419 if (current_function_calls_eh_return && maybe_eh_return)
2420 {
2421 unsigned i;
2422 for (i = 0; ; i++)
2423 {
2424 unsigned test = EH_RETURN_DATA_REGNO(i);
2425 if (test == INVALID_REGNUM)
2426 break;
2427 if (test == (unsigned) regno)
2428 return 1;
2429 }
2430 }
4dd2ac2c 2431
1020a5ab
RH
2432 return (regs_ever_live[regno]
2433 && !call_used_regs[regno]
2434 && !fixed_regs[regno]
2435 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4dd2ac2c
JH
2436}
2437
0903fcab
JH
2438/* Return number of registers to be saved on the stack. */
2439
2440static int
2441ix86_nsaved_regs ()
2442{
2443 int nregs = 0;
0903fcab
JH
2444 int regno;
2445
4dd2ac2c 2446 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 2447 if (ix86_save_reg (regno, true))
4dd2ac2c 2448 nregs++;
0903fcab
JH
2449 return nregs;
2450}
2451
2452/* Return the offset between two registers, one to be eliminated, and the other
2453 its replacement, at the start of a routine. */
2454
2455HOST_WIDE_INT
2456ix86_initial_elimination_offset (from, to)
2457 int from;
2458 int to;
2459{
4dd2ac2c
JH
2460 struct ix86_frame frame;
2461 ix86_compute_frame_layout (&frame);
564d80f4
JH
2462
2463 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 2464 return frame.hard_frame_pointer_offset;
564d80f4
JH
2465 else if (from == FRAME_POINTER_REGNUM
2466 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 2467 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
2468 else
2469 {
564d80f4
JH
2470 if (to != STACK_POINTER_REGNUM)
2471 abort ();
2472 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 2473 return frame.stack_pointer_offset;
564d80f4
JH
2474 else if (from != FRAME_POINTER_REGNUM)
2475 abort ();
0903fcab 2476 else
4dd2ac2c 2477 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
2478 }
2479}
2480
4dd2ac2c 2481/* Fill structure ix86_frame about frame of currently computed function. */
0f290768 2482
4dd2ac2c
JH
2483static void
2484ix86_compute_frame_layout (frame)
2485 struct ix86_frame *frame;
65954bd8 2486{
65954bd8 2487 HOST_WIDE_INT total_size;
564d80f4 2488 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
44affdae
JH
2489 int offset;
2490 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4dd2ac2c 2491 HOST_WIDE_INT size = get_frame_size ();
65954bd8 2492
4dd2ac2c 2493 frame->nregs = ix86_nsaved_regs ();
564d80f4 2494 total_size = size;
65954bd8 2495
4dd2ac2c
JH
2496 /* Skip return value and save base pointer. */
2497 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
2498
2499 frame->hard_frame_pointer_offset = offset;
564d80f4 2500
fcbfaa65
RK
2501 /* Do some sanity checking of stack_alignment_needed and
2502 preferred_alignment, since i386 port is the only using those features
2503 that may break easilly. */
564d80f4 2504
44affdae
JH
2505 if (size && !stack_alignment_needed)
2506 abort ();
44affdae
JH
2507 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
2508 abort ();
2509 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
2510 abort ();
2511 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
2512 abort ();
564d80f4 2513
4dd2ac2c
JH
2514 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
2515 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 2516
4dd2ac2c
JH
2517 /* Register save area */
2518 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 2519
8362f420
JH
2520 /* Va-arg area */
2521 if (ix86_save_varrargs_registers)
2522 {
2523 offset += X86_64_VARARGS_SIZE;
2524 frame->va_arg_size = X86_64_VARARGS_SIZE;
2525 }
2526 else
2527 frame->va_arg_size = 0;
2528
4dd2ac2c
JH
2529 /* Align start of frame for local function. */
2530 frame->padding1 = ((offset + stack_alignment_needed - 1)
2531 & -stack_alignment_needed) - offset;
f73ad30e 2532
4dd2ac2c 2533 offset += frame->padding1;
65954bd8 2534
4dd2ac2c
JH
2535 /* Frame pointer points here. */
2536 frame->frame_pointer_offset = offset;
54ff41b7 2537
4dd2ac2c 2538 offset += size;
65954bd8 2539
4dd2ac2c 2540 /* Add outgoing arguments area. */
f73ad30e 2541 if (ACCUMULATE_OUTGOING_ARGS)
4dd2ac2c
JH
2542 {
2543 offset += current_function_outgoing_args_size;
2544 frame->outgoing_arguments_size = current_function_outgoing_args_size;
2545 }
2546 else
2547 frame->outgoing_arguments_size = 0;
564d80f4 2548
4dd2ac2c
JH
2549 /* Align stack boundary. */
2550 frame->padding2 = ((offset + preferred_alignment - 1)
2551 & -preferred_alignment) - offset;
2552
2553 offset += frame->padding2;
2554
2555 /* We've reached end of stack frame. */
2556 frame->stack_pointer_offset = offset;
2557
2558 /* Size prologue needs to allocate. */
2559 frame->to_allocate =
2560 (size + frame->padding1 + frame->padding2
8362f420 2561 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 2562
8362f420
JH
2563 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
2564 && current_function_is_leaf)
2565 {
2566 frame->red_zone_size = frame->to_allocate;
2567 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
2568 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
2569 }
2570 else
2571 frame->red_zone_size = 0;
2572 frame->to_allocate -= frame->red_zone_size;
2573 frame->stack_pointer_offset -= frame->red_zone_size;
4dd2ac2c
JH
2574#if 0
2575 fprintf (stderr, "nregs: %i\n", frame->nregs);
2576 fprintf (stderr, "size: %i\n", size);
2577 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
2578 fprintf (stderr, "padding1: %i\n", frame->padding1);
8362f420 2579 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4dd2ac2c
JH
2580 fprintf (stderr, "padding2: %i\n", frame->padding2);
2581 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
8362f420 2582 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4dd2ac2c
JH
2583 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
2584 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
2585 frame->hard_frame_pointer_offset);
2586 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
2587#endif
65954bd8
JL
2588}
2589
0903fcab
JH
2590/* Emit code to save registers in the prologue. */
2591
2592static void
2593ix86_emit_save_regs ()
2594{
2595 register int regno;
0903fcab 2596 rtx insn;
0903fcab 2597
4dd2ac2c 2598 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 2599 if (ix86_save_reg (regno, true))
0903fcab 2600 {
0d7d98ee 2601 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
2602 RTX_FRAME_RELATED_P (insn) = 1;
2603 }
2604}
2605
c6036a37
JH
2606/* Emit code to save registers using MOV insns. First register
2607 is restored from POINTER + OFFSET. */
2608static void
2609ix86_emit_save_regs_using_mov (pointer, offset)
b72f00af
RK
2610 rtx pointer;
2611 HOST_WIDE_INT offset;
c6036a37
JH
2612{
2613 int regno;
2614 rtx insn;
2615
2616 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2617 if (ix86_save_reg (regno, true))
2618 {
b72f00af
RK
2619 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
2620 Pmode, offset),
c6036a37
JH
2621 gen_rtx_REG (Pmode, regno));
2622 RTX_FRAME_RELATED_P (insn) = 1;
2623 offset += UNITS_PER_WORD;
2624 }
2625}
2626
0f290768 2627/* Expand the prologue into a bunch of separate insns. */
e075ae69
RH
2628
2629void
2630ix86_expand_prologue ()
2a2ab3f9 2631{
564d80f4 2632 rtx insn;
0d7d98ee
JH
2633 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
2634 || current_function_uses_const_pool)
2635 && !TARGET_64BIT);
4dd2ac2c 2636 struct ix86_frame frame;
c6036a37
JH
2637 int use_mov = (TARGET_PROLOGUE_USING_MOVE && !optimize_size);
2638 HOST_WIDE_INT allocate;
4dd2ac2c
JH
2639
2640 ix86_compute_frame_layout (&frame);
79325812 2641
e075ae69
RH
2642 /* Note: AT&T enter does NOT have reversed args. Enter is probably
2643 slower on all targets. Also sdb doesn't like it. */
e9a25f70 2644
2a2ab3f9
JVA
2645 if (frame_pointer_needed)
2646 {
564d80f4 2647 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
e075ae69 2648 RTX_FRAME_RELATED_P (insn) = 1;
e9a25f70 2649
564d80f4 2650 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
e075ae69 2651 RTX_FRAME_RELATED_P (insn) = 1;
2a2ab3f9
JVA
2652 }
2653
c6036a37
JH
2654 allocate = frame.to_allocate;
2655 /* In case we are dealing only with single register and empty frame,
2656 push is equivalent of the mov+add sequence. */
2657 if (allocate == 0 && frame.nregs <= 1)
2658 use_mov = 0;
2659
2660 if (!use_mov)
2661 ix86_emit_save_regs ();
2662 else
2663 allocate += frame.nregs * UNITS_PER_WORD;
564d80f4 2664
c6036a37 2665 if (allocate == 0)
8dfe5673 2666 ;
e323735c 2667 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
469ac993 2668 {
f2042df3
RH
2669 insn = emit_insn (gen_pro_epilogue_adjust_stack
2670 (stack_pointer_rtx, stack_pointer_rtx,
e323735c 2671 GEN_INT (-allocate)));
e075ae69 2672 RTX_FRAME_RELATED_P (insn) = 1;
469ac993 2673 }
79325812 2674 else
8dfe5673 2675 {
e075ae69 2676 /* ??? Is this only valid for Win32? */
e9a25f70 2677
e075ae69 2678 rtx arg0, sym;
e9a25f70 2679
8362f420
JH
2680 if (TARGET_64BIT)
2681 abort();
2682
e075ae69 2683 arg0 = gen_rtx_REG (SImode, 0);
c6036a37 2684 emit_move_insn (arg0, GEN_INT (allocate));
77a989d1 2685
e075ae69
RH
2686 sym = gen_rtx_MEM (FUNCTION_MODE,
2687 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
32ee7d1d 2688 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
e075ae69
RH
2689
2690 CALL_INSN_FUNCTION_USAGE (insn)
276ab4a4
RH
2691 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
2692 CALL_INSN_FUNCTION_USAGE (insn));
e075ae69 2693 }
c6036a37
JH
2694 if (use_mov)
2695 {
2696 if (!frame_pointer_needed || !frame.to_allocate)
2697 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
2698 else
2699 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
2700 -frame.nregs * UNITS_PER_WORD);
2701 }
e9a25f70 2702
84530511
SC
2703#ifdef SUBTARGET_PROLOGUE
2704 SUBTARGET_PROLOGUE;
0f290768 2705#endif
84530511 2706
e9a25f70 2707 if (pic_reg_used)
e075ae69 2708 load_pic_register ();
77a989d1 2709
e9a25f70
JL
2710 /* If we are profiling, make sure no instructions are scheduled before
2711 the call to mcount. However, if -fpic, the above call will have
2712 done that. */
e075ae69 2713 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
e9a25f70 2714 emit_insn (gen_blockage ());
77a989d1
SC
2715}
2716
da2d1d3a
JH
2717/* Emit code to restore saved registers using MOV insns. First register
2718 is restored from POINTER + OFFSET. */
2719static void
1020a5ab
RH
2720ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
2721 rtx pointer;
2722 int offset;
37a58036 2723 int maybe_eh_return;
da2d1d3a
JH
2724{
2725 int regno;
da2d1d3a 2726
4dd2ac2c 2727 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 2728 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 2729 {
4dd2ac2c 2730 emit_move_insn (gen_rtx_REG (Pmode, regno),
b72f00af
RK
2731 adjust_address (gen_rtx_MEM (Pmode, pointer),
2732 Pmode, offset));
4dd2ac2c 2733 offset += UNITS_PER_WORD;
da2d1d3a
JH
2734 }
2735}
2736
0f290768 2737/* Restore function stack, frame, and registers. */
e9a25f70 2738
2a2ab3f9 2739void
1020a5ab
RH
2740ix86_expand_epilogue (style)
2741 int style;
2a2ab3f9 2742{
1c71e60e 2743 int regno;
fdb8a883 2744 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4dd2ac2c 2745 struct ix86_frame frame;
65954bd8 2746 HOST_WIDE_INT offset;
4dd2ac2c
JH
2747
2748 ix86_compute_frame_layout (&frame);
2a2ab3f9 2749
a4f31c00 2750 /* Calculate start of saved registers relative to ebp. Special care
84e306b4
RH
2751 must be taken for the normal return case of a function using
2752 eh_return: the eax and edx registers are marked as saved, but not
2753 restored along this path. */
2754 offset = frame.nregs;
2755 if (current_function_calls_eh_return && style != 2)
2756 offset -= 2;
2757 offset *= -UNITS_PER_WORD;
2a2ab3f9 2758
1c71e60e
JH
2759#ifdef FUNCTION_BLOCK_PROFILER_EXIT
2760 if (profile_block_flag == 2)
564d80f4 2761 {
1c71e60e 2762 FUNCTION_BLOCK_PROFILER_EXIT;
564d80f4 2763 }
1c71e60e 2764#endif
564d80f4 2765
fdb8a883
JW
2766 /* If we're only restoring one register and sp is not valid then
2767 using a move instruction to restore the register since it's
0f290768 2768 less work than reloading sp and popping the register.
da2d1d3a
JH
2769
2770 The default code result in stack adjustment using add/lea instruction,
2771 while this code results in LEAVE instruction (or discrete equivalent),
2772 so it is profitable in some other cases as well. Especially when there
2773 are no registers to restore. We also use this code when TARGET_USE_LEAVE
2774 and there is exactly one register to pop. This heruistic may need some
2775 tuning in future. */
4dd2ac2c 2776 if ((!sp_valid && frame.nregs <= 1)
c6036a37
JH
2777 || (TARGET_EPILOGUE_USING_MOVE && !optimize_size
2778 && (frame.nregs > 1 || frame.to_allocate))
4dd2ac2c 2779 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
da2d1d3a 2780 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
1020a5ab
RH
2781 && frame.nregs == 1)
2782 || style == 2)
2a2ab3f9 2783 {
da2d1d3a
JH
2784 /* Restore registers. We can use ebp or esp to address the memory
2785 locations. If both are available, default to ebp, since offsets
2786 are known to be small. Only exception is esp pointing directly to the
2787 end of block of saved registers, where we may simplify addressing
2788 mode. */
2789
4dd2ac2c 2790 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
1020a5ab
RH
2791 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
2792 frame.to_allocate, style == 2);
da2d1d3a 2793 else
1020a5ab
RH
2794 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
2795 offset, style == 2);
2796
2797 /* eh_return epilogues need %ecx added to the stack pointer. */
2798 if (style == 2)
2799 {
2800 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2a2ab3f9 2801
1020a5ab
RH
2802 if (frame_pointer_needed)
2803 {
2804 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
2805 tmp = plus_constant (tmp, UNITS_PER_WORD);
2806 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
2807
2808 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
2809 emit_move_insn (hard_frame_pointer_rtx, tmp);
2810
2811 emit_insn (gen_pro_epilogue_adjust_stack
f2042df3 2812 (stack_pointer_rtx, sa, const0_rtx));
1020a5ab
RH
2813 }
2814 else
2815 {
2816 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
2817 tmp = plus_constant (tmp, (frame.to_allocate
2818 + frame.nregs * UNITS_PER_WORD));
2819 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
2820 }
2821 }
2822 else if (!frame_pointer_needed)
f2042df3
RH
2823 emit_insn (gen_pro_epilogue_adjust_stack
2824 (stack_pointer_rtx, stack_pointer_rtx,
2825 GEN_INT (frame.to_allocate
2826 + frame.nregs * UNITS_PER_WORD)));
0f290768 2827 /* If not an i386, mov & pop is faster than "leave". */
da2d1d3a 2828 else if (TARGET_USE_LEAVE || optimize_size)
8362f420 2829 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
c8c5cb99 2830 else
2a2ab3f9 2831 {
1c71e60e
JH
2832 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2833 hard_frame_pointer_rtx,
f2042df3 2834 const0_rtx));
8362f420
JH
2835 if (TARGET_64BIT)
2836 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
2837 else
2838 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
e9a25f70
JL
2839 }
2840 }
1c71e60e 2841 else
68f654ec 2842 {
1c71e60e
JH
2843 /* First step is to deallocate the stack frame so that we can
2844 pop the registers. */
2845 if (!sp_valid)
2846 {
2847 if (!frame_pointer_needed)
2848 abort ();
2849 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2850 hard_frame_pointer_rtx,
f2042df3 2851 GEN_INT (offset)));
1c71e60e 2852 }
4dd2ac2c 2853 else if (frame.to_allocate)
f2042df3
RH
2854 emit_insn (gen_pro_epilogue_adjust_stack
2855 (stack_pointer_rtx, stack_pointer_rtx,
2856 GEN_INT (frame.to_allocate)));
1c71e60e 2857
4dd2ac2c 2858 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 2859 if (ix86_save_reg (regno, false))
8362f420
JH
2860 {
2861 if (TARGET_64BIT)
2862 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
2863 else
2864 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
2865 }
4dd2ac2c 2866 if (frame_pointer_needed)
8362f420
JH
2867 {
2868 if (TARGET_64BIT)
2869 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
2870 else
2871 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2872 }
68f654ec 2873 }
68f654ec 2874
cbbf65e0 2875 /* Sibcall epilogues don't want a return instruction. */
1020a5ab 2876 if (style == 0)
cbbf65e0
RH
2877 return;
2878
2a2ab3f9
JVA
2879 if (current_function_pops_args && current_function_args_size)
2880 {
e075ae69 2881 rtx popc = GEN_INT (current_function_pops_args);
2a2ab3f9 2882
b8c752c8
UD
2883 /* i386 can only pop 64K bytes. If asked to pop more, pop
2884 return address, do explicit add, and jump indirectly to the
0f290768 2885 caller. */
2a2ab3f9 2886
b8c752c8 2887 if (current_function_pops_args >= 65536)
2a2ab3f9 2888 {
e075ae69 2889 rtx ecx = gen_rtx_REG (SImode, 2);
e9a25f70 2890
8362f420
JH
2891 /* There are is no "pascal" calling convention in 64bit ABI. */
2892 if (TARGET_64BIT)
2893 abort();
2894
e075ae69
RH
2895 emit_insn (gen_popsi1 (ecx));
2896 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 2897 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 2898 }
79325812 2899 else
e075ae69
RH
2900 emit_jump_insn (gen_return_pop_internal (popc));
2901 }
2902 else
2903 emit_jump_insn (gen_return_internal ());
2904}
2905\f
2906/* Extract the parts of an RTL expression that is a valid memory address
2907 for an instruction. Return false if the structure of the address is
2908 grossly off. */
2909
2910static int
2911ix86_decompose_address (addr, out)
2912 register rtx addr;
2913 struct ix86_address *out;
2914{
2915 rtx base = NULL_RTX;
2916 rtx index = NULL_RTX;
2917 rtx disp = NULL_RTX;
2918 HOST_WIDE_INT scale = 1;
2919 rtx scale_rtx = NULL_RTX;
2920
2921 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2922 base = addr;
2923 else if (GET_CODE (addr) == PLUS)
2924 {
2925 rtx op0 = XEXP (addr, 0);
2926 rtx op1 = XEXP (addr, 1);
2927 enum rtx_code code0 = GET_CODE (op0);
2928 enum rtx_code code1 = GET_CODE (op1);
2929
2930 if (code0 == REG || code0 == SUBREG)
2931 {
2932 if (code1 == REG || code1 == SUBREG)
2933 index = op0, base = op1; /* index + base */
2934 else
2935 base = op0, disp = op1; /* base + displacement */
2936 }
2937 else if (code0 == MULT)
e9a25f70 2938 {
e075ae69
RH
2939 index = XEXP (op0, 0);
2940 scale_rtx = XEXP (op0, 1);
2941 if (code1 == REG || code1 == SUBREG)
2942 base = op1; /* index*scale + base */
e9a25f70 2943 else
e075ae69
RH
2944 disp = op1; /* index*scale + disp */
2945 }
2946 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2947 {
2948 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2949 scale_rtx = XEXP (XEXP (op0, 0), 1);
2950 base = XEXP (op0, 1);
2951 disp = op1;
2a2ab3f9 2952 }
e075ae69
RH
2953 else if (code0 == PLUS)
2954 {
2955 index = XEXP (op0, 0); /* index + base + disp */
2956 base = XEXP (op0, 1);
2957 disp = op1;
2958 }
2959 else
2960 return FALSE;
2961 }
2962 else if (GET_CODE (addr) == MULT)
2963 {
2964 index = XEXP (addr, 0); /* index*scale */
2965 scale_rtx = XEXP (addr, 1);
2966 }
2967 else if (GET_CODE (addr) == ASHIFT)
2968 {
2969 rtx tmp;
2970
2971 /* We're called for lea too, which implements ashift on occasion. */
2972 index = XEXP (addr, 0);
2973 tmp = XEXP (addr, 1);
2974 if (GET_CODE (tmp) != CONST_INT)
2975 return FALSE;
2976 scale = INTVAL (tmp);
2977 if ((unsigned HOST_WIDE_INT) scale > 3)
2978 return FALSE;
2979 scale = 1 << scale;
2a2ab3f9 2980 }
2a2ab3f9 2981 else
e075ae69
RH
2982 disp = addr; /* displacement */
2983
2984 /* Extract the integral value of scale. */
2985 if (scale_rtx)
e9a25f70 2986 {
e075ae69
RH
2987 if (GET_CODE (scale_rtx) != CONST_INT)
2988 return FALSE;
2989 scale = INTVAL (scale_rtx);
e9a25f70 2990 }
3b3c6a3f 2991
e075ae69
RH
2992 /* Allow arg pointer and stack pointer as index if there is not scaling */
2993 if (base && index && scale == 1
564d80f4
JH
2994 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2995 || index == stack_pointer_rtx))
e075ae69
RH
2996 {
2997 rtx tmp = base;
2998 base = index;
2999 index = tmp;
3000 }
3001
3002 /* Special case: %ebp cannot be encoded as a base without a displacement. */
564d80f4
JH
3003 if ((base == hard_frame_pointer_rtx
3004 || base == frame_pointer_rtx
3005 || base == arg_pointer_rtx) && !disp)
e075ae69
RH
3006 disp = const0_rtx;
3007
3008 /* Special case: on K6, [%esi] makes the instruction vector decoded.
3009 Avoid this by transforming to [%esi+0]. */
3010 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
3011 && base && !index && !disp
329e1d01 3012 && REG_P (base)
e075ae69
RH
3013 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
3014 disp = const0_rtx;
3015
3016 /* Special case: encode reg+reg instead of reg*2. */
3017 if (!base && index && scale && scale == 2)
3018 base = index, scale = 1;
0f290768 3019
e075ae69
RH
3020 /* Special case: scaling cannot be encoded without base or displacement. */
3021 if (!base && !disp && index && scale != 1)
3022 disp = const0_rtx;
3023
3024 out->base = base;
3025 out->index = index;
3026 out->disp = disp;
3027 out->scale = scale;
3b3c6a3f 3028
e075ae69
RH
3029 return TRUE;
3030}
01329426
JH
3031\f
3032/* Return cost of the memory address x.
3033 For i386, it is better to use a complex address than let gcc copy
3034 the address into a reg and make a new pseudo. But not if the address
3035 requires to two regs - that would mean more pseudos with longer
3036 lifetimes. */
3037int
3038ix86_address_cost (x)
3039 rtx x;
3040{
3041 struct ix86_address parts;
3042 int cost = 1;
3b3c6a3f 3043
01329426
JH
3044 if (!ix86_decompose_address (x, &parts))
3045 abort ();
3046
3047 /* More complex memory references are better. */
3048 if (parts.disp && parts.disp != const0_rtx)
3049 cost--;
3050
3051 /* Attempt to minimize number of registers in the address. */
3052 if ((parts.base
3053 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
3054 || (parts.index
3055 && (!REG_P (parts.index)
3056 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
3057 cost++;
3058
3059 if (parts.base
3060 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
3061 && parts.index
3062 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
3063 && parts.base != parts.index)
3064 cost++;
3065
3066 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
3067 since it's predecode logic can't detect the length of instructions
3068 and it degenerates to vector decoded. Increase cost of such
3069 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 3070 to split such addresses or even refuse such addresses at all.
01329426
JH
3071
3072 Following addressing modes are affected:
3073 [base+scale*index]
3074 [scale*index+disp]
3075 [base+index]
0f290768 3076
01329426
JH
3077 The first and last case may be avoidable by explicitly coding the zero in
3078 memory address, but I don't have AMD-K6 machine handy to check this
3079 theory. */
3080
3081 if (TARGET_K6
3082 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
3083 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
3084 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
3085 cost += 10;
0f290768 3086
01329426
JH
3087 return cost;
3088}
3089\f
b949ea8b
JW
3090/* If X is a machine specific address (i.e. a symbol or label being
3091 referenced as a displacement from the GOT implemented using an
3092 UNSPEC), then return the base term. Otherwise return X. */
3093
3094rtx
3095ix86_find_base_term (x)
3096 rtx x;
3097{
3098 rtx term;
3099
3100 if (GET_CODE (x) != PLUS
3101 || XEXP (x, 0) != pic_offset_table_rtx
3102 || GET_CODE (XEXP (x, 1)) != CONST)
3103 return x;
3104
3105 term = XEXP (XEXP (x, 1), 0);
3106
3107 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
3108 term = XEXP (term, 0);
3109
3110 if (GET_CODE (term) != UNSPEC
3111 || XVECLEN (term, 0) != 1
3112 || XINT (term, 1) != 7)
3113 return x;
3114
3115 term = XVECEXP (term, 0, 0);
3116
3117 if (GET_CODE (term) != SYMBOL_REF
3118 && GET_CODE (term) != LABEL_REF)
3119 return x;
3120
3121 return term;
3122}
3123\f
e075ae69
RH
3124/* Determine if a given CONST RTX is a valid memory displacement
3125 in PIC mode. */
0f290768 3126
59be65f6 3127int
91bb873f
RH
3128legitimate_pic_address_disp_p (disp)
3129 register rtx disp;
3130{
3131 if (GET_CODE (disp) != CONST)
3132 return 0;
3133 disp = XEXP (disp, 0);
3134
3135 if (GET_CODE (disp) == PLUS)
3136 {
3137 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
3138 return 0;
3139 disp = XEXP (disp, 0);
3140 }
3141
3142 if (GET_CODE (disp) != UNSPEC
3143 || XVECLEN (disp, 0) != 1)
3144 return 0;
3145
3146 /* Must be @GOT or @GOTOFF. */
3147 if (XINT (disp, 1) != 6
3148 && XINT (disp, 1) != 7)
3149 return 0;
3150
3151 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3152 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
3153 return 0;
3154
3155 return 1;
3156}
3157
e075ae69
RH
3158/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
3159 memory address for an instruction. The MODE argument is the machine mode
3160 for the MEM expression that wants to use this address.
3161
3162 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
3163 convert common non-canonical forms to canonical form so that they will
3164 be recognized. */
3165
3b3c6a3f
MM
3166int
3167legitimate_address_p (mode, addr, strict)
3168 enum machine_mode mode;
3169 register rtx addr;
3170 int strict;
3171{
e075ae69
RH
3172 struct ix86_address parts;
3173 rtx base, index, disp;
3174 HOST_WIDE_INT scale;
3175 const char *reason = NULL;
3176 rtx reason_rtx = NULL_RTX;
3b3c6a3f
MM
3177
3178 if (TARGET_DEBUG_ADDR)
3179 {
3180 fprintf (stderr,
e9a25f70 3181 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
3b3c6a3f 3182 GET_MODE_NAME (mode), strict);
3b3c6a3f
MM
3183 debug_rtx (addr);
3184 }
3185
e075ae69 3186 if (! ix86_decompose_address (addr, &parts))
3b3c6a3f 3187 {
e075ae69 3188 reason = "decomposition failed";
50e60bc3 3189 goto report_error;
3b3c6a3f
MM
3190 }
3191
e075ae69
RH
3192 base = parts.base;
3193 index = parts.index;
3194 disp = parts.disp;
3195 scale = parts.scale;
91f0226f 3196
e075ae69 3197 /* Validate base register.
e9a25f70
JL
3198
3199 Don't allow SUBREG's here, it can lead to spill failures when the base
3d771dfd
MM
3200 is one word out of a two word structure, which is represented internally
3201 as a DImode int. */
e9a25f70 3202
3b3c6a3f
MM
3203 if (base)
3204 {
e075ae69
RH
3205 reason_rtx = base;
3206
3d771dfd 3207 if (GET_CODE (base) != REG)
3b3c6a3f 3208 {
e075ae69 3209 reason = "base is not a register";
50e60bc3 3210 goto report_error;
3b3c6a3f
MM
3211 }
3212
c954bd01
RH
3213 if (GET_MODE (base) != Pmode)
3214 {
e075ae69 3215 reason = "base is not in Pmode";
50e60bc3 3216 goto report_error;
c954bd01
RH
3217 }
3218
e9a25f70
JL
3219 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
3220 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
3b3c6a3f 3221 {
e075ae69 3222 reason = "base is not valid";
50e60bc3 3223 goto report_error;
3b3c6a3f
MM
3224 }
3225 }
3226
e075ae69 3227 /* Validate index register.
e9a25f70
JL
3228
3229 Don't allow SUBREG's here, it can lead to spill failures when the index
3d771dfd
MM
3230 is one word out of a two word structure, which is represented internally
3231 as a DImode int. */
e075ae69
RH
3232
3233 if (index)
3b3c6a3f 3234 {
e075ae69
RH
3235 reason_rtx = index;
3236
3237 if (GET_CODE (index) != REG)
3b3c6a3f 3238 {
e075ae69 3239 reason = "index is not a register";
50e60bc3 3240 goto report_error;
3b3c6a3f
MM
3241 }
3242
e075ae69 3243 if (GET_MODE (index) != Pmode)
c954bd01 3244 {
e075ae69 3245 reason = "index is not in Pmode";
50e60bc3 3246 goto report_error;
c954bd01
RH
3247 }
3248
e075ae69
RH
3249 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
3250 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
3b3c6a3f 3251 {
e075ae69 3252 reason = "index is not valid";
50e60bc3 3253 goto report_error;
3b3c6a3f
MM
3254 }
3255 }
3b3c6a3f 3256
e075ae69
RH
3257 /* Validate scale factor. */
3258 if (scale != 1)
3b3c6a3f 3259 {
e075ae69
RH
3260 reason_rtx = GEN_INT (scale);
3261 if (!index)
3b3c6a3f 3262 {
e075ae69 3263 reason = "scale without index";
50e60bc3 3264 goto report_error;
3b3c6a3f
MM
3265 }
3266
e075ae69 3267 if (scale != 2 && scale != 4 && scale != 8)
3b3c6a3f 3268 {
e075ae69 3269 reason = "scale is not a valid multiplier";
50e60bc3 3270 goto report_error;
3b3c6a3f
MM
3271 }
3272 }
3273
91bb873f 3274 /* Validate displacement. */
3b3c6a3f
MM
3275 if (disp)
3276 {
e075ae69
RH
3277 reason_rtx = disp;
3278
91bb873f 3279 if (!CONSTANT_ADDRESS_P (disp))
3b3c6a3f 3280 {
e075ae69 3281 reason = "displacement is not constant";
50e60bc3 3282 goto report_error;
3b3c6a3f
MM
3283 }
3284
0d7d98ee 3285 if (TARGET_64BIT)
3b3c6a3f 3286 {
0d7d98ee
JH
3287 if (!x86_64_sign_extended_value (disp))
3288 {
3289 reason = "displacement is out of range";
3290 goto report_error;
3291 }
3292 }
3293 else
3294 {
3295 if (GET_CODE (disp) == CONST_DOUBLE)
3296 {
3297 reason = "displacement is a const_double";
3298 goto report_error;
3299 }
3b3c6a3f
MM
3300 }
3301
91bb873f 3302 if (flag_pic && SYMBOLIC_CONST (disp))
3b3c6a3f 3303 {
0d7d98ee
JH
3304 if (TARGET_64BIT && (index || base))
3305 {
3306 reason = "non-constant pic memory reference";
3307 goto report_error;
3308 }
91bb873f
RH
3309 if (! legitimate_pic_address_disp_p (disp))
3310 {
e075ae69 3311 reason = "displacement is an invalid pic construct";
50e60bc3 3312 goto report_error;
91bb873f
RH
3313 }
3314
4e9efe54 3315 /* This code used to verify that a symbolic pic displacement
0f290768
KH
3316 includes the pic_offset_table_rtx register.
3317
4e9efe54
JH
3318 While this is good idea, unfortunately these constructs may
3319 be created by "adds using lea" optimization for incorrect
3320 code like:
3321
3322 int a;
3323 int foo(int i)
3324 {
3325 return *(&a+i);
3326 }
3327
50e60bc3 3328 This code is nonsensical, but results in addressing
4e9efe54
JH
3329 GOT table with pic_offset_table_rtx base. We can't
3330 just refuse it easilly, since it gets matched by
3331 "addsi3" pattern, that later gets split to lea in the
3332 case output register differs from input. While this
3333 can be handled by separate addsi pattern for this case
3334 that never results in lea, this seems to be easier and
3335 correct fix for crash to disable this test. */
3b3c6a3f 3336 }
91bb873f 3337 else if (HALF_PIC_P ())
3b3c6a3f 3338 {
91bb873f 3339 if (! HALF_PIC_ADDRESS_P (disp)
e075ae69 3340 || (base != NULL_RTX || index != NULL_RTX))
91bb873f 3341 {
e075ae69 3342 reason = "displacement is an invalid half-pic reference";
50e60bc3 3343 goto report_error;
91bb873f 3344 }
3b3c6a3f
MM
3345 }
3346 }
3347
e075ae69 3348 /* Everything looks valid. */
3b3c6a3f 3349 if (TARGET_DEBUG_ADDR)
e075ae69 3350 fprintf (stderr, "Success.\n");
3b3c6a3f 3351 return TRUE;
e075ae69 3352
50e60bc3 3353report_error:
e075ae69
RH
3354 if (TARGET_DEBUG_ADDR)
3355 {
3356 fprintf (stderr, "Error: %s\n", reason);
3357 debug_rtx (reason_rtx);
3358 }
3359 return FALSE;
3b3c6a3f 3360}
3b3c6a3f 3361\f
55efb413
JW
3362/* Return an unique alias set for the GOT. */
3363
0f290768 3364static HOST_WIDE_INT
55efb413
JW
3365ix86_GOT_alias_set ()
3366{
3367 static HOST_WIDE_INT set = -1;
3368 if (set == -1)
3369 set = new_alias_set ();
3370 return set;
0f290768 3371}
55efb413 3372
3b3c6a3f
MM
3373/* Return a legitimate reference for ORIG (an address) using the
3374 register REG. If REG is 0, a new pseudo is generated.
3375
91bb873f 3376 There are two types of references that must be handled:
3b3c6a3f
MM
3377
3378 1. Global data references must load the address from the GOT, via
3379 the PIC reg. An insn is emitted to do this load, and the reg is
3380 returned.
3381
91bb873f
RH
3382 2. Static data references, constant pool addresses, and code labels
3383 compute the address as an offset from the GOT, whose base is in
3384 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
3385 differentiate them from global data objects. The returned
3386 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
3387
3388 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 3389 reg also appears in the address. */
3b3c6a3f
MM
3390
3391rtx
3392legitimize_pic_address (orig, reg)
3393 rtx orig;
3394 rtx reg;
3395{
3396 rtx addr = orig;
3397 rtx new = orig;
91bb873f 3398 rtx base;
3b3c6a3f 3399
91bb873f
RH
3400 if (GET_CODE (addr) == LABEL_REF
3401 || (GET_CODE (addr) == SYMBOL_REF
3402 && (CONSTANT_POOL_ADDRESS_P (addr)
3403 || SYMBOL_REF_FLAG (addr))))
3b3c6a3f 3404 {
91bb873f
RH
3405 /* This symbol may be referenced via a displacement from the PIC
3406 base address (@GOTOFF). */
3b3c6a3f 3407
91bb873f 3408 current_function_uses_pic_offset_table = 1;
4859dd36
RH
3409 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
3410 new = gen_rtx_CONST (Pmode, new);
91bb873f 3411 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3b3c6a3f 3412
91bb873f
RH
3413 if (reg != 0)
3414 {
3b3c6a3f 3415 emit_move_insn (reg, new);
91bb873f 3416 new = reg;
3b3c6a3f 3417 }
3b3c6a3f 3418 }
91bb873f 3419 else if (GET_CODE (addr) == SYMBOL_REF)
3b3c6a3f 3420 {
91bb873f 3421 /* This symbol must be referenced via a load from the
0f290768 3422 Global Offset Table (@GOT). */
3b3c6a3f 3423
91bb873f 3424 current_function_uses_pic_offset_table = 1;
4859dd36
RH
3425 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
3426 new = gen_rtx_CONST (Pmode, new);
91bb873f
RH
3427 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3428 new = gen_rtx_MEM (Pmode, new);
3429 RTX_UNCHANGING_P (new) = 1;
ba4828e0 3430 set_mem_alias_set (new, ix86_GOT_alias_set ());
3b3c6a3f
MM
3431
3432 if (reg == 0)
3433 reg = gen_reg_rtx (Pmode);
91bb873f
RH
3434 emit_move_insn (reg, new);
3435 new = reg;
0f290768 3436 }
91bb873f
RH
3437 else
3438 {
3439 if (GET_CODE (addr) == CONST)
3b3c6a3f 3440 {
91bb873f
RH
3441 addr = XEXP (addr, 0);
3442 if (GET_CODE (addr) == UNSPEC)
3443 {
3444 /* Check that the unspec is one of the ones we generate? */
3445 }
3446 else if (GET_CODE (addr) != PLUS)
564d80f4 3447 abort ();
3b3c6a3f 3448 }
91bb873f
RH
3449 if (GET_CODE (addr) == PLUS)
3450 {
3451 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 3452
91bb873f
RH
3453 /* Check first to see if this is a constant offset from a @GOTOFF
3454 symbol reference. */
3455 if ((GET_CODE (op0) == LABEL_REF
3456 || (GET_CODE (op0) == SYMBOL_REF
3457 && (CONSTANT_POOL_ADDRESS_P (op0)
3458 || SYMBOL_REF_FLAG (op0))))
3459 && GET_CODE (op1) == CONST_INT)
3460 {
3461 current_function_uses_pic_offset_table = 1;
4859dd36
RH
3462 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
3463 new = gen_rtx_PLUS (Pmode, new, op1);
3464 new = gen_rtx_CONST (Pmode, new);
91bb873f
RH
3465 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3466
3467 if (reg != 0)
3468 {
3469 emit_move_insn (reg, new);
3470 new = reg;
3471 }
3472 }
3473 else
3474 {
3475 base = legitimize_pic_address (XEXP (addr, 0), reg);
3476 new = legitimize_pic_address (XEXP (addr, 1),
3477 base == reg ? NULL_RTX : reg);
3478
3479 if (GET_CODE (new) == CONST_INT)
3480 new = plus_constant (base, INTVAL (new));
3481 else
3482 {
3483 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
3484 {
3485 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
3486 new = XEXP (new, 1);
3487 }
3488 new = gen_rtx_PLUS (Pmode, base, new);
3489 }
3490 }
3491 }
3b3c6a3f
MM
3492 }
3493 return new;
3494}
3495\f
3b3c6a3f
MM
3496/* Try machine-dependent ways of modifying an illegitimate address
3497 to be legitimate. If we find one, return the new, valid address.
3498 This macro is used in only one place: `memory_address' in explow.c.
3499
3500 OLDX is the address as it was before break_out_memory_refs was called.
3501 In some cases it is useful to look at this to decide what needs to be done.
3502
3503 MODE and WIN are passed so that this macro can use
3504 GO_IF_LEGITIMATE_ADDRESS.
3505
3506 It is always safe for this macro to do nothing. It exists to recognize
3507 opportunities to optimize the output.
3508
3509 For the 80386, we handle X+REG by loading X into a register R and
3510 using R+REG. R will go in a general reg and indexing will be used.
3511 However, if REG is a broken-out memory address or multiplication,
3512 nothing needs to be done because REG can certainly go in a general reg.
3513
3514 When -fpic is used, special handling is needed for symbolic references.
3515 See comments by legitimize_pic_address in i386.c for details. */
3516
3517rtx
3518legitimize_address (x, oldx, mode)
3519 register rtx x;
bb5177ac 3520 register rtx oldx ATTRIBUTE_UNUSED;
3b3c6a3f
MM
3521 enum machine_mode mode;
3522{
3523 int changed = 0;
3524 unsigned log;
3525
3526 if (TARGET_DEBUG_ADDR)
3527 {
e9a25f70
JL
3528 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
3529 GET_MODE_NAME (mode));
3b3c6a3f
MM
3530 debug_rtx (x);
3531 }
3532
3533 if (flag_pic && SYMBOLIC_CONST (x))
3534 return legitimize_pic_address (x, 0);
3535
3536 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
3537 if (GET_CODE (x) == ASHIFT
3538 && GET_CODE (XEXP (x, 1)) == CONST_INT
3539 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
3540 {
3541 changed = 1;
a269a03c
JC
3542 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
3543 GEN_INT (1 << log));
3b3c6a3f
MM
3544 }
3545
3546 if (GET_CODE (x) == PLUS)
3547 {
0f290768 3548 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 3549
3b3c6a3f
MM
3550 if (GET_CODE (XEXP (x, 0)) == ASHIFT
3551 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3552 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
3553 {
3554 changed = 1;
c5c76735
JL
3555 XEXP (x, 0) = gen_rtx_MULT (Pmode,
3556 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
3557 GEN_INT (1 << log));
3b3c6a3f
MM
3558 }
3559
3560 if (GET_CODE (XEXP (x, 1)) == ASHIFT
3561 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
3562 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
3563 {
3564 changed = 1;
c5c76735
JL
3565 XEXP (x, 1) = gen_rtx_MULT (Pmode,
3566 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
3567 GEN_INT (1 << log));
3b3c6a3f
MM
3568 }
3569
0f290768 3570 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
3571 if (GET_CODE (XEXP (x, 1)) == MULT)
3572 {
3573 rtx tmp = XEXP (x, 0);
3574 XEXP (x, 0) = XEXP (x, 1);
3575 XEXP (x, 1) = tmp;
3576 changed = 1;
3577 }
3578
3579 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
3580 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
3581 created by virtual register instantiation, register elimination, and
3582 similar optimizations. */
3583 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
3584 {
3585 changed = 1;
c5c76735
JL
3586 x = gen_rtx_PLUS (Pmode,
3587 gen_rtx_PLUS (Pmode, XEXP (x, 0),
3588 XEXP (XEXP (x, 1), 0)),
3589 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
3590 }
3591
e9a25f70
JL
3592 /* Canonicalize
3593 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
3594 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
3595 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
3596 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
3597 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
3598 && CONSTANT_P (XEXP (x, 1)))
3599 {
00c79232
ML
3600 rtx constant;
3601 rtx other = NULL_RTX;
3b3c6a3f
MM
3602
3603 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3604 {
3605 constant = XEXP (x, 1);
3606 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
3607 }
3608 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
3609 {
3610 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
3611 other = XEXP (x, 1);
3612 }
3613 else
3614 constant = 0;
3615
3616 if (constant)
3617 {
3618 changed = 1;
c5c76735
JL
3619 x = gen_rtx_PLUS (Pmode,
3620 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
3621 XEXP (XEXP (XEXP (x, 0), 1), 0)),
3622 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
3623 }
3624 }
3625
3626 if (changed && legitimate_address_p (mode, x, FALSE))
3627 return x;
3628
3629 if (GET_CODE (XEXP (x, 0)) == MULT)
3630 {
3631 changed = 1;
3632 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
3633 }
3634
3635 if (GET_CODE (XEXP (x, 1)) == MULT)
3636 {
3637 changed = 1;
3638 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
3639 }
3640
3641 if (changed
3642 && GET_CODE (XEXP (x, 1)) == REG
3643 && GET_CODE (XEXP (x, 0)) == REG)
3644 return x;
3645
3646 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
3647 {
3648 changed = 1;
3649 x = legitimize_pic_address (x, 0);
3650 }
3651
3652 if (changed && legitimate_address_p (mode, x, FALSE))
3653 return x;
3654
3655 if (GET_CODE (XEXP (x, 0)) == REG)
3656 {
3657 register rtx temp = gen_reg_rtx (Pmode);
3658 register rtx val = force_operand (XEXP (x, 1), temp);
3659 if (val != temp)
3660 emit_move_insn (temp, val);
3661
3662 XEXP (x, 1) = temp;
3663 return x;
3664 }
3665
3666 else if (GET_CODE (XEXP (x, 1)) == REG)
3667 {
3668 register rtx temp = gen_reg_rtx (Pmode);
3669 register rtx val = force_operand (XEXP (x, 0), temp);
3670 if (val != temp)
3671 emit_move_insn (temp, val);
3672
3673 XEXP (x, 0) = temp;
3674 return x;
3675 }
3676 }
3677
3678 return x;
3679}
2a2ab3f9
JVA
3680\f
3681/* Print an integer constant expression in assembler syntax. Addition
3682 and subtraction are the only arithmetic that may appear in these
3683 expressions. FILE is the stdio stream to write to, X is the rtx, and
3684 CODE is the operand print code from the output string. */
3685
3686static void
3687output_pic_addr_const (file, x, code)
3688 FILE *file;
3689 rtx x;
3690 int code;
3691{
3692 char buf[256];
3693
3694 switch (GET_CODE (x))
3695 {
3696 case PC:
3697 if (flag_pic)
3698 putc ('.', file);
3699 else
3700 abort ();
3701 break;
3702
3703 case SYMBOL_REF:
91bb873f
RH
3704 assemble_name (file, XSTR (x, 0));
3705 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
3706 fputs ("@PLT", file);
2a2ab3f9
JVA
3707 break;
3708
91bb873f
RH
3709 case LABEL_REF:
3710 x = XEXP (x, 0);
3711 /* FALLTHRU */
2a2ab3f9
JVA
3712 case CODE_LABEL:
3713 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
3714 assemble_name (asm_out_file, buf);
3715 break;
3716
3717 case CONST_INT:
f64cecad 3718 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
3719 break;
3720
3721 case CONST:
3722 /* This used to output parentheses around the expression,
3723 but that does not work on the 386 (either ATT or BSD assembler). */
3724 output_pic_addr_const (file, XEXP (x, 0), code);
3725 break;
3726
3727 case CONST_DOUBLE:
3728 if (GET_MODE (x) == VOIDmode)
3729 {
3730 /* We can use %d if the number is <32 bits and positive. */
3731 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
3732 fprintf (file, "0x%lx%08lx",
3733 (unsigned long) CONST_DOUBLE_HIGH (x),
3734 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 3735 else
f64cecad 3736 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
3737 }
3738 else
3739 /* We can't handle floating point constants;
3740 PRINT_OPERAND must handle them. */
3741 output_operand_lossage ("floating constant misused");
3742 break;
3743
3744 case PLUS:
e9a25f70 3745 /* Some assemblers need integer constants to appear first. */
2a2ab3f9
JVA
3746 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
3747 {
2a2ab3f9 3748 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 3749 putc ('+', file);
e9a25f70 3750 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 3751 }
91bb873f 3752 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 3753 {
2a2ab3f9 3754 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 3755 putc ('+', file);
e9a25f70 3756 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 3757 }
91bb873f
RH
3758 else
3759 abort ();
2a2ab3f9
JVA
3760 break;
3761
3762 case MINUS:
e075ae69 3763 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
2a2ab3f9 3764 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 3765 putc ('-', file);
2a2ab3f9 3766 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 3767 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
2a2ab3f9
JVA
3768 break;
3769
91bb873f
RH
3770 case UNSPEC:
3771 if (XVECLEN (x, 0) != 1)
77ebd435 3772 abort ();
91bb873f
RH
3773 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
3774 switch (XINT (x, 1))
77ebd435
AJ
3775 {
3776 case 6:
3777 fputs ("@GOT", file);
3778 break;
3779 case 7:
3780 fputs ("@GOTOFF", file);
3781 break;
3782 case 8:
3783 fputs ("@PLT", file);
3784 break;
3785 default:
3786 output_operand_lossage ("invalid UNSPEC as operand");
3787 break;
3788 }
91bb873f
RH
3789 break;
3790
2a2ab3f9
JVA
3791 default:
3792 output_operand_lossage ("invalid expression as operand");
3793 }
3794}
1865dbb5 3795
0f290768 3796/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
3797 We need to handle our special PIC relocations. */
3798
0f290768 3799void
1865dbb5
JM
3800i386_dwarf_output_addr_const (file, x)
3801 FILE *file;
3802 rtx x;
3803{
f0ca81d2 3804 fprintf (file, "%s", INT_ASM_OP);
1865dbb5
JM
3805 if (flag_pic)
3806 output_pic_addr_const (file, x, '\0');
3807 else
3808 output_addr_const (file, x);
3809 fputc ('\n', file);
3810}
3811
3812/* In the name of slightly smaller debug output, and to cater to
3813 general assembler losage, recognize PIC+GOTOFF and turn it back
3814 into a direct symbol reference. */
3815
3816rtx
3817i386_simplify_dwarf_addr (orig_x)
3818 rtx orig_x;
3819{
3820 rtx x = orig_x;
3821
3822 if (GET_CODE (x) != PLUS
3823 || GET_CODE (XEXP (x, 0)) != REG
3824 || GET_CODE (XEXP (x, 1)) != CONST)
3825 return orig_x;
3826
3827 x = XEXP (XEXP (x, 1), 0);
3828 if (GET_CODE (x) == UNSPEC
3adbce3d
RH
3829 && (XINT (x, 1) == 6
3830 || XINT (x, 1) == 7))
1865dbb5
JM
3831 return XVECEXP (x, 0, 0);
3832
3833 if (GET_CODE (x) == PLUS
3834 && GET_CODE (XEXP (x, 0)) == UNSPEC
3835 && GET_CODE (XEXP (x, 1)) == CONST_INT
3adbce3d
RH
3836 && (XINT (XEXP (x, 0), 1) == 6
3837 || XINT (XEXP (x, 0), 1) == 7))
1865dbb5
JM
3838 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
3839
3840 return orig_x;
3841}
2a2ab3f9 3842\f
a269a03c 3843static void
e075ae69 3844put_condition_code (code, mode, reverse, fp, file)
a269a03c 3845 enum rtx_code code;
e075ae69
RH
3846 enum machine_mode mode;
3847 int reverse, fp;
a269a03c
JC
3848 FILE *file;
3849{
a269a03c
JC
3850 const char *suffix;
3851
9a915772
JH
3852 if (mode == CCFPmode || mode == CCFPUmode)
3853 {
3854 enum rtx_code second_code, bypass_code;
3855 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3856 if (bypass_code != NIL || second_code != NIL)
3857 abort();
3858 code = ix86_fp_compare_code_to_integer (code);
3859 mode = CCmode;
3860 }
a269a03c
JC
3861 if (reverse)
3862 code = reverse_condition (code);
e075ae69 3863
a269a03c
JC
3864 switch (code)
3865 {
3866 case EQ:
3867 suffix = "e";
3868 break;
a269a03c
JC
3869 case NE:
3870 suffix = "ne";
3871 break;
a269a03c 3872 case GT:
7e08e190 3873 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
e075ae69
RH
3874 abort ();
3875 suffix = "g";
a269a03c 3876 break;
a269a03c 3877 case GTU:
e075ae69
RH
3878 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
3879 Those same assemblers have the same but opposite losage on cmov. */
7e08e190 3880 if (mode != CCmode)
0f290768 3881 abort ();
e075ae69 3882 suffix = fp ? "nbe" : "a";
a269a03c 3883 break;
a269a03c 3884 case LT:
9076b9c1 3885 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 3886 suffix = "s";
7e08e190 3887 else if (mode == CCmode || mode == CCGCmode)
e075ae69 3888 suffix = "l";
9076b9c1 3889 else
0f290768 3890 abort ();
a269a03c 3891 break;
a269a03c 3892 case LTU:
9076b9c1 3893 if (mode != CCmode)
0f290768 3894 abort ();
a269a03c
JC
3895 suffix = "b";
3896 break;
a269a03c 3897 case GE:
9076b9c1 3898 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 3899 suffix = "ns";
7e08e190 3900 else if (mode == CCmode || mode == CCGCmode)
e075ae69 3901 suffix = "ge";
9076b9c1 3902 else
0f290768 3903 abort ();
a269a03c 3904 break;
a269a03c 3905 case GEU:
e075ae69 3906 /* ??? As above. */
7e08e190 3907 if (mode != CCmode)
0f290768 3908 abort ();
7e08e190 3909 suffix = fp ? "nb" : "ae";
a269a03c 3910 break;
a269a03c 3911 case LE:
7e08e190 3912 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
e075ae69
RH
3913 abort ();
3914 suffix = "le";
a269a03c 3915 break;
a269a03c 3916 case LEU:
9076b9c1
JH
3917 if (mode != CCmode)
3918 abort ();
7e08e190 3919 suffix = "be";
a269a03c 3920 break;
3a3677ff 3921 case UNORDERED:
9e7adcb3 3922 suffix = fp ? "u" : "p";
3a3677ff
RH
3923 break;
3924 case ORDERED:
9e7adcb3 3925 suffix = fp ? "nu" : "np";
3a3677ff 3926 break;
a269a03c
JC
3927 default:
3928 abort ();
3929 }
3930 fputs (suffix, file);
3931}
3932
e075ae69
RH
3933void
3934print_reg (x, code, file)
3935 rtx x;
3936 int code;
3937 FILE *file;
e5cb57e8 3938{
e075ae69 3939 if (REGNO (x) == ARG_POINTER_REGNUM
564d80f4 3940 || REGNO (x) == FRAME_POINTER_REGNUM
e075ae69
RH
3941 || REGNO (x) == FLAGS_REG
3942 || REGNO (x) == FPSR_REG)
3943 abort ();
e9a25f70 3944
e075ae69
RH
3945 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3946 putc ('%', file);
3947
ef6257cd 3948 if (code == 'w' || MMX_REG_P (x))
e075ae69
RH
3949 code = 2;
3950 else if (code == 'b')
3951 code = 1;
3952 else if (code == 'k')
3953 code = 4;
3f3f2124
JH
3954 else if (code == 'q')
3955 code = 8;
e075ae69
RH
3956 else if (code == 'y')
3957 code = 3;
3958 else if (code == 'h')
3959 code = 0;
3960 else
3961 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 3962
3f3f2124
JH
3963 /* Irritatingly, AMD extended registers use different naming convention
3964 from the normal registers. */
3965 if (REX_INT_REG_P (x))
3966 {
885a70fd
JH
3967 if (!TARGET_64BIT)
3968 abort ();
3f3f2124
JH
3969 switch (code)
3970 {
ef6257cd 3971 case 0:
3f3f2124
JH
3972 error ("Extended registers have no high halves\n");
3973 break;
3974 case 1:
3975 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
3976 break;
3977 case 2:
3978 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
3979 break;
3980 case 4:
3981 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
3982 break;
3983 case 8:
3984 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
3985 break;
3986 default:
3987 error ("Unsupported operand size for extended register.\n");
3988 break;
3989 }
3990 return;
3991 }
e075ae69
RH
3992 switch (code)
3993 {
3994 case 3:
3995 if (STACK_TOP_P (x))
3996 {
3997 fputs ("st(0)", file);
3998 break;
3999 }
4000 /* FALLTHRU */
e075ae69 4001 case 8:
3f3f2124 4002 case 4:
e075ae69 4003 case 12:
446988df 4004 if (! ANY_FP_REG_P (x))
885a70fd 4005 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
e075ae69 4006 /* FALLTHRU */
a7180f70 4007 case 16:
e075ae69
RH
4008 case 2:
4009 fputs (hi_reg_name[REGNO (x)], file);
4010 break;
4011 case 1:
4012 fputs (qi_reg_name[REGNO (x)], file);
4013 break;
4014 case 0:
4015 fputs (qi_high_reg_name[REGNO (x)], file);
4016 break;
4017 default:
4018 abort ();
fe25fea3 4019 }
e5cb57e8
SC
4020}
4021
2a2ab3f9 4022/* Meaning of CODE:
fe25fea3 4023 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 4024 C -- print opcode suffix for set/cmov insn.
fe25fea3 4025 c -- like C, but print reversed condition
ef6257cd 4026 F,f -- likewise, but for floating-point.
2a2ab3f9
JVA
4027 R -- print the prefix for register names.
4028 z -- print the opcode suffix for the size of the current operand.
4029 * -- print a star (in certain assembler syntax)
fb204271 4030 A -- print an absolute memory reference.
2a2ab3f9 4031 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
4032 s -- print a shift double count, followed by the assemblers argument
4033 delimiter.
fe25fea3
SC
4034 b -- print the QImode name of the register for the indicated operand.
4035 %b0 would print %al if operands[0] is reg 0.
4036 w -- likewise, print the HImode name of the register.
4037 k -- likewise, print the SImode name of the register.
3f3f2124 4038 q -- likewise, print the DImode name of the register.
ef6257cd
JH
4039 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
4040 y -- print "st(0)" instead of "st" as a register.
a46d1d38 4041 D -- print condition for SSE cmp instruction.
ef6257cd
JH
4042 P -- if PIC, print an @PLT suffix.
4043 X -- don't print any sort of PIC '@' suffix for a symbol.
a46d1d38 4044 */
2a2ab3f9
JVA
4045
4046void
4047print_operand (file, x, code)
4048 FILE *file;
4049 rtx x;
4050 int code;
4051{
4052 if (code)
4053 {
4054 switch (code)
4055 {
4056 case '*':
e075ae69 4057 if (ASSEMBLER_DIALECT == 0)
2a2ab3f9
JVA
4058 putc ('*', file);
4059 return;
4060
fb204271
DN
4061 case 'A':
4062 if (ASSEMBLER_DIALECT == 0)
4063 putc ('*', file);
4064 else if (ASSEMBLER_DIALECT == 1)
4065 {
4066 /* Intel syntax. For absolute addresses, registers should not
4067 be surrounded by braces. */
4068 if (GET_CODE (x) != REG)
4069 {
4070 putc ('[', file);
4071 PRINT_OPERAND (file, x, 0);
4072 putc (']', file);
4073 return;
4074 }
4075 }
4076
4077 PRINT_OPERAND (file, x, 0);
4078 return;
4079
4080
2a2ab3f9 4081 case 'L':
e075ae69
RH
4082 if (ASSEMBLER_DIALECT == 0)
4083 putc ('l', file);
2a2ab3f9
JVA
4084 return;
4085
4086 case 'W':
e075ae69
RH
4087 if (ASSEMBLER_DIALECT == 0)
4088 putc ('w', file);
2a2ab3f9
JVA
4089 return;
4090
4091 case 'B':
e075ae69
RH
4092 if (ASSEMBLER_DIALECT == 0)
4093 putc ('b', file);
2a2ab3f9
JVA
4094 return;
4095
4096 case 'Q':
e075ae69
RH
4097 if (ASSEMBLER_DIALECT == 0)
4098 putc ('l', file);
2a2ab3f9
JVA
4099 return;
4100
4101 case 'S':
e075ae69
RH
4102 if (ASSEMBLER_DIALECT == 0)
4103 putc ('s', file);
2a2ab3f9
JVA
4104 return;
4105
5f1ec3e6 4106 case 'T':
e075ae69
RH
4107 if (ASSEMBLER_DIALECT == 0)
4108 putc ('t', file);
5f1ec3e6
JVA
4109 return;
4110
2a2ab3f9
JVA
4111 case 'z':
4112 /* 387 opcodes don't get size suffixes if the operands are
0f290768 4113 registers. */
2a2ab3f9
JVA
4114
4115 if (STACK_REG_P (x))
4116 return;
4117
4118 /* this is the size of op from size of operand */
4119 switch (GET_MODE_SIZE (GET_MODE (x)))
4120 {
2a2ab3f9 4121 case 2:
155d8a47
JW
4122#ifdef HAVE_GAS_FILDS_FISTS
4123 putc ('s', file);
4124#endif
2a2ab3f9
JVA
4125 return;
4126
4127 case 4:
4128 if (GET_MODE (x) == SFmode)
4129 {
e075ae69 4130 putc ('s', file);
2a2ab3f9
JVA
4131 return;
4132 }
4133 else
e075ae69 4134 putc ('l', file);
2a2ab3f9
JVA
4135 return;
4136
5f1ec3e6 4137 case 12:
2b589241 4138 case 16:
e075ae69
RH
4139 putc ('t', file);
4140 return;
5f1ec3e6 4141
2a2ab3f9
JVA
4142 case 8:
4143 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
4144 {
4145#ifdef GAS_MNEMONICS
e075ae69 4146 putc ('q', file);
56c0e8fa 4147#else
e075ae69
RH
4148 putc ('l', file);
4149 putc ('l', file);
56c0e8fa
JVA
4150#endif
4151 }
e075ae69
RH
4152 else
4153 putc ('l', file);
2a2ab3f9 4154 return;
155d8a47
JW
4155
4156 default:
4157 abort ();
2a2ab3f9 4158 }
4af3895e
JVA
4159
4160 case 'b':
4161 case 'w':
4162 case 'k':
3f3f2124 4163 case 'q':
4af3895e
JVA
4164 case 'h':
4165 case 'y':
5cb6195d 4166 case 'X':
e075ae69 4167 case 'P':
4af3895e
JVA
4168 break;
4169
2d49677f
SC
4170 case 's':
4171 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
4172 {
4173 PRINT_OPERAND (file, x, 0);
e075ae69 4174 putc (',', file);
2d49677f 4175 }
a269a03c
JC
4176 return;
4177
a46d1d38
JH
4178 case 'D':
4179 /* Little bit of braindamage here. The SSE compare instructions
4180 does use completely different names for the comparisons that the
4181 fp conditional moves. */
4182 switch (GET_CODE (x))
4183 {
4184 case EQ:
4185 case UNEQ:
4186 fputs ("eq", file);
4187 break;
4188 case LT:
4189 case UNLT:
4190 fputs ("lt", file);
4191 break;
4192 case LE:
4193 case UNLE:
4194 fputs ("le", file);
4195 break;
4196 case UNORDERED:
4197 fputs ("unord", file);
4198 break;
4199 case NE:
4200 case LTGT:
4201 fputs ("neq", file);
4202 break;
4203 case UNGE:
4204 case GE:
4205 fputs ("nlt", file);
4206 break;
4207 case UNGT:
4208 case GT:
4209 fputs ("nle", file);
4210 break;
4211 case ORDERED:
4212 fputs ("ord", file);
4213 break;
4214 default:
4215 abort ();
4216 break;
4217 }
4218 return;
1853aadd 4219 case 'C':
e075ae69 4220 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 4221 return;
fe25fea3 4222 case 'F':
e075ae69 4223 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
4224 return;
4225
e9a25f70 4226 /* Like above, but reverse condition */
e075ae69
RH
4227 case 'c':
4228 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
4229 return;
fe25fea3 4230 case 'f':
e075ae69 4231 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 4232 return;
ef6257cd
JH
4233 case '+':
4234 {
4235 rtx x;
e5cb57e8 4236
ef6257cd
JH
4237 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
4238 return;
a4f31c00 4239
ef6257cd
JH
4240 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4241 if (x)
4242 {
4243 int pred_val = INTVAL (XEXP (x, 0));
4244
4245 if (pred_val < REG_BR_PROB_BASE * 45 / 100
4246 || pred_val > REG_BR_PROB_BASE * 55 / 100)
4247 {
4248 int taken = pred_val > REG_BR_PROB_BASE / 2;
4249 int cputaken = final_forward_branch_p (current_output_insn) == 0;
4250
4251 /* Emit hints only in the case default branch prediction
4252 heruistics would fail. */
4253 if (taken != cputaken)
4254 {
4255 /* We use 3e (DS) prefix for taken branches and
4256 2e (CS) prefix for not taken branches. */
4257 if (taken)
4258 fputs ("ds ; ", file);
4259 else
4260 fputs ("cs ; ", file);
4261 }
4262 }
4263 }
4264 return;
4265 }
4af3895e 4266 default:
68daafd4
JVA
4267 {
4268 char str[50];
68daafd4
JVA
4269 sprintf (str, "invalid operand code `%c'", code);
4270 output_operand_lossage (str);
4271 }
2a2ab3f9
JVA
4272 }
4273 }
e9a25f70 4274
2a2ab3f9
JVA
4275 if (GET_CODE (x) == REG)
4276 {
4277 PRINT_REG (x, code, file);
4278 }
e9a25f70 4279
2a2ab3f9
JVA
4280 else if (GET_CODE (x) == MEM)
4281 {
e075ae69
RH
4282 /* No `byte ptr' prefix for call instructions. */
4283 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
2a2ab3f9 4284 {
69ddee61 4285 const char * size;
e075ae69
RH
4286 switch (GET_MODE_SIZE (GET_MODE (x)))
4287 {
4288 case 1: size = "BYTE"; break;
4289 case 2: size = "WORD"; break;
4290 case 4: size = "DWORD"; break;
4291 case 8: size = "QWORD"; break;
4292 case 12: size = "XWORD"; break;
a7180f70 4293 case 16: size = "XMMWORD"; break;
e075ae69 4294 default:
564d80f4 4295 abort ();
e075ae69 4296 }
fb204271
DN
4297
4298 /* Check for explicit size override (codes 'b', 'w' and 'k') */
4299 if (code == 'b')
4300 size = "BYTE";
4301 else if (code == 'w')
4302 size = "WORD";
4303 else if (code == 'k')
4304 size = "DWORD";
4305
e075ae69
RH
4306 fputs (size, file);
4307 fputs (" PTR ", file);
2a2ab3f9 4308 }
e075ae69
RH
4309
4310 x = XEXP (x, 0);
4311 if (flag_pic && CONSTANT_ADDRESS_P (x))
4312 output_pic_addr_const (file, x, code);
0d7d98ee
JH
4313 /* Avoid (%rip) for call operands. */
4314 else if (CONSTANT_ADDRESS_P (x) && code =='P'
4315 && GET_CODE (x) != CONST_INT)
4316 output_addr_const (file, x);
2a2ab3f9 4317 else
e075ae69 4318 output_address (x);
2a2ab3f9 4319 }
e9a25f70 4320
2a2ab3f9
JVA
4321 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
4322 {
e9a25f70
JL
4323 REAL_VALUE_TYPE r;
4324 long l;
4325
5f1ec3e6
JVA
4326 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4327 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69
RH
4328
4329 if (ASSEMBLER_DIALECT == 0)
4330 putc ('$', file);
52267fcb 4331 fprintf (file, "0x%lx", l);
5f1ec3e6 4332 }
e9a25f70 4333
0f290768 4334 /* These float cases don't actually occur as immediate operands. */
5f1ec3e6
JVA
4335 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
4336 {
e9a25f70
JL
4337 REAL_VALUE_TYPE r;
4338 char dstr[30];
4339
5f1ec3e6
JVA
4340 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4341 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
4342 fprintf (file, "%s", dstr);
2a2ab3f9 4343 }
e9a25f70 4344
2b589241
JH
4345 else if (GET_CODE (x) == CONST_DOUBLE
4346 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
2a2ab3f9 4347 {
e9a25f70
JL
4348 REAL_VALUE_TYPE r;
4349 char dstr[30];
4350
5f1ec3e6
JVA
4351 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4352 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
4353 fprintf (file, "%s", dstr);
2a2ab3f9 4354 }
79325812 4355 else
2a2ab3f9 4356 {
4af3895e 4357 if (code != 'P')
2a2ab3f9 4358 {
695dac07 4359 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69
RH
4360 {
4361 if (ASSEMBLER_DIALECT == 0)
4362 putc ('$', file);
4363 }
2a2ab3f9
JVA
4364 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
4365 || GET_CODE (x) == LABEL_REF)
e075ae69
RH
4366 {
4367 if (ASSEMBLER_DIALECT == 0)
4368 putc ('$', file);
4369 else
4370 fputs ("OFFSET FLAT:", file);
4371 }
2a2ab3f9 4372 }
e075ae69
RH
4373 if (GET_CODE (x) == CONST_INT)
4374 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4375 else if (flag_pic)
2a2ab3f9
JVA
4376 output_pic_addr_const (file, x, code);
4377 else
4378 output_addr_const (file, x);
4379 }
4380}
4381\f
4382/* Print a memory operand whose address is ADDR. */
4383
4384void
4385print_operand_address (file, addr)
4386 FILE *file;
4387 register rtx addr;
4388{
e075ae69
RH
4389 struct ix86_address parts;
4390 rtx base, index, disp;
4391 int scale;
e9a25f70 4392
e075ae69
RH
4393 if (! ix86_decompose_address (addr, &parts))
4394 abort ();
e9a25f70 4395
e075ae69
RH
4396 base = parts.base;
4397 index = parts.index;
4398 disp = parts.disp;
4399 scale = parts.scale;
e9a25f70 4400
e075ae69
RH
4401 if (!base && !index)
4402 {
4403 /* Displacement only requires special attention. */
e9a25f70 4404
e075ae69 4405 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 4406 {
e075ae69 4407 if (ASSEMBLER_DIALECT != 0)
fb204271
DN
4408 {
4409 if (USER_LABEL_PREFIX[0] == 0)
4410 putc ('%', file);
4411 fputs ("ds:", file);
4412 }
e075ae69 4413 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
2a2ab3f9 4414 }
e075ae69
RH
4415 else if (flag_pic)
4416 output_pic_addr_const (file, addr, 0);
4417 else
4418 output_addr_const (file, addr);
0d7d98ee
JH
4419
4420 /* Use one byte shorter RIP relative addressing for 64bit mode. */
4421 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
4422 fputs ("(%rip)", file);
e075ae69
RH
4423 }
4424 else
4425 {
4426 if (ASSEMBLER_DIALECT == 0)
2a2ab3f9 4427 {
e075ae69 4428 if (disp)
2a2ab3f9 4429 {
c399861d 4430 if (flag_pic)
e075ae69
RH
4431 output_pic_addr_const (file, disp, 0);
4432 else if (GET_CODE (disp) == LABEL_REF)
4433 output_asm_label (disp);
2a2ab3f9 4434 else
e075ae69 4435 output_addr_const (file, disp);
2a2ab3f9
JVA
4436 }
4437
e075ae69
RH
4438 putc ('(', file);
4439 if (base)
4440 PRINT_REG (base, 0, file);
4441 if (index)
2a2ab3f9 4442 {
e075ae69
RH
4443 putc (',', file);
4444 PRINT_REG (index, 0, file);
4445 if (scale != 1)
4446 fprintf (file, ",%d", scale);
2a2ab3f9 4447 }
e075ae69 4448 putc (')', file);
2a2ab3f9 4449 }
2a2ab3f9
JVA
4450 else
4451 {
e075ae69 4452 rtx offset = NULL_RTX;
e9a25f70 4453
e075ae69
RH
4454 if (disp)
4455 {
4456 /* Pull out the offset of a symbol; print any symbol itself. */
4457 if (GET_CODE (disp) == CONST
4458 && GET_CODE (XEXP (disp, 0)) == PLUS
4459 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
4460 {
4461 offset = XEXP (XEXP (disp, 0), 1);
4462 disp = gen_rtx_CONST (VOIDmode,
4463 XEXP (XEXP (disp, 0), 0));
4464 }
ce193852 4465
e075ae69
RH
4466 if (flag_pic)
4467 output_pic_addr_const (file, disp, 0);
4468 else if (GET_CODE (disp) == LABEL_REF)
4469 output_asm_label (disp);
4470 else if (GET_CODE (disp) == CONST_INT)
4471 offset = disp;
4472 else
4473 output_addr_const (file, disp);
4474 }
e9a25f70 4475
e075ae69
RH
4476 putc ('[', file);
4477 if (base)
a8620236 4478 {
e075ae69
RH
4479 PRINT_REG (base, 0, file);
4480 if (offset)
4481 {
4482 if (INTVAL (offset) >= 0)
4483 putc ('+', file);
4484 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
4485 }
a8620236 4486 }
e075ae69
RH
4487 else if (offset)
4488 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 4489 else
e075ae69 4490 putc ('0', file);
e9a25f70 4491
e075ae69
RH
4492 if (index)
4493 {
4494 putc ('+', file);
4495 PRINT_REG (index, 0, file);
4496 if (scale != 1)
4497 fprintf (file, "*%d", scale);
4498 }
4499 putc (']', file);
4500 }
2a2ab3f9
JVA
4501 }
4502}
4503\f
4504/* Split one or more DImode RTL references into pairs of SImode
4505 references. The RTL can be REG, offsettable MEM, integer constant, or
4506 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
4507 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 4508 that parallel "operands". */
2a2ab3f9
JVA
4509
4510void
4511split_di (operands, num, lo_half, hi_half)
4512 rtx operands[];
4513 int num;
4514 rtx lo_half[], hi_half[];
4515{
4516 while (num--)
4517 {
57dbca5e 4518 rtx op = operands[num];
e075ae69
RH
4519 if (CONSTANT_P (op))
4520 split_double (op, &lo_half[num], &hi_half[num]);
4521 else if (! reload_completed)
a269a03c
JC
4522 {
4523 lo_half[num] = gen_lowpart (SImode, op);
4524 hi_half[num] = gen_highpart (SImode, op);
4525 }
4526 else if (GET_CODE (op) == REG)
2a2ab3f9 4527 {
0d7d98ee
JH
4528 if (TARGET_64BIT)
4529 abort();
57dbca5e
BS
4530 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
4531 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
2a2ab3f9 4532 }
57dbca5e 4533 else if (offsettable_memref_p (op))
2a2ab3f9 4534 {
f4ef873c 4535 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 4536 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
4537 }
4538 else
564d80f4 4539 abort ();
2a2ab3f9
JVA
4540 }
4541}
4542\f
2a2ab3f9
JVA
4543/* Output code to perform a 387 binary operation in INSN, one of PLUS,
4544 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
4545 is the expression of the binary operation. The output may either be
4546 emitted here, or returned to the caller, like all output_* functions.
4547
4548 There is no guarantee that the operands are the same mode, as they
0f290768 4549 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 4550
e3c2afab
AM
4551#ifndef SYSV386_COMPAT
4552/* Set to 1 for compatibility with brain-damaged assemblers. No-one
4553 wants to fix the assemblers because that causes incompatibility
4554 with gcc. No-one wants to fix gcc because that causes
4555 incompatibility with assemblers... You can use the option of
4556 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
4557#define SYSV386_COMPAT 1
4558#endif
4559
69ddee61 4560const char *
2a2ab3f9
JVA
4561output_387_binary_op (insn, operands)
4562 rtx insn;
4563 rtx *operands;
4564{
e3c2afab 4565 static char buf[30];
69ddee61 4566 const char *p;
1deaa899
JH
4567 const char *ssep;
4568 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
2a2ab3f9 4569
e3c2afab
AM
4570#ifdef ENABLE_CHECKING
4571 /* Even if we do not want to check the inputs, this documents input
4572 constraints. Which helps in understanding the following code. */
4573 if (STACK_REG_P (operands[0])
4574 && ((REG_P (operands[1])
4575 && REGNO (operands[0]) == REGNO (operands[1])
4576 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
4577 || (REG_P (operands[2])
4578 && REGNO (operands[0]) == REGNO (operands[2])
4579 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
4580 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
4581 ; /* ok */
1deaa899 4582 else if (!is_sse)
e3c2afab
AM
4583 abort ();
4584#endif
4585
2a2ab3f9
JVA
4586 switch (GET_CODE (operands[3]))
4587 {
4588 case PLUS:
e075ae69
RH
4589 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4590 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4591 p = "fiadd";
4592 else
4593 p = "fadd";
1deaa899 4594 ssep = "add";
2a2ab3f9
JVA
4595 break;
4596
4597 case MINUS:
e075ae69
RH
4598 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4599 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4600 p = "fisub";
4601 else
4602 p = "fsub";
1deaa899 4603 ssep = "sub";
2a2ab3f9
JVA
4604 break;
4605
4606 case MULT:
e075ae69
RH
4607 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4608 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4609 p = "fimul";
4610 else
4611 p = "fmul";
1deaa899 4612 ssep = "mul";
2a2ab3f9
JVA
4613 break;
4614
4615 case DIV:
e075ae69
RH
4616 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4617 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4618 p = "fidiv";
4619 else
4620 p = "fdiv";
1deaa899 4621 ssep = "div";
2a2ab3f9
JVA
4622 break;
4623
4624 default:
4625 abort ();
4626 }
4627
1deaa899
JH
4628 if (is_sse)
4629 {
4630 strcpy (buf, ssep);
4631 if (GET_MODE (operands[0]) == SFmode)
4632 strcat (buf, "ss\t{%2, %0|%0, %2}");
4633 else
4634 strcat (buf, "sd\t{%2, %0|%0, %2}");
4635 return buf;
4636 }
e075ae69 4637 strcpy (buf, p);
2a2ab3f9
JVA
4638
4639 switch (GET_CODE (operands[3]))
4640 {
4641 case MULT:
4642 case PLUS:
4643 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
4644 {
e3c2afab 4645 rtx temp = operands[2];
2a2ab3f9
JVA
4646 operands[2] = operands[1];
4647 operands[1] = temp;
4648 }
4649
e3c2afab
AM
4650 /* know operands[0] == operands[1]. */
4651
2a2ab3f9 4652 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
4653 {
4654 p = "%z2\t%2";
4655 break;
4656 }
2a2ab3f9
JVA
4657
4658 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
4659 {
4660 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
4661 /* How is it that we are storing to a dead operand[2]?
4662 Well, presumably operands[1] is dead too. We can't
4663 store the result to st(0) as st(0) gets popped on this
4664 instruction. Instead store to operands[2] (which I
4665 think has to be st(1)). st(1) will be popped later.
4666 gcc <= 2.8.1 didn't have this check and generated
4667 assembly code that the Unixware assembler rejected. */
4668 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 4669 else
e3c2afab 4670 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 4671 break;
6b28fd63 4672 }
2a2ab3f9
JVA
4673
4674 if (STACK_TOP_P (operands[0]))
e3c2afab 4675 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 4676 else
e3c2afab 4677 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 4678 break;
2a2ab3f9
JVA
4679
4680 case MINUS:
4681 case DIV:
4682 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
4683 {
4684 p = "r%z1\t%1";
4685 break;
4686 }
2a2ab3f9
JVA
4687
4688 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
4689 {
4690 p = "%z2\t%2";
4691 break;
4692 }
2a2ab3f9 4693
2a2ab3f9 4694 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 4695 {
e3c2afab
AM
4696#if SYSV386_COMPAT
4697 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
4698 derived assemblers, confusingly reverse the direction of
4699 the operation for fsub{r} and fdiv{r} when the
4700 destination register is not st(0). The Intel assembler
4701 doesn't have this brain damage. Read !SYSV386_COMPAT to
4702 figure out what the hardware really does. */
4703 if (STACK_TOP_P (operands[0]))
4704 p = "{p\t%0, %2|rp\t%2, %0}";
4705 else
4706 p = "{rp\t%2, %0|p\t%0, %2}";
4707#else
6b28fd63 4708 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
4709 /* As above for fmul/fadd, we can't store to st(0). */
4710 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 4711 else
e3c2afab
AM
4712 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
4713#endif
e075ae69 4714 break;
6b28fd63 4715 }
2a2ab3f9
JVA
4716
4717 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 4718 {
e3c2afab 4719#if SYSV386_COMPAT
6b28fd63 4720 if (STACK_TOP_P (operands[0]))
e3c2afab 4721 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 4722 else
e3c2afab
AM
4723 p = "{p\t%1, %0|rp\t%0, %1}";
4724#else
4725 if (STACK_TOP_P (operands[0]))
4726 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
4727 else
4728 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
4729#endif
e075ae69 4730 break;
6b28fd63 4731 }
2a2ab3f9
JVA
4732
4733 if (STACK_TOP_P (operands[0]))
4734 {
4735 if (STACK_TOP_P (operands[1]))
e3c2afab 4736 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 4737 else
e3c2afab 4738 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 4739 break;
2a2ab3f9
JVA
4740 }
4741 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
4742 {
4743#if SYSV386_COMPAT
4744 p = "{\t%1, %0|r\t%0, %1}";
4745#else
4746 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
4747#endif
4748 }
2a2ab3f9 4749 else
e3c2afab
AM
4750 {
4751#if SYSV386_COMPAT
4752 p = "{r\t%2, %0|\t%0, %2}";
4753#else
4754 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
4755#endif
4756 }
e075ae69 4757 break;
2a2ab3f9
JVA
4758
4759 default:
4760 abort ();
4761 }
e075ae69
RH
4762
4763 strcat (buf, p);
4764 return buf;
2a2ab3f9 4765}
e075ae69 4766
a4f31c00 4767/* Output code to initialize control word copies used by
7a2e09f4
JH
4768 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
4769 is set to control word rounding downwards. */
4770void
4771emit_i387_cw_initialization (normal, round_down)
4772 rtx normal, round_down;
4773{
4774 rtx reg = gen_reg_rtx (HImode);
4775
4776 emit_insn (gen_x86_fnstcw_1 (normal));
4777 emit_move_insn (reg, normal);
4778 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
4779 && !TARGET_64BIT)
4780 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
4781 else
4782 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
4783 emit_move_insn (round_down, reg);
4784}
4785
2a2ab3f9 4786/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 4787 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 4788 operand may be [SDX]Fmode. */
2a2ab3f9 4789
69ddee61 4790const char *
2a2ab3f9
JVA
4791output_fix_trunc (insn, operands)
4792 rtx insn;
4793 rtx *operands;
4794{
4795 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 4796 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 4797
e075ae69
RH
4798 /* Jump through a hoop or two for DImode, since the hardware has no
4799 non-popping instruction. We used to do this a different way, but
4800 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
4801 if (dimode_p && !stack_top_dies)
4802 output_asm_insn ("fld\t%y1", operands);
e075ae69 4803
7a2e09f4 4804 if (!STACK_TOP_P (operands[1]))
10195bd8
JW
4805 abort ();
4806
e075ae69 4807 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 4808 abort ();
e9a25f70 4809
7a2e09f4 4810 output_asm_insn ("fldcw\t%3", operands);
e075ae69 4811 if (stack_top_dies || dimode_p)
7a2e09f4 4812 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 4813 else
7a2e09f4 4814 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 4815 output_asm_insn ("fldcw\t%2", operands);
10195bd8 4816
e075ae69 4817 return "";
2a2ab3f9 4818}
cda749b1 4819
e075ae69
RH
4820/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
4821 should be used and 2 when fnstsw should be used. UNORDERED_P is true
4822 when fucom should be used. */
4823
69ddee61 4824const char *
e075ae69 4825output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
4826 rtx insn;
4827 rtx *operands;
e075ae69 4828 int eflags_p, unordered_p;
cda749b1 4829{
e075ae69
RH
4830 int stack_top_dies;
4831 rtx cmp_op0 = operands[0];
4832 rtx cmp_op1 = operands[1];
0644b628 4833 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
e075ae69
RH
4834
4835 if (eflags_p == 2)
4836 {
4837 cmp_op0 = cmp_op1;
4838 cmp_op1 = operands[2];
4839 }
0644b628
JH
4840 if (is_sse)
4841 {
4842 if (GET_MODE (operands[0]) == SFmode)
4843 if (unordered_p)
4844 return "ucomiss\t{%1, %0|%0, %1}";
4845 else
4846 return "comiss\t{%1, %0|%0, %y}";
4847 else
4848 if (unordered_p)
4849 return "ucomisd\t{%1, %0|%0, %1}";
4850 else
4851 return "comisd\t{%1, %0|%0, %y}";
4852 }
cda749b1 4853
e075ae69 4854 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
4855 abort ();
4856
e075ae69 4857 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 4858
e075ae69
RH
4859 if (STACK_REG_P (cmp_op1)
4860 && stack_top_dies
4861 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
4862 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 4863 {
e075ae69
RH
4864 /* If both the top of the 387 stack dies, and the other operand
4865 is also a stack register that dies, then this must be a
4866 `fcompp' float compare */
4867
4868 if (eflags_p == 1)
4869 {
4870 /* There is no double popping fcomi variant. Fortunately,
4871 eflags is immune from the fstp's cc clobbering. */
4872 if (unordered_p)
4873 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
4874 else
4875 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
4876 return "fstp\t%y0";
4877 }
4878 else
cda749b1 4879 {
e075ae69
RH
4880 if (eflags_p == 2)
4881 {
4882 if (unordered_p)
4883 return "fucompp\n\tfnstsw\t%0";
4884 else
4885 return "fcompp\n\tfnstsw\t%0";
4886 }
cda749b1
JW
4887 else
4888 {
e075ae69
RH
4889 if (unordered_p)
4890 return "fucompp";
4891 else
4892 return "fcompp";
cda749b1
JW
4893 }
4894 }
cda749b1
JW
4895 }
4896 else
4897 {
e075ae69 4898 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 4899
0f290768 4900 static const char * const alt[24] =
e075ae69
RH
4901 {
4902 "fcom%z1\t%y1",
4903 "fcomp%z1\t%y1",
4904 "fucom%z1\t%y1",
4905 "fucomp%z1\t%y1",
0f290768 4906
e075ae69
RH
4907 "ficom%z1\t%y1",
4908 "ficomp%z1\t%y1",
4909 NULL,
4910 NULL,
4911
4912 "fcomi\t{%y1, %0|%0, %y1}",
4913 "fcomip\t{%y1, %0|%0, %y1}",
4914 "fucomi\t{%y1, %0|%0, %y1}",
4915 "fucomip\t{%y1, %0|%0, %y1}",
4916
4917 NULL,
4918 NULL,
4919 NULL,
4920 NULL,
4921
4922 "fcom%z2\t%y2\n\tfnstsw\t%0",
4923 "fcomp%z2\t%y2\n\tfnstsw\t%0",
4924 "fucom%z2\t%y2\n\tfnstsw\t%0",
4925 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 4926
e075ae69
RH
4927 "ficom%z2\t%y2\n\tfnstsw\t%0",
4928 "ficomp%z2\t%y2\n\tfnstsw\t%0",
4929 NULL,
4930 NULL
4931 };
4932
4933 int mask;
69ddee61 4934 const char *ret;
e075ae69
RH
4935
4936 mask = eflags_p << 3;
4937 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4938 mask |= unordered_p << 1;
4939 mask |= stack_top_dies;
4940
4941 if (mask >= 24)
4942 abort ();
4943 ret = alt[mask];
4944 if (ret == NULL)
4945 abort ();
cda749b1 4946
e075ae69 4947 return ret;
cda749b1
JW
4948 }
4949}
2a2ab3f9 4950
e075ae69 4951/* Output assembler code to FILE to initialize basic-block profiling.
2a2ab3f9 4952
e075ae69 4953 If profile_block_flag == 2
2a2ab3f9 4954
e075ae69
RH
4955 Output code to call the subroutine `__bb_init_trace_func'
4956 and pass two parameters to it. The first parameter is
4957 the address of a block allocated in the object module.
4958 The second parameter is the number of the first basic block
4959 of the function.
2a2ab3f9 4960
e075ae69 4961 The name of the block is a local symbol made with this statement:
0f290768 4962
e075ae69 4963 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
2a2ab3f9 4964
e075ae69
RH
4965 Of course, since you are writing the definition of
4966 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4967 can take a short cut in the definition of this macro and use the
4968 name that you know will result.
2a2ab3f9 4969
e075ae69
RH
4970 The number of the first basic block of the function is
4971 passed to the macro in BLOCK_OR_LABEL.
2a2ab3f9 4972
e075ae69
RH
4973 If described in a virtual assembler language the code to be
4974 output looks like:
2a2ab3f9 4975
e075ae69
RH
4976 parameter1 <- LPBX0
4977 parameter2 <- BLOCK_OR_LABEL
4978 call __bb_init_trace_func
2a2ab3f9 4979
e075ae69 4980 else if profile_block_flag != 0
e74389ff 4981
e075ae69
RH
4982 Output code to call the subroutine `__bb_init_func'
4983 and pass one single parameter to it, which is the same
4984 as the first parameter to `__bb_init_trace_func'.
e74389ff 4985
e075ae69
RH
4986 The first word of this parameter is a flag which will be nonzero if
4987 the object module has already been initialized. So test this word
4988 first, and do not call `__bb_init_func' if the flag is nonzero.
4989 Note: When profile_block_flag == 2 the test need not be done
4990 but `__bb_init_trace_func' *must* be called.
e74389ff 4991
e075ae69
RH
4992 BLOCK_OR_LABEL may be used to generate a label number as a
4993 branch destination in case `__bb_init_func' will not be called.
e74389ff 4994
e075ae69
RH
4995 If described in a virtual assembler language the code to be
4996 output looks like:
2a2ab3f9 4997
e075ae69
RH
4998 cmp (LPBX0),0
4999 jne local_label
5000 parameter1 <- LPBX0
5001 call __bb_init_func
5002 local_label:
5003*/
c572e5ba 5004
e075ae69
RH
5005void
5006ix86_output_function_block_profiler (file, block_or_label)
5007 FILE *file;
5008 int block_or_label;
c572e5ba 5009{
e075ae69
RH
5010 static int num_func = 0;
5011 rtx xops[8];
5012 char block_table[80], false_label[80];
c572e5ba 5013
e075ae69 5014 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
e9a25f70 5015
e075ae69
RH
5016 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
5017 xops[5] = stack_pointer_rtx;
5018 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
2a2ab3f9 5019
e075ae69 5020 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
c572e5ba 5021
e075ae69 5022 switch (profile_block_flag)
c572e5ba 5023 {
e075ae69
RH
5024 case 2:
5025 xops[2] = GEN_INT (block_or_label);
5026 xops[3] = gen_rtx_MEM (Pmode,
5027 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
5028 xops[6] = GEN_INT (8);
e9a25f70 5029
e075ae69
RH
5030 output_asm_insn ("push{l}\t%2", xops);
5031 if (!flag_pic)
5032 output_asm_insn ("push{l}\t%1", xops);
e9a25f70 5033 else
870a0c2c 5034 {
e075ae69
RH
5035 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
5036 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 5037 }
e075ae69
RH
5038 output_asm_insn ("call\t%P3", xops);
5039 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
5040 break;
c572e5ba 5041
e075ae69
RH
5042 default:
5043 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
c572e5ba 5044
e075ae69
RH
5045 xops[0] = const0_rtx;
5046 xops[2] = gen_rtx_MEM (Pmode,
5047 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
5048 xops[3] = gen_rtx_MEM (Pmode,
5049 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
5050 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
5051 xops[6] = GEN_INT (4);
a14003ee 5052
e075ae69 5053 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
446ba526 5054
e075ae69
RH
5055 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
5056 output_asm_insn ("jne\t%2", xops);
870a0c2c 5057
e075ae69
RH
5058 if (!flag_pic)
5059 output_asm_insn ("push{l}\t%1", xops);
5060 else
5061 {
5062 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
5063 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 5064 }
e075ae69
RH
5065 output_asm_insn ("call\t%P3", xops);
5066 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
5067 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
5068 num_func++;
5069 break;
c572e5ba 5070 }
2a2ab3f9 5071}
305f097e 5072
e075ae69
RH
5073/* Output assembler code to FILE to increment a counter associated
5074 with basic block number BLOCKNO.
305f097e 5075
e075ae69 5076 If profile_block_flag == 2
ecbc4695 5077
e075ae69
RH
5078 Output code to initialize the global structure `__bb' and
5079 call the function `__bb_trace_func' which will increment the
5080 counter.
ecbc4695 5081
e075ae69
RH
5082 `__bb' consists of two words. In the first word the number
5083 of the basic block has to be stored. In the second word
0f290768 5084 the address of a block allocated in the object module
e075ae69 5085 has to be stored.
ecbc4695 5086
e075ae69 5087 The basic block number is given by BLOCKNO.
ecbc4695 5088
0f290768 5089 The address of the block is given by the label created with
305f097e 5090
e075ae69 5091 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
305f097e 5092
e075ae69 5093 by FUNCTION_BLOCK_PROFILER.
ecbc4695 5094
e075ae69
RH
5095 Of course, since you are writing the definition of
5096 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
5097 can take a short cut in the definition of this macro and use the
5098 name that you know will result.
305f097e 5099
e075ae69
RH
5100 If described in a virtual assembler language the code to be
5101 output looks like:
305f097e 5102
e075ae69
RH
5103 move BLOCKNO -> (__bb)
5104 move LPBX0 -> (__bb+4)
5105 call __bb_trace_func
305f097e 5106
e075ae69
RH
5107 Note that function `__bb_trace_func' must not change the
5108 machine state, especially the flag register. To grant
5109 this, you must output code to save and restore registers
5110 either in this macro or in the macros MACHINE_STATE_SAVE
5111 and MACHINE_STATE_RESTORE. The last two macros will be
5112 used in the function `__bb_trace_func', so you must make
0f290768 5113 sure that the function prologue does not change any
e075ae69 5114 register prior to saving it with MACHINE_STATE_SAVE.
305f097e 5115
e075ae69 5116 else if profile_block_flag != 0
305f097e 5117
e075ae69
RH
5118 Output code to increment the counter directly.
5119 Basic blocks are numbered separately from zero within each
5120 compiled object module. The count associated with block number
0f290768 5121 BLOCKNO is at index BLOCKNO in an array of words; the name of
e075ae69 5122 this array is a local symbol made with this statement:
32b5b1aa 5123
e075ae69 5124 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
32b5b1aa 5125
e075ae69
RH
5126 Of course, since you are writing the definition of
5127 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
5128 can take a short cut in the definition of this macro and use the
0f290768 5129 name that you know will result.
32b5b1aa 5130
e075ae69
RH
5131 If described in a virtual assembler language the code to be
5132 output looks like:
32b5b1aa 5133
e075ae69
RH
5134 inc (LPBX2+4*BLOCKNO)
5135*/
32b5b1aa 5136
e075ae69
RH
5137void
5138ix86_output_block_profiler (file, blockno)
5139 FILE *file ATTRIBUTE_UNUSED;
5140 int blockno;
5141{
5142 rtx xops[8], cnt_rtx;
5143 char counts[80];
5144 char *block_table = counts;
5145
5146 switch (profile_block_flag)
5147 {
5148 case 2:
5149 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
32b5b1aa 5150
e075ae69
RH
5151 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
5152 xops[2] = GEN_INT (blockno);
5153 xops[3] = gen_rtx_MEM (Pmode,
5154 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
5155 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
5156 xops[5] = plus_constant (xops[4], 4);
5157 xops[0] = gen_rtx_MEM (SImode, xops[4]);
5158 xops[6] = gen_rtx_MEM (SImode, xops[5]);
79325812 5159
e075ae69 5160 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
32b5b1aa 5161
e075ae69
RH
5162 output_asm_insn ("pushf", xops);
5163 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5164 if (flag_pic)
32b5b1aa 5165 {
e075ae69
RH
5166 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
5167 output_asm_insn ("push{l}\t%7", xops);
5168 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
5169 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
5170 output_asm_insn ("pop{l}\t%7", xops);
5171 }
5172 else
5173 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
5174 output_asm_insn ("call\t%P3", xops);
5175 output_asm_insn ("popf", xops);
32b5b1aa 5176
e075ae69 5177 break;
32b5b1aa 5178
e075ae69
RH
5179 default:
5180 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
5181 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
5182 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
32b5b1aa 5183
e075ae69
RH
5184 if (blockno)
5185 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
32b5b1aa 5186
e075ae69
RH
5187 if (flag_pic)
5188 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
32b5b1aa 5189
e075ae69
RH
5190 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
5191 output_asm_insn ("inc{l}\t%0", xops);
32b5b1aa 5192
e075ae69 5193 break;
32b5b1aa 5194 }
32b5b1aa 5195}
32b5b1aa 5196\f
79325812 5197void
e075ae69
RH
5198ix86_expand_move (mode, operands)
5199 enum machine_mode mode;
5200 rtx operands[];
32b5b1aa 5201{
e075ae69 5202 int strict = (reload_in_progress || reload_completed);
e075ae69 5203 rtx insn;
e9a25f70 5204
e075ae69 5205 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
32b5b1aa 5206 {
e075ae69 5207 /* Emit insns to move operands[1] into operands[0]. */
e9a25f70 5208
e075ae69
RH
5209 if (GET_CODE (operands[0]) == MEM)
5210 operands[1] = force_reg (Pmode, operands[1]);
5211 else
32b5b1aa 5212 {
e075ae69
RH
5213 rtx temp = operands[0];
5214 if (GET_CODE (temp) != REG)
5215 temp = gen_reg_rtx (Pmode);
5216 temp = legitimize_pic_address (operands[1], temp);
5217 if (temp == operands[0])
5218 return;
5219 operands[1] = temp;
32b5b1aa 5220 }
e075ae69
RH
5221 }
5222 else
5223 {
d7a29404
JH
5224 if (GET_CODE (operands[0]) == MEM
5225 && (GET_MODE (operands[0]) == QImode
5226 || !push_operand (operands[0], mode))
5227 && GET_CODE (operands[1]) == MEM)
e075ae69 5228 operands[1] = force_reg (mode, operands[1]);
e9a25f70 5229
2c5a510c
RH
5230 if (push_operand (operands[0], mode)
5231 && ! general_no_elim_operand (operands[1], mode))
5232 operands[1] = copy_to_mode_reg (mode, operands[1]);
5233
e075ae69 5234 if (FLOAT_MODE_P (mode))
32b5b1aa 5235 {
d7a29404
JH
5236 /* If we are loading a floating point constant to a register,
5237 force the value to memory now, since we'll get better code
5238 out the back end. */
e075ae69
RH
5239
5240 if (strict)
5241 ;
e075ae69 5242 else if (GET_CODE (operands[1]) == CONST_DOUBLE
d7a29404 5243 && register_operand (operands[0], mode))
e075ae69 5244 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
32b5b1aa 5245 }
32b5b1aa 5246 }
e9a25f70 5247
e075ae69 5248 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
e9a25f70 5249
e075ae69
RH
5250 emit_insn (insn);
5251}
e9a25f70 5252
e075ae69
RH
5253/* Attempt to expand a binary operator. Make the expansion closer to the
5254 actual machine, then just general_operand, which will allow 3 separate
9d81fc27 5255 memory references (one output, two input) in a single insn. */
e9a25f70 5256
e075ae69
RH
5257void
5258ix86_expand_binary_operator (code, mode, operands)
5259 enum rtx_code code;
5260 enum machine_mode mode;
5261 rtx operands[];
5262{
5263 int matching_memory;
5264 rtx src1, src2, dst, op, clob;
5265
5266 dst = operands[0];
5267 src1 = operands[1];
5268 src2 = operands[2];
5269
5270 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
5271 if (GET_RTX_CLASS (code) == 'c'
5272 && (rtx_equal_p (dst, src2)
5273 || immediate_operand (src1, mode)))
5274 {
5275 rtx temp = src1;
5276 src1 = src2;
5277 src2 = temp;
32b5b1aa 5278 }
e9a25f70 5279
e075ae69
RH
5280 /* If the destination is memory, and we do not have matching source
5281 operands, do things in registers. */
5282 matching_memory = 0;
5283 if (GET_CODE (dst) == MEM)
32b5b1aa 5284 {
e075ae69
RH
5285 if (rtx_equal_p (dst, src1))
5286 matching_memory = 1;
5287 else if (GET_RTX_CLASS (code) == 'c'
5288 && rtx_equal_p (dst, src2))
5289 matching_memory = 2;
5290 else
5291 dst = gen_reg_rtx (mode);
5292 }
0f290768 5293
e075ae69
RH
5294 /* Both source operands cannot be in memory. */
5295 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
5296 {
5297 if (matching_memory != 2)
5298 src2 = force_reg (mode, src2);
5299 else
5300 src1 = force_reg (mode, src1);
32b5b1aa 5301 }
e9a25f70 5302
06a964de
JH
5303 /* If the operation is not commutable, source 1 cannot be a constant
5304 or non-matching memory. */
0f290768 5305 if ((CONSTANT_P (src1)
06a964de
JH
5306 || (!matching_memory && GET_CODE (src1) == MEM))
5307 && GET_RTX_CLASS (code) != 'c')
e075ae69 5308 src1 = force_reg (mode, src1);
0f290768 5309
e075ae69 5310 /* If optimizing, copy to regs to improve CSE */
fe577e58 5311 if (optimize && ! no_new_pseudos)
32b5b1aa 5312 {
e075ae69
RH
5313 if (GET_CODE (dst) == MEM)
5314 dst = gen_reg_rtx (mode);
5315 if (GET_CODE (src1) == MEM)
5316 src1 = force_reg (mode, src1);
5317 if (GET_CODE (src2) == MEM)
5318 src2 = force_reg (mode, src2);
32b5b1aa 5319 }
e9a25f70 5320
e075ae69
RH
5321 /* Emit the instruction. */
5322
5323 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
5324 if (reload_in_progress)
5325 {
5326 /* Reload doesn't know about the flags register, and doesn't know that
5327 it doesn't want to clobber it. We can only do this with PLUS. */
5328 if (code != PLUS)
5329 abort ();
5330 emit_insn (op);
5331 }
5332 else
32b5b1aa 5333 {
e075ae69
RH
5334 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5335 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 5336 }
e9a25f70 5337
e075ae69
RH
5338 /* Fix up the destination if needed. */
5339 if (dst != operands[0])
5340 emit_move_insn (operands[0], dst);
5341}
5342
5343/* Return TRUE or FALSE depending on whether the binary operator meets the
5344 appropriate constraints. */
5345
5346int
5347ix86_binary_operator_ok (code, mode, operands)
5348 enum rtx_code code;
5349 enum machine_mode mode ATTRIBUTE_UNUSED;
5350 rtx operands[3];
5351{
5352 /* Both source operands cannot be in memory. */
5353 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
5354 return 0;
5355 /* If the operation is not commutable, source 1 cannot be a constant. */
5356 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
5357 return 0;
5358 /* If the destination is memory, we must have a matching source operand. */
5359 if (GET_CODE (operands[0]) == MEM
5360 && ! (rtx_equal_p (operands[0], operands[1])
5361 || (GET_RTX_CLASS (code) == 'c'
5362 && rtx_equal_p (operands[0], operands[2]))))
5363 return 0;
06a964de
JH
5364 /* If the operation is not commutable and the source 1 is memory, we must
5365 have a matching destionation. */
5366 if (GET_CODE (operands[1]) == MEM
5367 && GET_RTX_CLASS (code) != 'c'
5368 && ! rtx_equal_p (operands[0], operands[1]))
5369 return 0;
e075ae69
RH
5370 return 1;
5371}
5372
5373/* Attempt to expand a unary operator. Make the expansion closer to the
5374 actual machine, then just general_operand, which will allow 2 separate
9d81fc27 5375 memory references (one output, one input) in a single insn. */
e075ae69 5376
9d81fc27 5377void
e075ae69
RH
5378ix86_expand_unary_operator (code, mode, operands)
5379 enum rtx_code code;
5380 enum machine_mode mode;
5381 rtx operands[];
5382{
06a964de
JH
5383 int matching_memory;
5384 rtx src, dst, op, clob;
5385
5386 dst = operands[0];
5387 src = operands[1];
e075ae69 5388
06a964de
JH
5389 /* If the destination is memory, and we do not have matching source
5390 operands, do things in registers. */
5391 matching_memory = 0;
5392 if (GET_CODE (dst) == MEM)
32b5b1aa 5393 {
06a964de
JH
5394 if (rtx_equal_p (dst, src))
5395 matching_memory = 1;
e075ae69 5396 else
06a964de 5397 dst = gen_reg_rtx (mode);
32b5b1aa 5398 }
e9a25f70 5399
06a964de
JH
5400 /* When source operand is memory, destination must match. */
5401 if (!matching_memory && GET_CODE (src) == MEM)
5402 src = force_reg (mode, src);
0f290768 5403
06a964de 5404 /* If optimizing, copy to regs to improve CSE */
fe577e58 5405 if (optimize && ! no_new_pseudos)
06a964de
JH
5406 {
5407 if (GET_CODE (dst) == MEM)
5408 dst = gen_reg_rtx (mode);
5409 if (GET_CODE (src) == MEM)
5410 src = force_reg (mode, src);
5411 }
5412
5413 /* Emit the instruction. */
5414
5415 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
5416 if (reload_in_progress || code == NOT)
5417 {
5418 /* Reload doesn't know about the flags register, and doesn't know that
5419 it doesn't want to clobber it. */
5420 if (code != NOT)
5421 abort ();
5422 emit_insn (op);
5423 }
5424 else
5425 {
5426 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5427 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
5428 }
5429
5430 /* Fix up the destination if needed. */
5431 if (dst != operands[0])
5432 emit_move_insn (operands[0], dst);
e075ae69
RH
5433}
5434
5435/* Return TRUE or FALSE depending on whether the unary operator meets the
5436 appropriate constraints. */
5437
5438int
5439ix86_unary_operator_ok (code, mode, operands)
5440 enum rtx_code code ATTRIBUTE_UNUSED;
5441 enum machine_mode mode ATTRIBUTE_UNUSED;
5442 rtx operands[2] ATTRIBUTE_UNUSED;
5443{
06a964de
JH
5444 /* If one of operands is memory, source and destination must match. */
5445 if ((GET_CODE (operands[0]) == MEM
5446 || GET_CODE (operands[1]) == MEM)
5447 && ! rtx_equal_p (operands[0], operands[1]))
5448 return FALSE;
e075ae69
RH
5449 return TRUE;
5450}
5451
16189740
RH
5452/* Return TRUE or FALSE depending on whether the first SET in INSN
5453 has source and destination with matching CC modes, and that the
5454 CC mode is at least as constrained as REQ_MODE. */
5455
5456int
5457ix86_match_ccmode (insn, req_mode)
5458 rtx insn;
5459 enum machine_mode req_mode;
5460{
5461 rtx set;
5462 enum machine_mode set_mode;
5463
5464 set = PATTERN (insn);
5465 if (GET_CODE (set) == PARALLEL)
5466 set = XVECEXP (set, 0, 0);
5467 if (GET_CODE (set) != SET)
5468 abort ();
9076b9c1
JH
5469 if (GET_CODE (SET_SRC (set)) != COMPARE)
5470 abort ();
16189740
RH
5471
5472 set_mode = GET_MODE (SET_DEST (set));
5473 switch (set_mode)
5474 {
9076b9c1
JH
5475 case CCNOmode:
5476 if (req_mode != CCNOmode
5477 && (req_mode != CCmode
5478 || XEXP (SET_SRC (set), 1) != const0_rtx))
5479 return 0;
5480 break;
16189740 5481 case CCmode:
9076b9c1 5482 if (req_mode == CCGCmode)
16189740
RH
5483 return 0;
5484 /* FALLTHRU */
9076b9c1
JH
5485 case CCGCmode:
5486 if (req_mode == CCGOCmode || req_mode == CCNOmode)
5487 return 0;
5488 /* FALLTHRU */
5489 case CCGOCmode:
16189740
RH
5490 if (req_mode == CCZmode)
5491 return 0;
5492 /* FALLTHRU */
5493 case CCZmode:
5494 break;
5495
5496 default:
5497 abort ();
5498 }
5499
5500 return (GET_MODE (SET_SRC (set)) == set_mode);
5501}
5502
e075ae69
RH
5503/* Generate insn patterns to do an integer compare of OPERANDS. */
5504
5505static rtx
5506ix86_expand_int_compare (code, op0, op1)
5507 enum rtx_code code;
5508 rtx op0, op1;
5509{
5510 enum machine_mode cmpmode;
5511 rtx tmp, flags;
5512
5513 cmpmode = SELECT_CC_MODE (code, op0, op1);
5514 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
5515
5516 /* This is very simple, but making the interface the same as in the
5517 FP case makes the rest of the code easier. */
5518 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
5519 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
5520
5521 /* Return the test that should be put into the flags user, i.e.
5522 the bcc, scc, or cmov instruction. */
5523 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
5524}
5525
3a3677ff
RH
5526/* Figure out whether to use ordered or unordered fp comparisons.
5527 Return the appropriate mode to use. */
e075ae69 5528
b1cdafbb 5529enum machine_mode
3a3677ff 5530ix86_fp_compare_mode (code)
8752c357 5531 enum rtx_code code ATTRIBUTE_UNUSED;
e075ae69 5532{
9e7adcb3
JH
5533 /* ??? In order to make all comparisons reversible, we do all comparisons
5534 non-trapping when compiling for IEEE. Once gcc is able to distinguish
5535 all forms trapping and nontrapping comparisons, we can make inequality
5536 comparisons trapping again, since it results in better code when using
5537 FCOM based compares. */
5538 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
5539}
5540
9076b9c1
JH
5541enum machine_mode
5542ix86_cc_mode (code, op0, op1)
5543 enum rtx_code code;
5544 rtx op0, op1;
5545{
5546 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5547 return ix86_fp_compare_mode (code);
5548 switch (code)
5549 {
5550 /* Only zero flag is needed. */
5551 case EQ: /* ZF=0 */
5552 case NE: /* ZF!=0 */
5553 return CCZmode;
5554 /* Codes needing carry flag. */
265dab10
JH
5555 case GEU: /* CF=0 */
5556 case GTU: /* CF=0 & ZF=0 */
7e08e190
JH
5557 case LTU: /* CF=1 */
5558 case LEU: /* CF=1 | ZF=1 */
265dab10 5559 return CCmode;
9076b9c1
JH
5560 /* Codes possibly doable only with sign flag when
5561 comparing against zero. */
5562 case GE: /* SF=OF or SF=0 */
7e08e190 5563 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
5564 if (op1 == const0_rtx)
5565 return CCGOCmode;
5566 else
5567 /* For other cases Carry flag is not required. */
5568 return CCGCmode;
5569 /* Codes doable only with sign flag when comparing
5570 against zero, but we miss jump instruction for it
5571 so we need to use relational tests agains overflow
5572 that thus needs to be zero. */
5573 case GT: /* ZF=0 & SF=OF */
5574 case LE: /* ZF=1 | SF<>OF */
5575 if (op1 == const0_rtx)
5576 return CCNOmode;
5577 else
5578 return CCGCmode;
5579 default:
0f290768 5580 abort ();
9076b9c1
JH
5581 }
5582}
5583
3a3677ff
RH
5584/* Return true if we should use an FCOMI instruction for this fp comparison. */
5585
a940d8bd 5586int
3a3677ff 5587ix86_use_fcomi_compare (code)
9e7adcb3 5588 enum rtx_code code ATTRIBUTE_UNUSED;
3a3677ff 5589{
9e7adcb3
JH
5590 enum rtx_code swapped_code = swap_condition (code);
5591 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
5592 || (ix86_fp_comparison_cost (swapped_code)
5593 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
5594}
5595
0f290768 5596/* Swap, force into registers, or otherwise massage the two operands
3a3677ff
RH
5597 to a fp comparison. The operands are updated in place; the new
5598 comparsion code is returned. */
5599
5600static enum rtx_code
5601ix86_prepare_fp_compare_args (code, pop0, pop1)
5602 enum rtx_code code;
5603 rtx *pop0, *pop1;
5604{
5605 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
5606 rtx op0 = *pop0, op1 = *pop1;
5607 enum machine_mode op_mode = GET_MODE (op0);
0644b628 5608 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
3a3677ff 5609
e075ae69 5610 /* All of the unordered compare instructions only work on registers.
3a3677ff
RH
5611 The same is true of the XFmode compare instructions. The same is
5612 true of the fcomi compare instructions. */
5613
0644b628
JH
5614 if (!is_sse
5615 && (fpcmp_mode == CCFPUmode
5616 || op_mode == XFmode
5617 || op_mode == TFmode
5618 || ix86_use_fcomi_compare (code)))
e075ae69 5619 {
3a3677ff
RH
5620 op0 = force_reg (op_mode, op0);
5621 op1 = force_reg (op_mode, op1);
e075ae69
RH
5622 }
5623 else
5624 {
5625 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
5626 things around if they appear profitable, otherwise force op0
5627 into a register. */
5628
5629 if (standard_80387_constant_p (op0) == 0
5630 || (GET_CODE (op0) == MEM
5631 && ! (standard_80387_constant_p (op1) == 0
5632 || GET_CODE (op1) == MEM)))
32b5b1aa 5633 {
e075ae69
RH
5634 rtx tmp;
5635 tmp = op0, op0 = op1, op1 = tmp;
5636 code = swap_condition (code);
5637 }
5638
5639 if (GET_CODE (op0) != REG)
3a3677ff 5640 op0 = force_reg (op_mode, op0);
e075ae69
RH
5641
5642 if (CONSTANT_P (op1))
5643 {
5644 if (standard_80387_constant_p (op1))
3a3677ff 5645 op1 = force_reg (op_mode, op1);
e075ae69 5646 else
3a3677ff 5647 op1 = validize_mem (force_const_mem (op_mode, op1));
32b5b1aa
SC
5648 }
5649 }
e9a25f70 5650
9e7adcb3
JH
5651 /* Try to rearrange the comparison to make it cheaper. */
5652 if (ix86_fp_comparison_cost (code)
5653 > ix86_fp_comparison_cost (swap_condition (code))
5654 && (GET_CODE (op0) == REG || !reload_completed))
5655 {
5656 rtx tmp;
5657 tmp = op0, op0 = op1, op1 = tmp;
5658 code = swap_condition (code);
5659 if (GET_CODE (op0) != REG)
5660 op0 = force_reg (op_mode, op0);
5661 }
5662
3a3677ff
RH
5663 *pop0 = op0;
5664 *pop1 = op1;
5665 return code;
5666}
5667
c0c102a9
JH
5668/* Convert comparison codes we use to represent FP comparison to integer
5669 code that will result in proper branch. Return UNKNOWN if no such code
5670 is available. */
5671static enum rtx_code
5672ix86_fp_compare_code_to_integer (code)
5673 enum rtx_code code;
5674{
5675 switch (code)
5676 {
5677 case GT:
5678 return GTU;
5679 case GE:
5680 return GEU;
5681 case ORDERED:
5682 case UNORDERED:
5683 return code;
5684 break;
5685 case UNEQ:
5686 return EQ;
5687 break;
5688 case UNLT:
5689 return LTU;
5690 break;
5691 case UNLE:
5692 return LEU;
5693 break;
5694 case LTGT:
5695 return NE;
5696 break;
5697 default:
5698 return UNKNOWN;
5699 }
5700}
5701
5702/* Split comparison code CODE into comparisons we can do using branch
5703 instructions. BYPASS_CODE is comparison code for branch that will
5704 branch around FIRST_CODE and SECOND_CODE. If some of branches
5705 is not required, set value to NIL.
5706 We never require more than two branches. */
5707static void
5708ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
5709 enum rtx_code code, *bypass_code, *first_code, *second_code;
5710{
5711 *first_code = code;
5712 *bypass_code = NIL;
5713 *second_code = NIL;
5714
5715 /* The fcomi comparison sets flags as follows:
5716
5717 cmp ZF PF CF
5718 > 0 0 0
5719 < 0 0 1
5720 = 1 0 0
5721 un 1 1 1 */
5722
5723 switch (code)
5724 {
5725 case GT: /* GTU - CF=0 & ZF=0 */
5726 case GE: /* GEU - CF=0 */
5727 case ORDERED: /* PF=0 */
5728 case UNORDERED: /* PF=1 */
5729 case UNEQ: /* EQ - ZF=1 */
5730 case UNLT: /* LTU - CF=1 */
5731 case UNLE: /* LEU - CF=1 | ZF=1 */
5732 case LTGT: /* EQ - ZF=0 */
5733 break;
5734 case LT: /* LTU - CF=1 - fails on unordered */
5735 *first_code = UNLT;
5736 *bypass_code = UNORDERED;
5737 break;
5738 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
5739 *first_code = UNLE;
5740 *bypass_code = UNORDERED;
5741 break;
5742 case EQ: /* EQ - ZF=1 - fails on unordered */
5743 *first_code = UNEQ;
5744 *bypass_code = UNORDERED;
5745 break;
5746 case NE: /* NE - ZF=0 - fails on unordered */
5747 *first_code = LTGT;
5748 *second_code = UNORDERED;
5749 break;
5750 case UNGE: /* GEU - CF=0 - fails on unordered */
5751 *first_code = GE;
5752 *second_code = UNORDERED;
5753 break;
5754 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
5755 *first_code = GT;
5756 *second_code = UNORDERED;
5757 break;
5758 default:
5759 abort ();
5760 }
5761 if (!TARGET_IEEE_FP)
5762 {
5763 *second_code = NIL;
5764 *bypass_code = NIL;
5765 }
5766}
5767
9e7adcb3
JH
5768/* Return cost of comparison done fcom + arithmetics operations on AX.
5769 All following functions do use number of instructions as an cost metrics.
5770 In future this should be tweaked to compute bytes for optimize_size and
5771 take into account performance of various instructions on various CPUs. */
5772static int
5773ix86_fp_comparison_arithmetics_cost (code)
5774 enum rtx_code code;
5775{
5776 if (!TARGET_IEEE_FP)
5777 return 4;
5778 /* The cost of code output by ix86_expand_fp_compare. */
5779 switch (code)
5780 {
5781 case UNLE:
5782 case UNLT:
5783 case LTGT:
5784 case GT:
5785 case GE:
5786 case UNORDERED:
5787 case ORDERED:
5788 case UNEQ:
5789 return 4;
5790 break;
5791 case LT:
5792 case NE:
5793 case EQ:
5794 case UNGE:
5795 return 5;
5796 break;
5797 case LE:
5798 case UNGT:
5799 return 6;
5800 break;
5801 default:
5802 abort ();
5803 }
5804}
5805
5806/* Return cost of comparison done using fcomi operation.
5807 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5808static int
5809ix86_fp_comparison_fcomi_cost (code)
5810 enum rtx_code code;
5811{
5812 enum rtx_code bypass_code, first_code, second_code;
5813 /* Return arbitarily high cost when instruction is not supported - this
5814 prevents gcc from using it. */
5815 if (!TARGET_CMOVE)
5816 return 1024;
5817 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5818 return (bypass_code != NIL || second_code != NIL) + 2;
5819}
5820
5821/* Return cost of comparison done using sahf operation.
5822 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5823static int
5824ix86_fp_comparison_sahf_cost (code)
5825 enum rtx_code code;
5826{
5827 enum rtx_code bypass_code, first_code, second_code;
5828 /* Return arbitarily high cost when instruction is not preferred - this
5829 avoids gcc from using it. */
5830 if (!TARGET_USE_SAHF && !optimize_size)
5831 return 1024;
5832 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5833 return (bypass_code != NIL || second_code != NIL) + 3;
5834}
5835
5836/* Compute cost of the comparison done using any method.
5837 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5838static int
5839ix86_fp_comparison_cost (code)
5840 enum rtx_code code;
5841{
5842 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
5843 int min;
5844
5845 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
5846 sahf_cost = ix86_fp_comparison_sahf_cost (code);
5847
5848 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
5849 if (min > sahf_cost)
5850 min = sahf_cost;
5851 if (min > fcomi_cost)
5852 min = fcomi_cost;
5853 return min;
5854}
c0c102a9 5855
3a3677ff
RH
5856/* Generate insn patterns to do a floating point compare of OPERANDS. */
5857
9e7adcb3
JH
5858static rtx
5859ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
3a3677ff
RH
5860 enum rtx_code code;
5861 rtx op0, op1, scratch;
9e7adcb3
JH
5862 rtx *second_test;
5863 rtx *bypass_test;
3a3677ff
RH
5864{
5865 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 5866 rtx tmp, tmp2;
9e7adcb3 5867 int cost = ix86_fp_comparison_cost (code);
c0c102a9 5868 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
5869
5870 fpcmp_mode = ix86_fp_compare_mode (code);
5871 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
5872
9e7adcb3
JH
5873 if (second_test)
5874 *second_test = NULL_RTX;
5875 if (bypass_test)
5876 *bypass_test = NULL_RTX;
5877
c0c102a9
JH
5878 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5879
9e7adcb3
JH
5880 /* Do fcomi/sahf based test when profitable. */
5881 if ((bypass_code == NIL || bypass_test)
5882 && (second_code == NIL || second_test)
5883 && ix86_fp_comparison_arithmetics_cost (code) > cost)
32b5b1aa 5884 {
c0c102a9
JH
5885 if (TARGET_CMOVE)
5886 {
5887 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5888 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
5889 tmp);
5890 emit_insn (tmp);
5891 }
5892 else
5893 {
5894 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5895 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
bf71a4f8
JH
5896 if (!scratch)
5897 scratch = gen_reg_rtx (HImode);
c0c102a9
JH
5898 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5899 emit_insn (gen_x86_sahf_1 (scratch));
5900 }
e075ae69
RH
5901
5902 /* The FP codes work out to act like unsigned. */
9a915772 5903 intcmp_mode = fpcmp_mode;
9e7adcb3
JH
5904 code = first_code;
5905 if (bypass_code != NIL)
5906 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
5907 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5908 const0_rtx);
5909 if (second_code != NIL)
5910 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
5911 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5912 const0_rtx);
e075ae69
RH
5913 }
5914 else
5915 {
5916 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69
RH
5917 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5918 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
bf71a4f8
JH
5919 if (!scratch)
5920 scratch = gen_reg_rtx (HImode);
3a3677ff 5921 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 5922
9a915772
JH
5923 /* In the unordered case, we have to check C2 for NaN's, which
5924 doesn't happen to work out to anything nice combination-wise.
5925 So do some bit twiddling on the value we've got in AH to come
5926 up with an appropriate set of condition codes. */
e075ae69 5927
9a915772
JH
5928 intcmp_mode = CCNOmode;
5929 switch (code)
32b5b1aa 5930 {
9a915772
JH
5931 case GT:
5932 case UNGT:
5933 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 5934 {
3a3677ff 5935 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 5936 code = EQ;
9a915772
JH
5937 }
5938 else
5939 {
5940 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5941 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5942 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
5943 intcmp_mode = CCmode;
5944 code = GEU;
5945 }
5946 break;
5947 case LT:
5948 case UNLT:
5949 if (code == LT && TARGET_IEEE_FP)
5950 {
3a3677ff
RH
5951 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5952 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
5953 intcmp_mode = CCmode;
5954 code = EQ;
9a915772
JH
5955 }
5956 else
5957 {
5958 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
5959 code = NE;
5960 }
5961 break;
5962 case GE:
5963 case UNGE:
5964 if (code == GE || !TARGET_IEEE_FP)
5965 {
3a3677ff 5966 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 5967 code = EQ;
9a915772
JH
5968 }
5969 else
5970 {
5971 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5972 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5973 GEN_INT (0x01)));
5974 code = NE;
5975 }
5976 break;
5977 case LE:
5978 case UNLE:
5979 if (code == LE && TARGET_IEEE_FP)
5980 {
3a3677ff
RH
5981 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5982 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5983 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
5984 intcmp_mode = CCmode;
5985 code = LTU;
9a915772
JH
5986 }
5987 else
5988 {
5989 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5990 code = NE;
5991 }
5992 break;
5993 case EQ:
5994 case UNEQ:
5995 if (code == EQ && TARGET_IEEE_FP)
5996 {
3a3677ff
RH
5997 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5998 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
5999 intcmp_mode = CCmode;
6000 code = EQ;
9a915772
JH
6001 }
6002 else
6003 {
3a3677ff
RH
6004 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
6005 code = NE;
6006 break;
9a915772
JH
6007 }
6008 break;
6009 case NE:
6010 case LTGT:
6011 if (code == NE && TARGET_IEEE_FP)
6012 {
3a3677ff 6013 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9a915772
JH
6014 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
6015 GEN_INT (0x40)));
3a3677ff 6016 code = NE;
9a915772
JH
6017 }
6018 else
6019 {
3a3677ff
RH
6020 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
6021 code = EQ;
32b5b1aa 6022 }
9a915772
JH
6023 break;
6024
6025 case UNORDERED:
6026 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
6027 code = NE;
6028 break;
6029 case ORDERED:
6030 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
6031 code = EQ;
6032 break;
6033
6034 default:
6035 abort ();
32b5b1aa 6036 }
32b5b1aa 6037 }
e075ae69
RH
6038
6039 /* Return the test that should be put into the flags user, i.e.
6040 the bcc, scc, or cmov instruction. */
6041 return gen_rtx_fmt_ee (code, VOIDmode,
6042 gen_rtx_REG (intcmp_mode, FLAGS_REG),
6043 const0_rtx);
6044}
6045
9e3e266c 6046rtx
a1b8572c 6047ix86_expand_compare (code, second_test, bypass_test)
e075ae69 6048 enum rtx_code code;
a1b8572c 6049 rtx *second_test, *bypass_test;
e075ae69
RH
6050{
6051 rtx op0, op1, ret;
6052 op0 = ix86_compare_op0;
6053 op1 = ix86_compare_op1;
6054
a1b8572c
JH
6055 if (second_test)
6056 *second_test = NULL_RTX;
6057 if (bypass_test)
6058 *bypass_test = NULL_RTX;
6059
e075ae69 6060 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 6061 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 6062 second_test, bypass_test);
32b5b1aa 6063 else
e075ae69
RH
6064 ret = ix86_expand_int_compare (code, op0, op1);
6065
6066 return ret;
6067}
6068
03598dea
JH
6069/* Return true if the CODE will result in nontrivial jump sequence. */
6070bool
6071ix86_fp_jump_nontrivial_p (code)
6072 enum rtx_code code;
6073{
6074 enum rtx_code bypass_code, first_code, second_code;
6075 if (!TARGET_CMOVE)
6076 return true;
6077 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
6078 return bypass_code != NIL || second_code != NIL;
6079}
6080
e075ae69 6081void
3a3677ff 6082ix86_expand_branch (code, label)
e075ae69 6083 enum rtx_code code;
e075ae69
RH
6084 rtx label;
6085{
3a3677ff 6086 rtx tmp;
e075ae69 6087
3a3677ff 6088 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 6089 {
3a3677ff
RH
6090 case QImode:
6091 case HImode:
6092 case SImode:
0d7d98ee 6093 simple:
a1b8572c 6094 tmp = ix86_expand_compare (code, NULL, NULL);
e075ae69
RH
6095 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6096 gen_rtx_LABEL_REF (VOIDmode, label),
6097 pc_rtx);
6098 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 6099 return;
e075ae69 6100
3a3677ff
RH
6101 case SFmode:
6102 case DFmode:
0f290768 6103 case XFmode:
2b589241 6104 case TFmode:
3a3677ff
RH
6105 {
6106 rtvec vec;
6107 int use_fcomi;
03598dea 6108 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
6109
6110 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
6111 &ix86_compare_op1);
03598dea
JH
6112
6113 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
6114
6115 /* Check whether we will use the natural sequence with one jump. If
6116 so, we can expand jump early. Otherwise delay expansion by
6117 creating compound insn to not confuse optimizers. */
6118 if (bypass_code == NIL && second_code == NIL
6119 && TARGET_CMOVE)
6120 {
6121 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
6122 gen_rtx_LABEL_REF (VOIDmode, label),
6123 pc_rtx, NULL_RTX);
6124 }
6125 else
6126 {
6127 tmp = gen_rtx_fmt_ee (code, VOIDmode,
6128 ix86_compare_op0, ix86_compare_op1);
6129 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6130 gen_rtx_LABEL_REF (VOIDmode, label),
6131 pc_rtx);
6132 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
6133
6134 use_fcomi = ix86_use_fcomi_compare (code);
6135 vec = rtvec_alloc (3 + !use_fcomi);
6136 RTVEC_ELT (vec, 0) = tmp;
6137 RTVEC_ELT (vec, 1)
6138 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
6139 RTVEC_ELT (vec, 2)
6140 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
6141 if (! use_fcomi)
6142 RTVEC_ELT (vec, 3)
6143 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
6144
6145 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
6146 }
3a3677ff
RH
6147 return;
6148 }
32b5b1aa 6149
3a3677ff 6150 case DImode:
0d7d98ee
JH
6151 if (TARGET_64BIT)
6152 goto simple;
3a3677ff
RH
6153 /* Expand DImode branch into multiple compare+branch. */
6154 {
6155 rtx lo[2], hi[2], label2;
6156 enum rtx_code code1, code2, code3;
32b5b1aa 6157
3a3677ff
RH
6158 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
6159 {
6160 tmp = ix86_compare_op0;
6161 ix86_compare_op0 = ix86_compare_op1;
6162 ix86_compare_op1 = tmp;
6163 code = swap_condition (code);
6164 }
6165 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
6166 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 6167
3a3677ff
RH
6168 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
6169 avoid two branches. This costs one extra insn, so disable when
6170 optimizing for size. */
32b5b1aa 6171
3a3677ff
RH
6172 if ((code == EQ || code == NE)
6173 && (!optimize_size
6174 || hi[1] == const0_rtx || lo[1] == const0_rtx))
6175 {
6176 rtx xor0, xor1;
32b5b1aa 6177
3a3677ff
RH
6178 xor1 = hi[0];
6179 if (hi[1] != const0_rtx)
6180 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
6181 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 6182
3a3677ff
RH
6183 xor0 = lo[0];
6184 if (lo[1] != const0_rtx)
6185 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
6186 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 6187
3a3677ff
RH
6188 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
6189 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 6190
3a3677ff
RH
6191 ix86_compare_op0 = tmp;
6192 ix86_compare_op1 = const0_rtx;
6193 ix86_expand_branch (code, label);
6194 return;
6195 }
e075ae69 6196
1f9124e4
JJ
6197 /* Otherwise, if we are doing less-than or greater-or-equal-than,
6198 op1 is a constant and the low word is zero, then we can just
6199 examine the high word. */
32b5b1aa 6200
1f9124e4
JJ
6201 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
6202 switch (code)
6203 {
6204 case LT: case LTU: case GE: case GEU:
6205 ix86_compare_op0 = hi[0];
6206 ix86_compare_op1 = hi[1];
6207 ix86_expand_branch (code, label);
6208 return;
6209 default:
6210 break;
6211 }
e075ae69 6212
3a3677ff 6213 /* Otherwise, we need two or three jumps. */
e075ae69 6214
3a3677ff 6215 label2 = gen_label_rtx ();
e075ae69 6216
3a3677ff
RH
6217 code1 = code;
6218 code2 = swap_condition (code);
6219 code3 = unsigned_condition (code);
e075ae69 6220
3a3677ff
RH
6221 switch (code)
6222 {
6223 case LT: case GT: case LTU: case GTU:
6224 break;
e075ae69 6225
3a3677ff
RH
6226 case LE: code1 = LT; code2 = GT; break;
6227 case GE: code1 = GT; code2 = LT; break;
6228 case LEU: code1 = LTU; code2 = GTU; break;
6229 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 6230
3a3677ff
RH
6231 case EQ: code1 = NIL; code2 = NE; break;
6232 case NE: code2 = NIL; break;
e075ae69 6233
3a3677ff
RH
6234 default:
6235 abort ();
6236 }
e075ae69 6237
3a3677ff
RH
6238 /*
6239 * a < b =>
6240 * if (hi(a) < hi(b)) goto true;
6241 * if (hi(a) > hi(b)) goto false;
6242 * if (lo(a) < lo(b)) goto true;
6243 * false:
6244 */
6245
6246 ix86_compare_op0 = hi[0];
6247 ix86_compare_op1 = hi[1];
6248
6249 if (code1 != NIL)
6250 ix86_expand_branch (code1, label);
6251 if (code2 != NIL)
6252 ix86_expand_branch (code2, label2);
6253
6254 ix86_compare_op0 = lo[0];
6255 ix86_compare_op1 = lo[1];
6256 ix86_expand_branch (code3, label);
6257
6258 if (code2 != NIL)
6259 emit_label (label2);
6260 return;
6261 }
e075ae69 6262
3a3677ff
RH
6263 default:
6264 abort ();
6265 }
32b5b1aa 6266}
e075ae69 6267
9e7adcb3
JH
6268/* Split branch based on floating point condition. */
6269void
03598dea
JH
6270ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
6271 enum rtx_code code;
6272 rtx op1, op2, target1, target2, tmp;
9e7adcb3
JH
6273{
6274 rtx second, bypass;
6275 rtx label = NULL_RTX;
03598dea 6276 rtx condition;
6b24c259
JH
6277 int bypass_probability = -1, second_probability = -1, probability = -1;
6278 rtx i;
9e7adcb3
JH
6279
6280 if (target2 != pc_rtx)
6281 {
6282 rtx tmp = target2;
6283 code = reverse_condition_maybe_unordered (code);
6284 target2 = target1;
6285 target1 = tmp;
6286 }
6287
6288 condition = ix86_expand_fp_compare (code, op1, op2,
6289 tmp, &second, &bypass);
6b24c259
JH
6290
6291 if (split_branch_probability >= 0)
6292 {
6293 /* Distribute the probabilities across the jumps.
6294 Assume the BYPASS and SECOND to be always test
6295 for UNORDERED. */
6296 probability = split_branch_probability;
6297
6298 /* Value of 1 is low enought to make no need for probability
6299 to be updated. Later we may run some experiments and see
6300 if unordered values are more frequent in practice. */
6301 if (bypass)
6302 bypass_probability = 1;
6303 if (second)
6304 second_probability = 1;
6305 }
9e7adcb3
JH
6306 if (bypass != NULL_RTX)
6307 {
6308 label = gen_label_rtx ();
6b24c259
JH
6309 i = emit_jump_insn (gen_rtx_SET
6310 (VOIDmode, pc_rtx,
6311 gen_rtx_IF_THEN_ELSE (VOIDmode,
6312 bypass,
6313 gen_rtx_LABEL_REF (VOIDmode,
6314 label),
6315 pc_rtx)));
6316 if (bypass_probability >= 0)
6317 REG_NOTES (i)
6318 = gen_rtx_EXPR_LIST (REG_BR_PROB,
6319 GEN_INT (bypass_probability),
6320 REG_NOTES (i));
6321 }
6322 i = emit_jump_insn (gen_rtx_SET
9e7adcb3
JH
6323 (VOIDmode, pc_rtx,
6324 gen_rtx_IF_THEN_ELSE (VOIDmode,
6b24c259
JH
6325 condition, target1, target2)));
6326 if (probability >= 0)
6327 REG_NOTES (i)
6328 = gen_rtx_EXPR_LIST (REG_BR_PROB,
6329 GEN_INT (probability),
6330 REG_NOTES (i));
6331 if (second != NULL_RTX)
9e7adcb3 6332 {
6b24c259
JH
6333 i = emit_jump_insn (gen_rtx_SET
6334 (VOIDmode, pc_rtx,
6335 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
6336 target2)));
6337 if (second_probability >= 0)
6338 REG_NOTES (i)
6339 = gen_rtx_EXPR_LIST (REG_BR_PROB,
6340 GEN_INT (second_probability),
6341 REG_NOTES (i));
9e7adcb3 6342 }
9e7adcb3
JH
6343 if (label != NULL_RTX)
6344 emit_label (label);
6345}
6346
32b5b1aa 6347int
3a3677ff 6348ix86_expand_setcc (code, dest)
e075ae69 6349 enum rtx_code code;
e075ae69 6350 rtx dest;
32b5b1aa 6351{
a1b8572c
JH
6352 rtx ret, tmp, tmpreg;
6353 rtx second_test, bypass_test;
e075ae69
RH
6354 int type;
6355
885a70fd
JH
6356 if (GET_MODE (ix86_compare_op0) == DImode
6357 && !TARGET_64BIT)
e075ae69
RH
6358 return 0; /* FAIL */
6359
6360 /* Three modes of generation:
6361 0 -- destination does not overlap compare sources:
6362 clear dest first, emit strict_low_part setcc.
6363 1 -- destination does overlap compare sources:
6364 emit subreg setcc, zero extend.
6365 2 -- destination is in QImode:
6366 emit setcc only.
e075ae69 6367
c50e5bc0
RH
6368 We don't use mode 0 early in compilation because it confuses CSE.
6369 There are peepholes to turn mode 1 into mode 0 if things work out
6370 nicely after reload. */
6371
6372 type = cse_not_expected ? 0 : 1;
e075ae69
RH
6373
6374 if (GET_MODE (dest) == QImode)
6375 type = 2;
6376 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
a500c31b 6377 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
e075ae69
RH
6378 type = 1;
6379
6380 if (type == 0)
6381 emit_move_insn (dest, const0_rtx);
6382
a1b8572c 6383 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
6384 PUT_MODE (ret, QImode);
6385
6386 tmp = dest;
a1b8572c 6387 tmpreg = dest;
e075ae69 6388 if (type == 0)
32b5b1aa 6389 {
e075ae69 6390 tmp = gen_lowpart (QImode, dest);
a1b8572c 6391 tmpreg = tmp;
e075ae69
RH
6392 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
6393 }
6394 else if (type == 1)
6395 {
6396 if (!cse_not_expected)
6397 tmp = gen_reg_rtx (QImode);
6398 else
6399 tmp = gen_lowpart (QImode, dest);
a1b8572c 6400 tmpreg = tmp;
e075ae69 6401 }
32b5b1aa 6402
e075ae69 6403 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
6404 if (bypass_test || second_test)
6405 {
6406 rtx test = second_test;
6407 int bypass = 0;
6408 rtx tmp2 = gen_reg_rtx (QImode);
6409 if (bypass_test)
6410 {
6411 if (second_test)
6412 abort();
6413 test = bypass_test;
6414 bypass = 1;
6415 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
6416 }
6417 PUT_MODE (test, QImode);
6418 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
6419
6420 if (bypass)
6421 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
6422 else
6423 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
6424 }
e075ae69
RH
6425
6426 if (type == 1)
6427 {
6428 rtx clob;
6429
6430 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
6431 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
6432 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6433 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6434 emit_insn (tmp);
32b5b1aa 6435 }
e075ae69
RH
6436
6437 return 1; /* DONE */
32b5b1aa 6438}
e075ae69 6439
32b5b1aa 6440int
e075ae69
RH
6441ix86_expand_int_movcc (operands)
6442 rtx operands[];
32b5b1aa 6443{
e075ae69
RH
6444 enum rtx_code code = GET_CODE (operands[1]), compare_code;
6445 rtx compare_seq, compare_op;
a1b8572c 6446 rtx second_test, bypass_test;
32b5b1aa 6447
36583fea
JH
6448 /* When the compare code is not LTU or GEU, we can not use sbbl case.
6449 In case comparsion is done with immediate, we can convert it to LTU or
6450 GEU by altering the integer. */
6451
6452 if ((code == LEU || code == GTU)
6453 && GET_CODE (ix86_compare_op1) == CONST_INT
6454 && GET_MODE (operands[0]) != HImode
6455 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
0f290768 6456 && GET_CODE (operands[2]) == CONST_INT
36583fea
JH
6457 && GET_CODE (operands[3]) == CONST_INT)
6458 {
6459 if (code == LEU)
6460 code = LTU;
6461 else
6462 code = GEU;
6463 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
6464 }
3a3677ff 6465
e075ae69 6466 start_sequence ();
a1b8572c 6467 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
6468 compare_seq = gen_sequence ();
6469 end_sequence ();
6470
6471 compare_code = GET_CODE (compare_op);
6472
6473 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
6474 HImode insns, we'd be swallowed in word prefix ops. */
6475
6476 if (GET_MODE (operands[0]) != HImode
885a70fd 6477 && GET_MODE (operands[0]) != DImode
0f290768 6478 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
6479 && GET_CODE (operands[3]) == CONST_INT)
6480 {
6481 rtx out = operands[0];
6482 HOST_WIDE_INT ct = INTVAL (operands[2]);
6483 HOST_WIDE_INT cf = INTVAL (operands[3]);
6484 HOST_WIDE_INT diff;
6485
a1b8572c
JH
6486 if ((compare_code == LTU || compare_code == GEU)
6487 && !second_test && !bypass_test)
e075ae69 6488 {
e075ae69
RH
6489
6490 /* Detect overlap between destination and compare sources. */
6491 rtx tmp = out;
6492
0f290768 6493 /* To simplify rest of code, restrict to the GEU case. */
36583fea
JH
6494 if (compare_code == LTU)
6495 {
6496 int tmp = ct;
6497 ct = cf;
6498 cf = tmp;
6499 compare_code = reverse_condition (compare_code);
6500 code = reverse_condition (code);
6501 }
6502 diff = ct - cf;
6503
e075ae69 6504 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
a500c31b 6505 || reg_overlap_mentioned_p (out, ix86_compare_op1))
e075ae69
RH
6506 tmp = gen_reg_rtx (SImode);
6507
6508 emit_insn (compare_seq);
6509 emit_insn (gen_x86_movsicc_0_m1 (tmp));
6510
36583fea
JH
6511 if (diff == 1)
6512 {
6513 /*
6514 * cmpl op0,op1
6515 * sbbl dest,dest
6516 * [addl dest, ct]
6517 *
6518 * Size 5 - 8.
6519 */
6520 if (ct)
e99af66b 6521 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
36583fea
JH
6522 }
6523 else if (cf == -1)
6524 {
6525 /*
6526 * cmpl op0,op1
6527 * sbbl dest,dest
6528 * orl $ct, dest
6529 *
6530 * Size 8.
6531 */
e99af66b 6532 emit_insn (gen_iorsi3 (tmp, tmp, GEN_INT (ct)));
36583fea
JH
6533 }
6534 else if (diff == -1 && ct)
6535 {
6536 /*
6537 * cmpl op0,op1
6538 * sbbl dest,dest
6539 * xorl $-1, dest
6540 * [addl dest, cf]
6541 *
6542 * Size 8 - 11.
6543 */
6544 emit_insn (gen_one_cmplsi2 (tmp, tmp));
6545 if (cf)
e99af66b 6546 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (cf)));
36583fea
JH
6547 }
6548 else
6549 {
6550 /*
6551 * cmpl op0,op1
6552 * sbbl dest,dest
6553 * andl cf - ct, dest
6554 * [addl dest, ct]
6555 *
6556 * Size 8 - 11.
6557 */
e99af66b 6558 emit_insn (gen_andsi3 (tmp, tmp, GEN_INT (trunc_int_for_mode
7471a1f0 6559 (cf - ct, SImode))));
36583fea 6560 if (ct)
e99af66b 6561 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
36583fea 6562 }
e075ae69
RH
6563
6564 if (tmp != out)
6565 emit_move_insn (out, tmp);
6566
6567 return 1; /* DONE */
6568 }
6569
6570 diff = ct - cf;
6571 if (diff < 0)
6572 {
6573 HOST_WIDE_INT tmp;
6574 tmp = ct, ct = cf, cf = tmp;
6575 diff = -diff;
734dba19
JH
6576 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
6577 {
6578 /* We may be reversing unordered compare to normal compare, that
6579 is not valid in general (we may convert non-trapping condition
6580 to trapping one), however on i386 we currently emit all
6581 comparisons unordered. */
6582 compare_code = reverse_condition_maybe_unordered (compare_code);
6583 code = reverse_condition_maybe_unordered (code);
6584 }
6585 else
6586 {
6587 compare_code = reverse_condition (compare_code);
6588 code = reverse_condition (code);
6589 }
e075ae69
RH
6590 }
6591 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
6592 || diff == 3 || diff == 5 || diff == 9)
6593 {
6594 /*
6595 * xorl dest,dest
6596 * cmpl op1,op2
6597 * setcc dest
6598 * lea cf(dest*(ct-cf)),dest
6599 *
6600 * Size 14.
6601 *
6602 * This also catches the degenerate setcc-only case.
6603 */
6604
6605 rtx tmp;
6606 int nops;
6607
6608 out = emit_store_flag (out, code, ix86_compare_op0,
6609 ix86_compare_op1, VOIDmode, 0, 1);
6610
6611 nops = 0;
885a70fd
JH
6612 /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics
6613 done in proper mode to match. */
e075ae69 6614 if (diff == 1)
885a70fd
JH
6615 {
6616 if (Pmode != SImode)
6617 tmp = gen_lowpart (Pmode, out);
6618 else
6619 tmp = out;
6620 }
e075ae69
RH
6621 else
6622 {
885a70fd
JH
6623 rtx out1;
6624 if (Pmode != SImode)
6625 out1 = gen_lowpart (Pmode, out);
6626 else
6627 out1 = out;
6628 tmp = gen_rtx_MULT (Pmode, out1, GEN_INT (diff & ~1));
e075ae69
RH
6629 nops++;
6630 if (diff & 1)
6631 {
885a70fd 6632 tmp = gen_rtx_PLUS (Pmode, tmp, out1);
e075ae69
RH
6633 nops++;
6634 }
6635 }
6636 if (cf != 0)
6637 {
885a70fd 6638 tmp = gen_rtx_PLUS (Pmode, tmp, GEN_INT (cf));
e075ae69
RH
6639 nops++;
6640 }
885a70fd
JH
6641 if (tmp != out
6642 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
e075ae69 6643 {
885a70fd
JH
6644 if (Pmode != SImode)
6645 tmp = gen_rtx_SUBREG (SImode, tmp, 0);
6646
6647 /* ??? We should to take care for outputing non-lea arithmetics
6648 for Pmode != SImode case too, but it is quite tricky and not
6649 too important, since all TARGET_64BIT machines support real
6650 conditional moves. */
6651 if (nops == 1 && Pmode == SImode)
e075ae69
RH
6652 {
6653 rtx clob;
6654
6655 clob = gen_rtx_REG (CCmode, FLAGS_REG);
6656 clob = gen_rtx_CLOBBER (VOIDmode, clob);
6657
6658 tmp = gen_rtx_SET (VOIDmode, out, tmp);
6659 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6660 emit_insn (tmp);
6661 }
6662 else
6663 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
6664 }
6665 if (out != operands[0])
6666 emit_move_insn (operands[0], out);
6667
6668 return 1; /* DONE */
6669 }
6670
6671 /*
6672 * General case: Jumpful:
6673 * xorl dest,dest cmpl op1, op2
6674 * cmpl op1, op2 movl ct, dest
6675 * setcc dest jcc 1f
6676 * decl dest movl cf, dest
6677 * andl (cf-ct),dest 1:
6678 * addl ct,dest
0f290768 6679 *
e075ae69
RH
6680 * Size 20. Size 14.
6681 *
6682 * This is reasonably steep, but branch mispredict costs are
6683 * high on modern cpus, so consider failing only if optimizing
6684 * for space.
6685 *
6686 * %%% Parameterize branch_cost on the tuning architecture, then
6687 * use that. The 80386 couldn't care less about mispredicts.
6688 */
6689
6690 if (!optimize_size && !TARGET_CMOVE)
6691 {
6692 if (ct == 0)
6693 {
6694 ct = cf;
6695 cf = 0;
734dba19
JH
6696 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
6697 {
6698 /* We may be reversing unordered compare to normal compare,
6699 that is not valid in general (we may convert non-trapping
6700 condition to trapping one), however on i386 we currently
6701 emit all comparisons unordered. */
6702 compare_code = reverse_condition_maybe_unordered (compare_code);
6703 code = reverse_condition_maybe_unordered (code);
6704 }
6705 else
6706 {
6707 compare_code = reverse_condition (compare_code);
6708 code = reverse_condition (code);
6709 }
e075ae69
RH
6710 }
6711
6712 out = emit_store_flag (out, code, ix86_compare_op0,
6713 ix86_compare_op1, VOIDmode, 0, 1);
6714
6715 emit_insn (gen_addsi3 (out, out, constm1_rtx));
7471a1f0
AO
6716 emit_insn (gen_andsi3 (out, out, GEN_INT (trunc_int_for_mode
6717 (cf - ct, SImode))));
e075ae69
RH
6718 if (ct != 0)
6719 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
6720 if (out != operands[0])
6721 emit_move_insn (operands[0], out);
6722
6723 return 1; /* DONE */
6724 }
6725 }
6726
6727 if (!TARGET_CMOVE)
6728 {
6729 /* Try a few things more with specific constants and a variable. */
6730
78a0d70c 6731 optab op;
e075ae69
RH
6732 rtx var, orig_out, out, tmp;
6733
6734 if (optimize_size)
6735 return 0; /* FAIL */
6736
0f290768 6737 /* If one of the two operands is an interesting constant, load a
e075ae69 6738 constant with the above and mask it in with a logical operation. */
0f290768 6739
e075ae69
RH
6740 if (GET_CODE (operands[2]) == CONST_INT)
6741 {
6742 var = operands[3];
6743 if (INTVAL (operands[2]) == 0)
6744 operands[3] = constm1_rtx, op = and_optab;
6745 else if (INTVAL (operands[2]) == -1)
6746 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
6747 else
6748 return 0; /* FAIL */
e075ae69
RH
6749 }
6750 else if (GET_CODE (operands[3]) == CONST_INT)
6751 {
6752 var = operands[2];
6753 if (INTVAL (operands[3]) == 0)
6754 operands[2] = constm1_rtx, op = and_optab;
6755 else if (INTVAL (operands[3]) == -1)
6756 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
6757 else
6758 return 0; /* FAIL */
e075ae69 6759 }
78a0d70c 6760 else
e075ae69
RH
6761 return 0; /* FAIL */
6762
6763 orig_out = operands[0];
6764 tmp = gen_reg_rtx (GET_MODE (orig_out));
6765 operands[0] = tmp;
6766
6767 /* Recurse to get the constant loaded. */
6768 if (ix86_expand_int_movcc (operands) == 0)
6769 return 0; /* FAIL */
6770
6771 /* Mask in the interesting variable. */
6772 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
6773 OPTAB_WIDEN);
6774 if (out != orig_out)
6775 emit_move_insn (orig_out, out);
6776
6777 return 1; /* DONE */
6778 }
6779
6780 /*
6781 * For comparison with above,
6782 *
6783 * movl cf,dest
6784 * movl ct,tmp
6785 * cmpl op1,op2
6786 * cmovcc tmp,dest
6787 *
6788 * Size 15.
6789 */
6790
6791 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
6792 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
6793 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
6794 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
6795
a1b8572c
JH
6796 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6797 {
6798 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6799 emit_move_insn (tmp, operands[3]);
6800 operands[3] = tmp;
6801 }
6802 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6803 {
6804 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6805 emit_move_insn (tmp, operands[2]);
6806 operands[2] = tmp;
6807 }
c9682caf
JH
6808 if (! register_operand (operands[2], VOIDmode)
6809 && ! register_operand (operands[3], VOIDmode))
6810 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
a1b8572c 6811
e075ae69
RH
6812 emit_insn (compare_seq);
6813 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6814 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6815 compare_op, operands[2],
6816 operands[3])));
a1b8572c
JH
6817 if (bypass_test)
6818 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6819 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6820 bypass_test,
6821 operands[3],
6822 operands[0])));
6823 if (second_test)
6824 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6825 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6826 second_test,
6827 operands[2],
6828 operands[0])));
e075ae69
RH
6829
6830 return 1; /* DONE */
e9a25f70 6831}
e075ae69 6832
/* Expand a floating point conditional move.  operands[0] is the
   destination, operands[1] supplies the comparison code, operands[2] is the
   value selected when the comparison holds and operands[3] the value
   selected otherwise.  In the SSE path the compared values are read from
   ix86_compare_op0 and ix86_compare_op1.  Every path returns 1.  */
32b5b1aa 6833int
e075ae69
RH
6834ix86_expand_fp_movcc (operands)
6835 rtx operands[];
32b5b1aa 6836{
e075ae69 6837 enum rtx_code code;
e075ae69 6838 rtx tmp;
a1b8572c 6839 rtx compare_op, second_test, bypass_test;
32b5b1aa 6840
0073023d
JH
6841 /* For SF/DFmode conditional moves based on comparisons
6842 in same mode, we may want to use SSE min/max instructions. */
6843 if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
6844 || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
6845 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
fa9f36a1
JH
6846 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
6847 && (!TARGET_IEEE_FP
6848 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
0073023d
JH
6849 /* We may be called from the post-reload splitter. */
6850 && (!REG_P (operands[0])
6851 || SSE_REG_P (operands[0])
52a661a6 6852 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
0073023d
JH
6853 {
6854 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
6855 code = GET_CODE (operands[1]);
6856
6857 /* See if we have (cross) match between comparison operands and
6858 conditional move operands. */
6859 if (rtx_equal_p (operands[2], op1))
6860 {
6861 rtx tmp = op0;
6862 op0 = op1;
6863 op1 = tmp;
6864 code = reverse_condition_maybe_unordered (code);
6865 }
6866 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
6867 {
6868 /* Check for min operation. */
6869 if (code == LT)
6870 {
6871 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6872 if (memory_operand (op0, VOIDmode))
6873 op0 = force_reg (GET_MODE (operands[0]), op0);
6874 if (GET_MODE (operands[0]) == SFmode)
6875 emit_insn (gen_minsf3 (operands[0], op0, op1));
6876 else
6877 emit_insn (gen_mindf3 (operands[0], op0, op1));
6878 return 1;
6879 }
6880 /* Check for max operation. */
6881 if (code == GT)
6882 {
6883 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6884 if (memory_operand (op0, VOIDmode))
6885 op0 = force_reg (GET_MODE (operands[0]), op0);
6886 if (GET_MODE (operands[0]) == SFmode)
6887 emit_insn (gen_maxsf3 (operands[0], op0, op1));
6888 else
6889 emit_insn (gen_maxdf3 (operands[0], op0, op1));
6890 return 1;
6891 }
6892 }
6893 /* Manage condition to be sse_comparison_operator. In case we are
6894 in non-ieee mode, try to canonicalize the destination operand
6895 to be first in the comparison - this helps reload to avoid extra
6896 moves. */
6897 if (!sse_comparison_operator (operands[1], VOIDmode)
6898 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
6899 {
6900 rtx tmp = ix86_compare_op0;
6901 ix86_compare_op0 = ix86_compare_op1;
6902 ix86_compare_op1 = tmp;
6903 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
6904 VOIDmode, ix86_compare_op0,
6905 ix86_compare_op1);
6906 }
6907 /* Similarly try to manage result to be first operand of conditional
fa9f36a1
JH
6908 move. We also don't support the NE comparison on SSE, so try to
6909 avoid it. */
037f20f1
JH
6910 if ((rtx_equal_p (operands[0], operands[3])
6911 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
6912 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
0073023d
JH
6913 {
6914 rtx tmp = operands[2];
6915 operands[2] = operands[3];
92d0fb09 6916 operands[3] = tmp;
0073023d
JH
6917 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
6918 (GET_CODE (operands[1])),
6919 VOIDmode, ix86_compare_op0,
6920 ix86_compare_op1);
6921 }
/* Emit the SSE conditional move pattern matching the destination mode. */
6922 if (GET_MODE (operands[0]) == SFmode)
6923 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
6924 operands[2], operands[3],
6925 ix86_compare_op0, ix86_compare_op1));
6926 else
6927 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
6928 operands[2], operands[3],
6929 ix86_compare_op0, ix86_compare_op1));
6930 return 1;
6931 }
6932
e075ae69 6933 /* The floating point conditional move instructions don't directly
0f290768 6934 support conditions resulting from a signed integer comparison. */
32b5b1aa 6935
e075ae69 6936 code = GET_CODE (operands[1]);
a1b8572c 6937 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9e7adcb3
JH
6938
6939 /* The floating point conditional move instructions don't directly
6940 support signed integer comparisons. */
6941
a1b8572c 6942 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 6943 {
a1b8572c
JH
/* This fallback does not handle a comparison that came back with an
   additional second or bypass test. */
6944 if (second_test != NULL || bypass_test != NULL)
6945 abort();
/* Compute the condition into a fresh QImode register via
   ix86_expand_setcc, then redo the comparison as (tmp != 0). */
e075ae69 6946 tmp = gen_reg_rtx (QImode);
3a3677ff 6947 ix86_expand_setcc (code, tmp);
e075ae69
RH
6948 code = NE;
6949 ix86_compare_op0 = tmp;
6950 ix86_compare_op1 = const0_rtx;
a1b8572c
JH
6951 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
6952 }
/* The fix-up moves emitted below for bypass_test and second_test read
   operands[3] / operands[2] after operands[0] has already been written,
   so copy a source that overlaps the destination into a fresh register. */
6953 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6954 {
6955 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6956 emit_move_insn (tmp, operands[3]);
6957 operands[3] = tmp;
6958 }
6959 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6960 {
6961 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6962 emit_move_insn (tmp, operands[2]);
6963 operands[2] = tmp;
e075ae69 6964 }
e9a25f70 6965
e075ae69
RH
6966 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6967 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9e7adcb3 6968 compare_op,
e075ae69
RH
6969 operands[2],
6970 operands[3])));
a1b8572c
JH
/* If a bypass test exists, select operands[3] when it holds and otherwise
   keep the value just stored in operands[0]. */
6971 if (bypass_test)
6972 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6973 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6974 bypass_test,
6975 operands[3],
6976 operands[0])));
/* If a second test exists, select operands[2] when it holds and otherwise
   keep the value already in operands[0]. */
6977 if (second_test)
6978 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6979 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6980 second_test,
6981 operands[2],
6982 operands[0])));
32b5b1aa 6983
e075ae69 6984 return 1;
32b5b1aa
SC
6985}
6986
2450a057
JH
6987/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
6988 works for floating pointer parameters and nonoffsetable memories.
6989 For pushes, it returns just stack offsets; the values will be saved
6990 in the right order. Maximally three parts are generated. */
6991
2b589241 6992static int
2450a057
JH
6993ix86_split_to_parts (operand, parts, mode)
6994 rtx operand;
6995 rtx *parts;
6996 enum machine_mode mode;
32b5b1aa 6997{
26e5b205
JH
6998 int size;
6999
7000 if (!TARGET_64BIT)
7001 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
7002 else
7003 size = (GET_MODE_SIZE (mode) + 4) / 8;
2450a057 7004
a7180f70
BS
7005 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
7006 abort ();
2450a057
JH
7007 if (size < 2 || size > 3)
7008 abort ();
7009
d7a29404
JH
7010 /* Optimize constant pool reference to immediates. This is used by fp moves,
7011 that force all constants to memory to allow combining. */
7012
7013 if (GET_CODE (operand) == MEM
7014 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
7015 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
7016 operand = get_pool_constant (XEXP (operand, 0));
7017
2450a057 7018 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
e075ae69 7019 {
2450a057
JH
7020 /* The only non-offsetable memories we handle are pushes. */
7021 if (! push_operand (operand, VOIDmode))
7022 abort ();
7023
26e5b205
JH
7024 operand = copy_rtx (operand);
7025 PUT_MODE (operand, Pmode);
2450a057
JH
7026 parts[0] = parts[1] = parts[2] = operand;
7027 }
26e5b205 7028 else if (!TARGET_64BIT)
2450a057
JH
7029 {
7030 if (mode == DImode)
7031 split_di (&operand, 1, &parts[0], &parts[1]);
7032 else
e075ae69 7033 {
2450a057
JH
7034 if (REG_P (operand))
7035 {
7036 if (!reload_completed)
7037 abort ();
7038 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
7039 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
7040 if (size == 3)
7041 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
7042 }
7043 else if (offsettable_memref_p (operand))
7044 {
f4ef873c 7045 operand = adjust_address (operand, SImode, 0);
2450a057 7046 parts[0] = operand;
b72f00af 7047 parts[1] = adjust_address (operand, SImode, 4);
2450a057 7048 if (size == 3)
b72f00af 7049 parts[2] = adjust_address (operand, SImode, 8);
2450a057
JH
7050 }
7051 else if (GET_CODE (operand) == CONST_DOUBLE)
7052 {
7053 REAL_VALUE_TYPE r;
2b589241 7054 long l[4];
2450a057
JH
7055
7056 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
7057 switch (mode)
7058 {
7059 case XFmode:
2b589241 7060 case TFmode:
2450a057
JH
7061 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
7062 parts[2] = GEN_INT (l[2]);
7063 break;
7064 case DFmode:
7065 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
7066 break;
7067 default:
7068 abort ();
7069 }
7070 parts[1] = GEN_INT (l[1]);
7071 parts[0] = GEN_INT (l[0]);
7072 }
7073 else
7074 abort ();
e075ae69 7075 }
2450a057 7076 }
26e5b205
JH
7077 else
7078 {
7079 if (mode == XFmode || mode == TFmode)
7080 {
7081 if (REG_P (operand))
7082 {
7083 if (!reload_completed)
7084 abort ();
7085 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
7086 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
7087 }
7088 else if (offsettable_memref_p (operand))
7089 {
b72f00af 7090 operand = adjust_address (operand, DImode, 0);
26e5b205 7091 parts[0] = operand;
b72f00af 7092 parts[1] = adjust_address (operand, SImode, 8);
26e5b205
JH
7093 }
7094 else if (GET_CODE (operand) == CONST_DOUBLE)
7095 {
7096 REAL_VALUE_TYPE r;
7097 long l[3];
7098
7099 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
7100 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
7101 /* Do not use shift by 32 to avoid warning on 32bit systems. */
7102 if (HOST_BITS_PER_WIDE_INT >= 64)
7103 parts[0] = GEN_INT (l[0] + ((l[1] << 31) << 1));
7104 else
7105 parts[0] = immed_double_const (l[0], l[1], DImode);
7106 parts[1] = GEN_INT (l[2]);
7107 }
7108 else
7109 abort ();
7110 }
7111 }
2450a057 7112
2b589241 7113 return size;
2450a057
JH
7114}
7115
7116/* Emit insns to perform a move or push of DI, DF, XF and TF values.
7117 All required insns are emitted here; the function returns nothing.
7118 In the non-push case operands[2..4] are set to the destination parts
7119 and operands[5..7] to the matching source parts, in emission order. */
7120
26e5b205
JH
7121void
7122ix86_split_long_move (operands)
7123 rtx operands[];
2450a057
JH
7124{
/* part[0][] receives the split destination (operands[0]) and part[1][]
   the split source (operands[1]). */
7125 rtx part[2][3];
26e5b205 7126 int nparts;
2450a057
JH
7127 int push = 0;
7128 int collisions = 0;
26e5b205
JH
7129 enum machine_mode mode = GET_MODE (operands[0]);
7130
7131 /* The DFmode expanders may ask us to move double.
7132 For 64bit target this is single move. By hiding the fact
7133 here we simplify i386.md splitters. */
7134 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
7135 {
7136 /* Optimize constant pool reference to immediates. This is used by fp moves,
7137 that force all constants to memory to allow combining. */
7138
7139 if (GET_CODE (operands[1]) == MEM
7140 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
7141 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
7142 operands[1] = get_pool_constant (XEXP (operands[1], 0));
/* A push destination is copied and retyped to Pmode; any other
   destination is accessed as DImode. */
7143 if (push_operand (operands[0], VOIDmode))
b47b4f21
JH
7144 {
7145 operands[0] = copy_rtx (operands[0]);
7146 PUT_MODE (operands[0], Pmode);
7147 }
26e5b205
JH
7148 else
7149 operands[0] = gen_lowpart (DImode, operands[0]);
7150 operands[1] = gen_lowpart (DImode, operands[1]);
7151 emit_move_insn (operands[0], operands[1]);
7152 return;
7153 }
2450a057 7154
2450a057
JH
7155 /* The only non-offsettable memory we handle is push. */
7156 if (push_operand (operands[0], VOIDmode))
7157 push = 1;
7158 else if (GET_CODE (operands[0]) == MEM
7159 && ! offsettable_memref_p (operands[0]))
7160 abort ();
7161
26e5b205
JH
7162 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
7163 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
2450a057
JH
7164
7165 /* When emitting push, take care for source operands on the stack. */
7166 if (push && GET_CODE (operands[1]) == MEM
7167 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
7168 {
/* Each lower source part is given the address of the next higher part. */
26e5b205 7169 if (nparts == 3)
886cbb88
JH
7170 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
7171 XEXP (part[1][2], 0));
7172 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
7173 XEXP (part[1][1], 0));
2450a057
JH
7174 }
7175
0f290768 7176 /* We need to do copy in the right order in case an address register
2450a057
JH
7177 of the source overlaps the destination. */
7178 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
7179 {
/* Count how many destination parts are mentioned in the source address. */
7180 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
7181 collisions++;
7182 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
7183 collisions++;
26e5b205 7184 if (nparts == 3
2450a057
JH
7185 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
7186 collisions++;
7187
7188 /* Collision in the middle part can be handled by reordering. */
26e5b205 7189 if (collisions == 1 && nparts == 3
2450a057 7190 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
e075ae69 7191 {
2450a057
JH
7192 rtx tmp;
7193 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
7194 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
7195 }
e075ae69 7196
2450a057
JH
7197 /* If there are more collisions, we can't handle it by reordering.
7198 Do an lea to the last part and use only one colliding move. */
7199 else if (collisions > 1)
7200 {
7201 collisions = 1;
26e5b205 7202 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
2450a057 7203 XEXP (part[1][0], 0)));
26e5b205
JH
7204 part[1][0] = change_address (part[1][0],
7205 TARGET_64BIT ? DImode : SImode,
7206 part[0][nparts - 1]);
b72f00af 7207 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
26e5b205 7208 if (nparts == 3)
b72f00af 7209 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
2450a057
JH
7210 }
7211 }
7212
7213 if (push)
7214 {
26e5b205 7215 if (!TARGET_64BIT)
2b589241 7216 {
26e5b205
JH
7217 if (nparts == 3)
7218 {
7219 /* We use only first 12 bytes of TFmode value, but for pushing we
7220 are required to adjust stack as if we were pushing real 16byte
7221 value. */
/* NOTE(review): the !TARGET_64BIT test below is redundant inside the
   enclosing if (!TARGET_64BIT) branch. */
7222 if (mode == TFmode && !TARGET_64BIT)
7223 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
7224 GEN_INT (-4)));
7225 emit_move_insn (part[0][2], part[1][2]);
7226 }
2b589241 7227 }
26e5b205
JH
7228 else
7229 {
7230 /* In 64bit mode we don't have 32bit push available. In case this is
7231 register, it is OK - we will just use larger counterpart. We also
7232 retype memory - these comes from attempt to avoid REX prefix on
7233 moving of second half of TFmode value. */
7234 if (GET_MODE (part[1][1]) == SImode)
7235 {
7236 if (GET_CODE (part[1][1]) == MEM)
f4ef873c 7237 part[1][1] = adjust_address (part[1][1], DImode, 0);
26e5b205
JH
7238 else if (REG_P (part[1][1]))
7239 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
7240 else
7241 abort();
886cbb88
JH
7242 if (GET_MODE (part[1][0]) == SImode)
7243 part[1][0] = part[1][1];
26e5b205
JH
7244 }
7245 }
/* Emit the remaining pushes from the higher part down to part 0. */
7246 emit_move_insn (part[0][1], part[1][1]);
7247 emit_move_insn (part[0][0], part[1][0]);
7248 return;
2450a057
JH
7249 }
7250
7251 /* Choose correct order to not overwrite the source before it is copied. */
7252 if ((REG_P (part[0][0])
7253 && REG_P (part[1][1])
7254 && (REGNO (part[0][0]) == REGNO (part[1][1])
26e5b205 7255 || (nparts == 3
2450a057
JH
7256 && REGNO (part[0][0]) == REGNO (part[1][2]))))
7257 || (collisions > 0
7258 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
7259 {
/* Writing part 0 first would clobber something still needed, so queue
   the moves from the highest part down to part 0. */
26e5b205 7260 if (nparts == 3)
2450a057 7261 {
26e5b205
JH
7262 operands[2] = part[0][2];
7263 operands[3] = part[0][1];
7264 operands[4] = part[0][0];
7265 operands[5] = part[1][2];
7266 operands[6] = part[1][1];
7267 operands[7] = part[1][0];
2450a057
JH
7268 }
7269 else
7270 {
26e5b205
JH
7271 operands[2] = part[0][1];
7272 operands[3] = part[0][0];
7273 operands[5] = part[1][1];
7274 operands[6] = part[1][0];
2450a057
JH
7275 }
7276 }
7277 else
7278 {
/* Otherwise queue the moves from part 0 upwards. */
26e5b205 7279 if (nparts == 3)
2450a057 7280 {
26e5b205
JH
7281 operands[2] = part[0][0];
7282 operands[3] = part[0][1];
7283 operands[4] = part[0][2];
7284 operands[5] = part[1][0];
7285 operands[6] = part[1][1];
7286 operands[7] = part[1][2];
2450a057
JH
7287 }
7288 else
7289 {
26e5b205
JH
7290 operands[2] = part[0][0];
7291 operands[3] = part[0][1];
7292 operands[5] = part[1][0];
7293 operands[6] = part[1][1];
e075ae69
RH
7294 }
7295 }
26e5b205
JH
/* Emit the moves in the order selected above: destination operands[2..4]
   from source operands[5..7]. */
7296 emit_move_insn (operands[2], operands[5]);
7297 emit_move_insn (operands[3], operands[6]);
7298 if (nparts == 3)
7299 emit_move_insn (operands[4], operands[7]);
32b5b1aa 7300
26e5b205 7301 return;
32b5b1aa 7302}
32b5b1aa 7303
e075ae69
RH
7304void
7305ix86_split_ashldi (operands, scratch)
7306 rtx *operands, scratch;
32b5b1aa 7307{
e075ae69
RH
7308 rtx low[2], high[2];
7309 int count;
b985a30f 7310
e075ae69
RH
7311 if (GET_CODE (operands[2]) == CONST_INT)
7312 {
7313 split_di (operands, 2, low, high);
7314 count = INTVAL (operands[2]) & 63;
32b5b1aa 7315
e075ae69
RH
7316 if (count >= 32)
7317 {
7318 emit_move_insn (high[0], low[1]);
7319 emit_move_insn (low[0], const0_rtx);
b985a30f 7320
e075ae69
RH
7321 if (count > 32)
7322 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
7323 }
7324 else
7325 {
7326 if (!rtx_equal_p (operands[0], operands[1]))
7327 emit_move_insn (operands[0], operands[1]);
7328 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
7329 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
7330 }
7331 }
7332 else
7333 {
7334 if (!rtx_equal_p (operands[0], operands[1]))
7335 emit_move_insn (operands[0], operands[1]);
b985a30f 7336
e075ae69 7337 split_di (operands, 1, low, high);
b985a30f 7338
e075ae69
RH
7339 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
7340 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 7341
fe577e58 7342 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 7343 {
fe577e58 7344 if (! no_new_pseudos)
e075ae69
RH
7345 scratch = force_reg (SImode, const0_rtx);
7346 else
7347 emit_move_insn (scratch, const0_rtx);
7348
7349 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
7350 scratch));
7351 }
7352 else
7353 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
7354 }
e9a25f70 7355}
32b5b1aa 7356
e075ae69
RH
7357void
7358ix86_split_ashrdi (operands, scratch)
7359 rtx *operands, scratch;
32b5b1aa 7360{
e075ae69
RH
7361 rtx low[2], high[2];
7362 int count;
32b5b1aa 7363
e075ae69
RH
7364 if (GET_CODE (operands[2]) == CONST_INT)
7365 {
7366 split_di (operands, 2, low, high);
7367 count = INTVAL (operands[2]) & 63;
32b5b1aa 7368
e075ae69
RH
7369 if (count >= 32)
7370 {
7371 emit_move_insn (low[0], high[1]);
32b5b1aa 7372
e075ae69
RH
7373 if (! reload_completed)
7374 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
7375 else
7376 {
7377 emit_move_insn (high[0], low[0]);
7378 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
7379 }
7380
7381 if (count > 32)
7382 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
7383 }
7384 else
7385 {
7386 if (!rtx_equal_p (operands[0], operands[1]))
7387 emit_move_insn (operands[0], operands[1]);
7388 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
7389 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
7390 }
7391 }
7392 else
32b5b1aa 7393 {
e075ae69
RH
7394 if (!rtx_equal_p (operands[0], operands[1]))
7395 emit_move_insn (operands[0], operands[1]);
7396
7397 split_di (operands, 1, low, high);
7398
7399 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
7400 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
7401
fe577e58 7402 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 7403 {
fe577e58 7404 if (! no_new_pseudos)
e075ae69
RH
7405 scratch = gen_reg_rtx (SImode);
7406 emit_move_insn (scratch, high[0]);
7407 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
7408 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
7409 scratch));
7410 }
7411 else
7412 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 7413 }
e075ae69 7414}
32b5b1aa 7415
e075ae69
RH
7416void
7417ix86_split_lshrdi (operands, scratch)
7418 rtx *operands, scratch;
7419{
7420 rtx low[2], high[2];
7421 int count;
32b5b1aa 7422
e075ae69 7423 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 7424 {
e075ae69
RH
7425 split_di (operands, 2, low, high);
7426 count = INTVAL (operands[2]) & 63;
7427
7428 if (count >= 32)
c7271385 7429 {
e075ae69
RH
7430 emit_move_insn (low[0], high[1]);
7431 emit_move_insn (high[0], const0_rtx);
32b5b1aa 7432
e075ae69
RH
7433 if (count > 32)
7434 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
7435 }
7436 else
7437 {
7438 if (!rtx_equal_p (operands[0], operands[1]))
7439 emit_move_insn (operands[0], operands[1]);
7440 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
7441 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
7442 }
32b5b1aa 7443 }
e075ae69
RH
7444 else
7445 {
7446 if (!rtx_equal_p (operands[0], operands[1]))
7447 emit_move_insn (operands[0], operands[1]);
32b5b1aa 7448
e075ae69
RH
7449 split_di (operands, 1, low, high);
7450
7451 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
7452 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
7453
7454 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 7455 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 7456 {
fe577e58 7457 if (! no_new_pseudos)
e075ae69
RH
7458 scratch = force_reg (SImode, const0_rtx);
7459 else
7460 emit_move_insn (scratch, const0_rtx);
7461
7462 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
7463 scratch));
7464 }
7465 else
7466 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
7467 }
32b5b1aa 7468}
3f803cd9 7469
0407c02b 7470/* Helper function for the string operations below. Dest VARIABLE whether
0945b39d
JH
7471 it is aligned to VALUE bytes. If true, jump to the label. */
7472static rtx
7473ix86_expand_aligntest (variable, value)
7474 rtx variable;
7475 int value;
7476{
7477 rtx label = gen_label_rtx ();
7478 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
7479 if (GET_MODE (variable) == DImode)
7480 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
7481 else
7482 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
7483 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
7484 1, 0, label);
7485 return label;
7486}
7487
7488/* Adjust COUNTER by the VALUE. */
7489static void
7490ix86_adjust_counter (countreg, value)
7491 rtx countreg;
7492 HOST_WIDE_INT value;
7493{
7494 if (GET_MODE (countreg) == DImode)
7495 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
7496 else
7497 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
7498}
7499
7500/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 7501rtx
0945b39d
JH
7502ix86_zero_extend_to_Pmode (exp)
7503 rtx exp;
7504{
7505 rtx r;
7506 if (GET_MODE (exp) == VOIDmode)
7507 return force_reg (Pmode, exp);
7508 if (GET_MODE (exp) == Pmode)
7509 return copy_to_mode_reg (Pmode, exp);
7510 r = gen_reg_rtx (Pmode);
7511 emit_insn (gen_zero_extendsidi2 (r, exp));
7512 return r;
7513}
7514
7515/* Expand string move (memcpy) operation. Use i386 string operations when
7516 profitable. expand_clrstr contains similar code.
 DST and SRC are the destination and source memory references; their
 addresses are copied into Pmode registers. COUNT_EXP is the byte count
 and ALIGN_EXP the alignment; each is only used as a compile-time value
 when it is a CONST_INT.
 Returns 1 after emitting the copy, or 0 without emitting anything when
 the generic path is reached with alignment below UNITS_PER_WORD and
 TARGET_INLINE_ALL_STRINGOPS is off. */
7517int
7518ix86_expand_movstr (dst, src, count_exp, align_exp)
7519 rtx dst, src, count_exp, align_exp;
7520{
7521 rtx srcreg, destreg, countreg;
7522 enum machine_mode counter_mode;
7523 HOST_WIDE_INT align = 0;
7524 unsigned HOST_WIDE_INT count = 0;
7525 rtx insns;
7526
/* Collect the insns in a separate sequence so that they can be discarded
   on the early return, or post-processed by ix86_set_move_mem_attrs. */
7527 start_sequence ();
7528
7529 if (GET_CODE (align_exp) == CONST_INT)
7530 align = INTVAL (align_exp);
7531
7532 /* This simple hack avoids all inlining code and simplifies code below. */
7533 if (!TARGET_ALIGN_STRINGOPS)
7534 align = 64;
7535
/* count stays 0 when COUNT_EXP is not a CONST_INT. */
7536 if (GET_CODE (count_exp) == CONST_INT)
7537 count = INTVAL (count_exp);
7538
7539 /* Figure out proper mode for counter. For 32bits it is always SImode,
7540 for 64bits use SImode when possible, otherwise DImode.
7541 Set count to number of bytes copied when known at compile time. */
7542 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
7543 || x86_64_zero_extended_value (count_exp))
7544 counter_mode = SImode;
7545 else
7546 counter_mode = DImode;
7547
7548 if (counter_mode != SImode && counter_mode != DImode)
7549 abort ();
7550
7551 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
7552 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
7553
7554 emit_insn (gen_cld ());
7555
7556 /* When optimizing for size emit simple rep ; movsb instruction for
7557 counts not divisible by 4. */
7558
7559 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
7560 {
7561 countreg = ix86_zero_extend_to_Pmode (count_exp);
7562 if (TARGET_64BIT)
7563 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
7564 destreg, srcreg, countreg));
7565 else
7566 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
7567 destreg, srcreg, countreg));
7568 }
7569
7570 /* For constant aligned (or small unaligned) copies use rep movsl
7571 followed by code copying the rest. For PentiumPro ensure 8 byte
7572 alignment to allow rep movsl acceleration. */
7573
7574 else if (count != 0
7575 && (align >= 8
7576 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
7577 || optimize_size || count < (unsigned int)64))
7578 {
/* size is the width in bytes of the elements moved by the rep insn. */
7579 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
7580 if (count & ~(size - 1))
7581 {
7582 countreg = copy_to_mode_reg (counter_mode,
7583 GEN_INT ((count >> (size == 4 ? 2 : 3))
7584 & (TARGET_64BIT ? -1 : 0x3fffffff)));
7585 countreg = ix86_zero_extend_to_Pmode (countreg);
7586 if (size == 4)
7587 {
7588 if (TARGET_64BIT)
7589 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
7590 destreg, srcreg, countreg));
7591 else
7592 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
7593 destreg, srcreg, countreg));
7594 }
7595 else
7596 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
7597 destreg, srcreg, countreg));
7598 }
/* Copy the remaining 4, 2 and 1 byte pieces selected by the low bits of
   the constant count. */
7599 if (size == 8 && (count & 0x04))
7600 emit_insn (gen_strmovsi (destreg, srcreg));
7601 if (count & 0x02)
7602 emit_insn (gen_strmovhi (destreg, srcreg));
7603 if (count & 0x01)
7604 emit_insn (gen_strmovqi (destreg, srcreg));
7605 }
7606 /* The generic code based on the glibc implementation:
7607 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
7608 allowing accelerated copying there)
7609 - copy the data using rep movsl
7610 - copy the rest. */
7611 else
7612 {
7613 rtx countreg2;
7614 rtx label = NULL;
7615
7616 /* In case we don't know anything about the alignment, default to
7617 library version, since it is usually equally fast and result in
7618 shorter code. */
7619 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
7620 {
7621 end_sequence ();
7622 return 0;
7623 }
7624
/* NOTE(review): a cld has already been emitted unconditionally above, and
   another one is emitted below when !TARGET_SINGLE_STRINGOP, so this path
   always emits two. */
7625 if (TARGET_SINGLE_STRINGOP)
7626 emit_insn (gen_cld ());
7627
7628 countreg2 = gen_reg_rtx (Pmode);
7629 countreg = copy_to_mode_reg (counter_mode, count_exp);
7630
7631 /* We don't use loops to align destination and to copy parts smaller
7632 than 4 bytes, because gcc is able to optimize such code better (in
7633 the case the destination or the count really is aligned, gcc is often
7634 able to predict the branches) and also it is friendlier to the
a4f31c00 7635 hardware branch prediction.
0945b39d
JH
7636
7637 Using loops is beneficial for generic case, because we can
7638 handle small counts using the loops. Many CPUs (such as Athlon)
7639 have large REP prefix setup costs.
7640
7641 This is quite costly. Maybe we can revisit this decision later or
7642 add some customizability to this code. */
7643
/* NOTE(review): count == 0 is already required by the outer test, so the
   inner (count == 0 || count >= 260) is always true here. */
7644 if (count == 0
7645 && align < (TARGET_PENTIUMPRO && (count == 0
7646 || count >= (unsigned int)260)
7647 ? 8 : UNITS_PER_WORD))
7648 {
/* Jump past the alignment and rep code when the run-time count is at
   most UNITS_PER_WORD - 1. */
7649 label = gen_label_rtx ();
7650 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
7651 LEU, 0, counter_mode, 1, 0, label);
7652 }
/* Alignment prologue: for each tested bit of the destination address,
   copy one piece of that size and decrease the counter, unless the
   aligntest jump skips it. */
7653 if (align <= 1)
7654 {
7655 rtx label = ix86_expand_aligntest (destreg, 1);
7656 emit_insn (gen_strmovqi (destreg, srcreg));
7657 ix86_adjust_counter (countreg, 1);
7658 emit_label (label);
7659 LABEL_NUSES (label) = 1;
7660 }
7661 if (align <= 2)
7662 {
7663 rtx label = ix86_expand_aligntest (destreg, 2);
7664 emit_insn (gen_strmovhi (destreg, srcreg));
7665 ix86_adjust_counter (countreg, 2);
7666 emit_label (label);
7667 LABEL_NUSES (label) = 1;
7668 }
7669 if (align <= 4
7670 && ((TARGET_PENTIUMPRO && (count == 0
7671 || count >= (unsigned int)260))
7672 || TARGET_64BIT))
7673 {
7674 rtx label = ix86_expand_aligntest (destreg, 4);
7675 emit_insn (gen_strmovsi (destreg, srcreg));
7676 ix86_adjust_counter (countreg, 4);
7677 emit_label (label);
7678 LABEL_NUSES (label) = 1;
7679 }
7680
7681 if (!TARGET_SINGLE_STRINGOP)
7682 emit_insn (gen_cld ());
/* Main copy: countreg2 = countreg / element size, then rep move. */
7683 if (TARGET_64BIT)
7684 {
7685 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
7686 GEN_INT (3)));
7687 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
7688 destreg, srcreg, countreg2));
7689 }
7690 else
7691 {
7692 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
7693 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
7694 destreg, srcreg, countreg2));
7695 }
7696
7697 if (label)
7698 {
7699 emit_label (label);
7700 LABEL_NUSES (label) = 1;
7701 }
/* Epilogue: copy the trailing 4, 2 and 1 byte pieces. Each size is either
   decided from the constant count or tested at run time on countreg. */
7702 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
7703 emit_insn (gen_strmovsi (destreg, srcreg));
7704 if ((align <= 4 || count == 0) && TARGET_64BIT)
7705 {
7706 rtx label = ix86_expand_aligntest (countreg, 4);
7707 emit_insn (gen_strmovsi (destreg, srcreg));
7708 emit_label (label);
7709 LABEL_NUSES (label) = 1;
7710 }
7711 if (align > 2 && count != 0 && (count & 2))
7712 emit_insn (gen_strmovhi (destreg, srcreg));
7713 if (align <= 2 || count == 0)
7714 {
7715 rtx label = ix86_expand_aligntest (countreg, 2);
7716 emit_insn (gen_strmovhi (destreg, srcreg));
7717 emit_label (label);
7718 LABEL_NUSES (label) = 1;
7719 }
7720 if (align > 1 && count != 0 && (count & 1))
7721 emit_insn (gen_strmovqi (destreg, srcreg));
7722 if (align <= 1 || count == 0)
7723 {
7724 rtx label = ix86_expand_aligntest (countreg, 1);
7725 emit_insn (gen_strmovqi (destreg, srcreg));
7726 emit_label (label);
7727 LABEL_NUSES (label) = 1;
7728 }
7729 }
7730
/* Close the sequence, pass the collected insns to ix86_set_move_mem_attrs
   together with DST/SRC and their address registers, then emit them. */
7731 insns = get_insns ();
7732 end_sequence ();
7733
7734 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
7735 emit_insns (insns);
7736 return 1;
7737}
7738
7739/* Expand string clear operation (bzero). Use i386 string operations when
7740 profitable. expand_movstr contains similar code. */
7741int
7742ix86_expand_clrstr (src, count_exp, align_exp)
7743 rtx src, count_exp, align_exp;
7744{
7745 rtx destreg, zeroreg, countreg;
7746 enum machine_mode counter_mode;
7747 HOST_WIDE_INT align = 0;
7748 unsigned HOST_WIDE_INT count = 0;
7749
7750 if (GET_CODE (align_exp) == CONST_INT)
7751 align = INTVAL (align_exp);
7752
7753 /* This simple hack avoids all inlining code and simplifies code bellow. */
7754 if (!TARGET_ALIGN_STRINGOPS)
7755 align = 32;
7756
7757 if (GET_CODE (count_exp) == CONST_INT)
7758 count = INTVAL (count_exp);
7759 /* Figure out proper mode for counter. For 32bits it is always SImode,
7760 for 64bits use SImode when possible, otherwise DImode.
7761 Set count to number of bytes copied when known at compile time. */
7762 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
7763 || x86_64_zero_extended_value (count_exp))
7764 counter_mode = SImode;
7765 else
7766 counter_mode = DImode;
7767
7768 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
7769
7770 emit_insn (gen_cld ());
7771
7772 /* When optimizing for size emit simple rep ; movsb instruction for
7773 counts not divisible by 4. */
7774
7775 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
7776 {
7777 countreg = ix86_zero_extend_to_Pmode (count_exp);
7778 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
7779 if (TARGET_64BIT)
7780 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
7781 destreg, countreg));
7782 else
7783 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
7784 destreg, countreg));
7785 }
7786 else if (count != 0
7787 && (align >= 8
7788 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
7789 || optimize_size || count < (unsigned int)64))
7790 {
7791 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
7792 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
7793 if (count & ~(size - 1))
7794 {
7795 countreg = copy_to_mode_reg (counter_mode,
7796 GEN_INT ((count >> (size == 4 ? 2 : 3))
7797 & (TARGET_64BIT ? -1 : 0x3fffffff)));
7798 countreg = ix86_zero_extend_to_Pmode (countreg);
7799 if (size == 4)
7800 {
7801 if (TARGET_64BIT)
7802 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
7803 destreg, countreg));
7804 else
7805 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
7806 destreg, countreg));
7807 }
7808 else
7809 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
7810 destreg, countreg));
7811 }
7812 if (size == 8 && (count & 0x04))
7813 emit_insn (gen_strsetsi (destreg,
7814 gen_rtx_SUBREG (SImode, zeroreg, 0)));
7815 if (count & 0x02)
7816 emit_insn (gen_strsethi (destreg,
7817 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7818 if (count & 0x01)
7819 emit_insn (gen_strsetqi (destreg,
7820 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7821 }
7822 else
7823 {
7824 rtx countreg2;
7825 rtx label = NULL;
7826
7827 /* In case we don't know anything about the alignment, default to
7828 library version, since it is usually equally fast and result in
7829 shorter code. */
7830 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
7831 return 0;
7832
7833 if (TARGET_SINGLE_STRINGOP)
7834 emit_insn (gen_cld ());
7835
7836 countreg2 = gen_reg_rtx (Pmode);
7837 countreg = copy_to_mode_reg (counter_mode, count_exp);
7838 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
7839
7840 if (count == 0
7841 && align < (TARGET_PENTIUMPRO && (count == 0
7842 || count >= (unsigned int)260)
7843 ? 8 : UNITS_PER_WORD))
7844 {
7845 label = gen_label_rtx ();
7846 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
7847 LEU, 0, counter_mode, 1, 0, label);
7848 }
7849 if (align <= 1)
7850 {
7851 rtx label = ix86_expand_aligntest (destreg, 1);
7852 emit_insn (gen_strsetqi (destreg,
7853 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7854 ix86_adjust_counter (countreg, 1);
7855 emit_label (label);
7856 LABEL_NUSES (label) = 1;
7857 }
7858 if (align <= 2)
7859 {
7860 rtx label = ix86_expand_aligntest (destreg, 2);
7861 emit_insn (gen_strsethi (destreg,
7862 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7863 ix86_adjust_counter (countreg, 2);
7864 emit_label (label);
7865 LABEL_NUSES (label) = 1;
7866 }
7867 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
7868 || count >= (unsigned int)260))
7869 {
7870 rtx label = ix86_expand_aligntest (destreg, 4);
7871 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
7872 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
7873 : zeroreg)));
7874 ix86_adjust_counter (countreg, 4);
7875 emit_label (label);
7876 LABEL_NUSES (label) = 1;
7877 }
7878
7879 if (!TARGET_SINGLE_STRINGOP)
7880 emit_insn (gen_cld ());
7881 if (TARGET_64BIT)
7882 {
7883 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
7884 GEN_INT (3)));
7885 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
7886 destreg, countreg2));
7887 }
7888 else
7889 {
7890 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
7891 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
7892 destreg, countreg2));
7893 }
7894
7895 if (label)
7896 {
7897 emit_label (label);
7898 LABEL_NUSES (label) = 1;
7899 }
7900 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
7901 emit_insn (gen_strsetsi (destreg,
7902 gen_rtx_SUBREG (SImode, zeroreg, 0)));
7903 if (TARGET_64BIT && (align <= 4 || count == 0))
7904 {
7905 rtx label = ix86_expand_aligntest (destreg, 2);
7906 emit_insn (gen_strsetsi (destreg,
7907 gen_rtx_SUBREG (SImode, zeroreg, 0)));
7908 emit_label (label);
7909 LABEL_NUSES (label) = 1;
7910 }
7911 if (align > 2 && count != 0 && (count & 2))
7912 emit_insn (gen_strsethi (destreg,
7913 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7914 if (align <= 2 || count == 0)
7915 {
7916 rtx label = ix86_expand_aligntest (destreg, 2);
7917 emit_insn (gen_strsethi (destreg,
7918 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7919 emit_label (label);
7920 LABEL_NUSES (label) = 1;
7921 }
7922 if (align > 1 && count != 0 && (count & 1))
7923 emit_insn (gen_strsetqi (destreg,
7924 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7925 if (align <= 1 || count == 0)
7926 {
7927 rtx label = ix86_expand_aligntest (destreg, 1);
7928 emit_insn (gen_strsetqi (destreg,
7929 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7930 emit_label (label);
7931 LABEL_NUSES (label) = 1;
7932 }
7933 }
7934 return 1;
7935}
7936/* Expand strlen. */
7937int
7938ix86_expand_strlen (out, src, eoschar, align)
7939 rtx out, src, eoschar, align;
7940{
7941 rtx addr, scratch1, scratch2, scratch3, scratch4;
7942
7943 /* The generic case of strlen expander is long. Avoid it's
7944 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
7945
7946 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
7947 && !TARGET_INLINE_ALL_STRINGOPS
7948 && !optimize_size
7949 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
7950 return 0;
7951
7952 addr = force_reg (Pmode, XEXP (src, 0));
7953 scratch1 = gen_reg_rtx (Pmode);
7954
7955 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
7956 && !optimize_size)
7957 {
7958 /* Well it seems that some optimizer does not combine a call like
7959 foo(strlen(bar), strlen(bar));
7960 when the move and the subtraction is done here. It does calculate
7961 the length just once when these instructions are done inside of
7962 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
7963 often used and I use one fewer register for the lifetime of
7964 output_strlen_unroll() this is better. */
7965
7966 emit_move_insn (out, addr);
7967
7968 ix86_expand_strlensi_unroll_1 (out, align);
7969
7970 /* strlensi_unroll_1 returns the address of the zero at the end of
7971 the string, like memchr(), so compute the length by subtracting
7972 the start address. */
7973 if (TARGET_64BIT)
7974 emit_insn (gen_subdi3 (out, out, addr));
7975 else
7976 emit_insn (gen_subsi3 (out, out, addr));
7977 }
7978 else
7979 {
7980 scratch2 = gen_reg_rtx (Pmode);
7981 scratch3 = gen_reg_rtx (Pmode);
7982 scratch4 = force_reg (Pmode, constm1_rtx);
7983
7984 emit_move_insn (scratch3, addr);
7985 eoschar = force_reg (QImode, eoschar);
7986
7987 emit_insn (gen_cld ());
7988 if (TARGET_64BIT)
7989 {
7990 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
7991 align, scratch4, scratch3));
7992 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
7993 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
7994 }
7995 else
7996 {
7997 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
7998 align, scratch4, scratch3));
7999 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
8000 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
8001 }
8002 }
8003 return 1;
8004}
8005
e075ae69
RH
8006/* Expand the appropriate insns for doing strlen if not just doing
8007 repnz; scasb
8008
8009 out = result, initialized with the start address
8010 align_rtx = alignment of the address.
8011 scratch = scratch register, initialized with the startaddress when
77ebd435 8012 not aligned, otherwise undefined
3f803cd9
SC
8013
8014 This is just the body. It needs the initialisations mentioned above and
8015 some address computing at the end. These things are done in i386.md. */
8016
0945b39d
JH
8017static void
8018ix86_expand_strlensi_unroll_1 (out, align_rtx)
8019 rtx out, align_rtx;
3f803cd9 8020{
e075ae69
RH
8021 int align;
8022 rtx tmp;
8023 rtx align_2_label = NULL_RTX;
8024 rtx align_3_label = NULL_RTX;
8025 rtx align_4_label = gen_label_rtx ();
8026 rtx end_0_label = gen_label_rtx ();
e075ae69 8027 rtx mem;
e2e52e1b 8028 rtx tmpreg = gen_reg_rtx (SImode);
0945b39d 8029 rtx scratch = gen_reg_rtx (SImode);
e075ae69
RH
8030
8031 align = 0;
8032 if (GET_CODE (align_rtx) == CONST_INT)
8033 align = INTVAL (align_rtx);
3f803cd9 8034
e9a25f70 8035 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 8036
e9a25f70 8037 /* Is there a known alignment and is it less than 4? */
e075ae69 8038 if (align < 4)
3f803cd9 8039 {
0945b39d
JH
8040 rtx scratch1 = gen_reg_rtx (Pmode);
8041 emit_move_insn (scratch1, out);
e9a25f70 8042 /* Is there a known alignment and is it not 2? */
e075ae69 8043 if (align != 2)
3f803cd9 8044 {
e075ae69
RH
8045 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
8046 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
8047
8048 /* Leave just the 3 lower bits. */
0945b39d 8049 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
e075ae69
RH
8050 NULL_RTX, 0, OPTAB_WIDEN);
8051
9076b9c1 8052 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
0945b39d 8053 Pmode, 1, 0, align_4_label);
9076b9c1 8054 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
0945b39d 8055 Pmode, 1, 0, align_2_label);
9076b9c1 8056 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
0945b39d 8057 Pmode, 1, 0, align_3_label);
3f803cd9
SC
8058 }
8059 else
8060 {
e9a25f70
JL
8061 /* Since the alignment is 2, we have to check 2 or 0 bytes;
8062 check if is aligned to 4 - byte. */
e9a25f70 8063
0945b39d 8064 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
e075ae69
RH
8065 NULL_RTX, 0, OPTAB_WIDEN);
8066
9076b9c1 8067 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
0945b39d 8068 Pmode, 1, 0, align_4_label);
3f803cd9
SC
8069 }
8070
e075ae69 8071 mem = gen_rtx_MEM (QImode, out);
e9a25f70 8072
e075ae69 8073 /* Now compare the bytes. */
e9a25f70 8074
0f290768 8075 /* Compare the first n unaligned byte on a byte per byte basis. */
9076b9c1
JH
8076 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
8077 QImode, 1, 0, end_0_label);
3f803cd9 8078
0f290768 8079 /* Increment the address. */
0945b39d
JH
8080 if (TARGET_64BIT)
8081 emit_insn (gen_adddi3 (out, out, const1_rtx));
8082 else
8083 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 8084
e075ae69
RH
8085 /* Not needed with an alignment of 2 */
8086 if (align != 2)
8087 {
8088 emit_label (align_2_label);
3f803cd9 8089
9076b9c1
JH
8090 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
8091 QImode, 1, 0, end_0_label);
e075ae69 8092
0945b39d
JH
8093 if (TARGET_64BIT)
8094 emit_insn (gen_adddi3 (out, out, const1_rtx));
8095 else
8096 emit_insn (gen_addsi3 (out, out, const1_rtx));
e075ae69
RH
8097
8098 emit_label (align_3_label);
8099 }
8100
9076b9c1
JH
8101 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
8102 QImode, 1, 0, end_0_label);
e075ae69 8103
0945b39d
JH
8104 if (TARGET_64BIT)
8105 emit_insn (gen_adddi3 (out, out, const1_rtx));
8106 else
8107 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
8108 }
8109
e075ae69
RH
8110 /* Generate loop to check 4 bytes at a time. It is not a good idea to
8111 align this loop. It gives only huge programs, but does not help to
8112 speed up. */
8113 emit_label (align_4_label);
3f803cd9 8114
e075ae69
RH
8115 mem = gen_rtx_MEM (SImode, out);
8116 emit_move_insn (scratch, mem);
0945b39d
JH
8117 if (TARGET_64BIT)
8118 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
8119 else
8120 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 8121
e2e52e1b
JH
8122 /* This formula yields a nonzero result iff one of the bytes is zero.
8123 This saves three branches inside loop and many cycles. */
8124
8125 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
8126 emit_insn (gen_one_cmplsi2 (scratch, scratch));
8127 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
7471a1f0
AO
8128 emit_insn (gen_andsi3 (tmpreg, tmpreg,
8129 GEN_INT (trunc_int_for_mode
8130 (0x80808080, SImode))));
9076b9c1
JH
8131 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
8132 SImode, 1, 0, align_4_label);
e2e52e1b
JH
8133
8134 if (TARGET_CMOVE)
8135 {
8136 rtx reg = gen_reg_rtx (SImode);
0945b39d 8137 rtx reg2 = gen_reg_rtx (Pmode);
e2e52e1b
JH
8138 emit_move_insn (reg, tmpreg);
8139 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
8140
0f290768 8141 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 8142 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
8143 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
8144 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
8145 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
8146 gen_rtx_IF_THEN_ELSE (SImode, tmp,
77ebd435
AJ
8147 reg,
8148 tmpreg)));
e2e52e1b 8149 /* Emit lea manually to avoid clobbering of flags. */
0945b39d
JH
8150 emit_insn (gen_rtx_SET (SImode, reg2,
8151 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
e2e52e1b
JH
8152
8153 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
8154 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
8155 emit_insn (gen_rtx_SET (VOIDmode, out,
0945b39d 8156 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
a4f31c00
AJ
8157 reg2,
8158 out)));
e2e52e1b
JH
8159
8160 }
8161 else
8162 {
8163 rtx end_2_label = gen_label_rtx ();
8164 /* Is zero in the first two bytes? */
8165
16189740 8166 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
8167 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
8168 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
8169 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8170 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
8171 pc_rtx);
8172 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8173 JUMP_LABEL (tmp) = end_2_label;
8174
0f290768 8175 /* Not in the first two. Move two bytes forward. */
e2e52e1b 8176 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
0945b39d
JH
8177 if (TARGET_64BIT)
8178 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
8179 else
8180 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
e2e52e1b
JH
8181
8182 emit_label (end_2_label);
8183
8184 }
8185
0f290768 8186 /* Avoid branch in fixing the byte. */
e2e52e1b 8187 tmpreg = gen_lowpart (QImode, tmpreg);
7e08e190 8188 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
0945b39d
JH
8189 if (TARGET_64BIT)
8190 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
8191 else
8192 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
e075ae69
RH
8193
8194 emit_label (end_0_label);
8195}
8196\f
e075ae69
RH
8197/* Clear stack slot assignments remembered from previous functions.
8198 This is called from INIT_EXPANDERS once before RTL is emitted for each
8199 function. */
8200
36edd3cc
BS
8201static void
8202ix86_init_machine_status (p)
1526a060 8203 struct function *p;
e075ae69 8204{
37b15744
RH
8205 p->machine = (struct machine_function *)
8206 xcalloc (1, sizeof (struct machine_function));
e075ae69
RH
8207}
8208
1526a060
BS
8209/* Mark machine specific bits of P for GC. */
8210static void
8211ix86_mark_machine_status (p)
8212 struct function *p;
8213{
37b15744 8214 struct machine_function *machine = p->machine;
1526a060
BS
8215 enum machine_mode mode;
8216 int n;
8217
37b15744
RH
8218 if (! machine)
8219 return;
8220
1526a060
BS
8221 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
8222 mode = (enum machine_mode) ((int) mode + 1))
8223 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
37b15744
RH
8224 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
8225}
8226
8227static void
8228ix86_free_machine_status (p)
8229 struct function *p;
8230{
8231 free (p->machine);
8232 p->machine = NULL;
1526a060
BS
8233}
8234
e075ae69
RH
8235/* Return a MEM corresponding to a stack slot with mode MODE.
8236 Allocate a new slot if necessary.
8237
8238 The RTL for a function can have several slots available: N is
8239 which slot to use. */
8240
8241rtx
8242assign_386_stack_local (mode, n)
8243 enum machine_mode mode;
8244 int n;
8245{
8246 if (n < 0 || n >= MAX_386_STACK_LOCALS)
8247 abort ();
8248
8249 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
8250 ix86_stack_locals[(int) mode][n]
8251 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
8252
8253 return ix86_stack_locals[(int) mode][n];
8254}
8255\f
8256/* Calculate the length of the memory address in the instruction
8257 encoding. Does not include the one-byte modrm, opcode, or prefix. */
8258
8259static int
8260memory_address_length (addr)
8261 rtx addr;
8262{
8263 struct ix86_address parts;
8264 rtx base, index, disp;
8265 int len;
8266
8267 if (GET_CODE (addr) == PRE_DEC
0d7d98ee
JH
8268 || GET_CODE (addr) == POST_INC
8269 || GET_CODE (addr) == PRE_MODIFY
8270 || GET_CODE (addr) == POST_MODIFY)
e075ae69 8271 return 0;
3f803cd9 8272
e075ae69
RH
8273 if (! ix86_decompose_address (addr, &parts))
8274 abort ();
3f803cd9 8275
e075ae69
RH
8276 base = parts.base;
8277 index = parts.index;
8278 disp = parts.disp;
8279 len = 0;
3f803cd9 8280
e075ae69
RH
8281 /* Register Indirect. */
8282 if (base && !index && !disp)
8283 {
8284 /* Special cases: ebp and esp need the two-byte modrm form. */
8285 if (addr == stack_pointer_rtx
8286 || addr == arg_pointer_rtx
564d80f4
JH
8287 || addr == frame_pointer_rtx
8288 || addr == hard_frame_pointer_rtx)
e075ae69 8289 len = 1;
3f803cd9 8290 }
e9a25f70 8291
e075ae69
RH
8292 /* Direct Addressing. */
8293 else if (disp && !base && !index)
8294 len = 4;
8295
3f803cd9
SC
8296 else
8297 {
e075ae69
RH
8298 /* Find the length of the displacement constant. */
8299 if (disp)
8300 {
8301 if (GET_CODE (disp) == CONST_INT
8302 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
8303 len = 1;
8304 else
8305 len = 4;
8306 }
3f803cd9 8307
e075ae69
RH
8308 /* An index requires the two-byte modrm form. */
8309 if (index)
8310 len += 1;
3f803cd9
SC
8311 }
8312
e075ae69
RH
8313 return len;
8314}
79325812 8315
6ef67412
JH
8316/* Compute default value for "length_immediate" attribute. When SHORTFORM is set
8317 expect that insn have 8bit immediate alternative. */
e075ae69 8318int
6ef67412 8319ix86_attr_length_immediate_default (insn, shortform)
e075ae69 8320 rtx insn;
6ef67412 8321 int shortform;
e075ae69 8322{
6ef67412
JH
8323 int len = 0;
8324 int i;
6c698a6d 8325 extract_insn_cached (insn);
6ef67412
JH
8326 for (i = recog_data.n_operands - 1; i >= 0; --i)
8327 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 8328 {
6ef67412 8329 if (len)
3071fab5 8330 abort ();
6ef67412
JH
8331 if (shortform
8332 && GET_CODE (recog_data.operand[i]) == CONST_INT
8333 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
8334 len = 1;
8335 else
8336 {
8337 switch (get_attr_mode (insn))
8338 {
8339 case MODE_QI:
8340 len+=1;
8341 break;
8342 case MODE_HI:
8343 len+=2;
8344 break;
8345 case MODE_SI:
8346 len+=4;
8347 break;
8348 default:
8349 fatal_insn ("Unknown insn mode", insn);
8350 }
8351 }
3071fab5 8352 }
6ef67412
JH
8353 return len;
8354}
8355/* Compute default value for "length_address" attribute. */
8356int
8357ix86_attr_length_address_default (insn)
8358 rtx insn;
8359{
8360 int i;
6c698a6d 8361 extract_insn_cached (insn);
1ccbefce
RH
8362 for (i = recog_data.n_operands - 1; i >= 0; --i)
8363 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 8364 {
6ef67412 8365 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
8366 break;
8367 }
6ef67412 8368 return 0;
3f803cd9 8369}
e075ae69
RH
8370\f
8371/* Return the maximum number of instructions a cpu can issue. */
b657fc39 8372
e075ae69
RH
8373int
8374ix86_issue_rate ()
b657fc39 8375{
e075ae69 8376 switch (ix86_cpu)
b657fc39 8377 {
e075ae69
RH
8378 case PROCESSOR_PENTIUM:
8379 case PROCESSOR_K6:
8380 return 2;
79325812 8381
e075ae69 8382 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
8383 case PROCESSOR_PENTIUM4:
8384 case PROCESSOR_ATHLON:
e075ae69 8385 return 3;
b657fc39 8386
b657fc39 8387 default:
e075ae69 8388 return 1;
b657fc39 8389 }
b657fc39
L
8390}
8391
e075ae69
RH
8392/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
8393 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 8394
e075ae69
RH
8395static int
8396ix86_flags_dependant (insn, dep_insn, insn_type)
8397 rtx insn, dep_insn;
8398 enum attr_type insn_type;
8399{
8400 rtx set, set2;
b657fc39 8401
e075ae69
RH
8402 /* Simplify the test for uninteresting insns. */
8403 if (insn_type != TYPE_SETCC
8404 && insn_type != TYPE_ICMOV
8405 && insn_type != TYPE_FCMOV
8406 && insn_type != TYPE_IBR)
8407 return 0;
b657fc39 8408
e075ae69
RH
8409 if ((set = single_set (dep_insn)) != 0)
8410 {
8411 set = SET_DEST (set);
8412 set2 = NULL_RTX;
8413 }
8414 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
8415 && XVECLEN (PATTERN (dep_insn), 0) == 2
8416 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
8417 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
8418 {
8419 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
8420 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
8421 }
78a0d70c
ZW
8422 else
8423 return 0;
b657fc39 8424
78a0d70c
ZW
8425 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
8426 return 0;
b657fc39 8427
78a0d70c
ZW
8428 /* This test is true if the dependant insn reads the flags but
8429 not any other potentially set register. */
8430 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
8431 return 0;
8432
8433 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
8434 return 0;
8435
8436 return 1;
e075ae69 8437}
b657fc39 8438
e075ae69
RH
8439/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
8440 address with operands set by DEP_INSN. */
8441
8442static int
8443ix86_agi_dependant (insn, dep_insn, insn_type)
8444 rtx insn, dep_insn;
8445 enum attr_type insn_type;
8446{
8447 rtx addr;
8448
8449 if (insn_type == TYPE_LEA)
5fbdde42
RH
8450 {
8451 addr = PATTERN (insn);
8452 if (GET_CODE (addr) == SET)
8453 ;
8454 else if (GET_CODE (addr) == PARALLEL
8455 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
8456 addr = XVECEXP (addr, 0, 0);
8457 else
8458 abort ();
8459 addr = SET_SRC (addr);
8460 }
e075ae69
RH
8461 else
8462 {
8463 int i;
6c698a6d 8464 extract_insn_cached (insn);
1ccbefce
RH
8465 for (i = recog_data.n_operands - 1; i >= 0; --i)
8466 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 8467 {
1ccbefce 8468 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
8469 goto found;
8470 }
8471 return 0;
8472 found:;
b657fc39
L
8473 }
8474
e075ae69 8475 return modified_in_p (addr, dep_insn);
b657fc39 8476}
a269a03c
JC
8477
8478int
e075ae69 8479ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
8480 rtx insn, link, dep_insn;
8481 int cost;
8482{
e075ae69 8483 enum attr_type insn_type, dep_insn_type;
0b5107cf 8484 enum attr_memory memory;
e075ae69 8485 rtx set, set2;
9b00189f 8486 int dep_insn_code_number;
a269a03c 8487
309ada50 8488 /* Anti and output depenancies have zero cost on all CPUs. */
e075ae69 8489 if (REG_NOTE_KIND (link) != 0)
309ada50 8490 return 0;
a269a03c 8491
9b00189f
JH
8492 dep_insn_code_number = recog_memoized (dep_insn);
8493
e075ae69 8494 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 8495 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 8496 return cost;
a269a03c 8497
1c71e60e
JH
8498 insn_type = get_attr_type (insn);
8499 dep_insn_type = get_attr_type (dep_insn);
9b00189f 8500
a269a03c
JC
8501 switch (ix86_cpu)
8502 {
8503 case PROCESSOR_PENTIUM:
e075ae69
RH
8504 /* Address Generation Interlock adds a cycle of latency. */
8505 if (ix86_agi_dependant (insn, dep_insn, insn_type))
8506 cost += 1;
8507
8508 /* ??? Compares pair with jump/setcc. */
8509 if (ix86_flags_dependant (insn, dep_insn, insn_type))
8510 cost = 0;
8511
8512 /* Floating point stores require value to be ready one cycle ealier. */
0f290768 8513 if (insn_type == TYPE_FMOV
e075ae69
RH
8514 && get_attr_memory (insn) == MEMORY_STORE
8515 && !ix86_agi_dependant (insn, dep_insn, insn_type))
8516 cost += 1;
8517 break;
a269a03c 8518
e075ae69 8519 case PROCESSOR_PENTIUMPRO:
0f290768 8520 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
8521 increase the cost here for non-imov insns. */
8522 if (dep_insn_type != TYPE_IMOV
8523 && dep_insn_type != TYPE_FMOV
0b5107cf
JH
8524 && ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
8525 || memory == MEMORY_BOTH))
e075ae69
RH
8526 cost += 1;
8527
8528 /* INT->FP conversion is expensive. */
8529 if (get_attr_fp_int_src (dep_insn))
8530 cost += 5;
8531
8532 /* There is one cycle extra latency between an FP op and a store. */
8533 if (insn_type == TYPE_FMOV
8534 && (set = single_set (dep_insn)) != NULL_RTX
8535 && (set2 = single_set (insn)) != NULL_RTX
8536 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
8537 && GET_CODE (SET_DEST (set2)) == MEM)
8538 cost += 1;
8539 break;
a269a03c 8540
e075ae69
RH
8541 case PROCESSOR_K6:
8542 /* The esp dependency is resolved before the instruction is really
8543 finished. */
8544 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
8545 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
8546 return 1;
a269a03c 8547
0f290768 8548 /* Since we can't represent delayed latencies of load+operation,
e075ae69 8549 increase the cost here for non-imov insns. */
0b5107cf
JH
8550 if ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
8551 || memory == MEMORY_BOTH)
e075ae69
RH
8552 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
8553
8554 /* INT->FP conversion is expensive. */
8555 if (get_attr_fp_int_src (dep_insn))
8556 cost += 5;
a14003ee 8557 break;
e075ae69 8558
309ada50 8559 case PROCESSOR_ATHLON:
0b5107cf
JH
8560 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
8561 || memory == MEMORY_BOTH)
8562 {
8563 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
8564 cost += 2;
8565 else
8566 cost += 3;
8567 }
309ada50 8568
a269a03c 8569 default:
a269a03c
JC
8570 break;
8571 }
8572
8573 return cost;
8574}
0a726ef1 8575
e075ae69
RH
/* Per-block scheduler state; cleared by ix86_sched_init.  */
static union
{
  /* State read by ix86_sched_reorder_ppro and ix86_dump_ppro_packet.  */
  struct ppro_sched_data
  {
    /* Insns occupying the three decoder slots (NULL when empty).  */
    rtx decode[3];
    /* Count stored at the end of ix86_sched_reorder_ppro.  */
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;
0a726ef1 8584
e075ae69
RH
8585static int
8586ix86_safe_length (insn)
8587 rtx insn;
8588{
8589 if (recog_memoized (insn) >= 0)
8590 return get_attr_length(insn);
8591 else
8592 return 128;
8593}
0a726ef1 8594
e075ae69
RH
8595static int
8596ix86_safe_length_prefix (insn)
8597 rtx insn;
8598{
8599 if (recog_memoized (insn) >= 0)
8600 return get_attr_length(insn);
8601 else
8602 return 0;
8603}
8604
8605static enum attr_memory
8606ix86_safe_memory (insn)
8607 rtx insn;
8608{
8609 if (recog_memoized (insn) >= 0)
8610 return get_attr_memory(insn);
8611 else
8612 return MEMORY_UNKNOWN;
8613}
0a726ef1 8614
e075ae69
RH
8615static enum attr_pent_pair
8616ix86_safe_pent_pair (insn)
8617 rtx insn;
8618{
8619 if (recog_memoized (insn) >= 0)
8620 return get_attr_pent_pair(insn);
8621 else
8622 return PENT_PAIR_NP;
8623}
0a726ef1 8624
e075ae69
RH
8625static enum attr_ppro_uops
8626ix86_safe_ppro_uops (insn)
8627 rtx insn;
8628{
8629 if (recog_memoized (insn) >= 0)
8630 return get_attr_ppro_uops (insn);
8631 else
8632 return PPRO_UOPS_MANY;
8633}
0a726ef1 8634
e075ae69
RH
8635static void
8636ix86_dump_ppro_packet (dump)
8637 FILE *dump;
0a726ef1 8638{
e075ae69 8639 if (ix86_sched_data.ppro.decode[0])
0a726ef1 8640 {
e075ae69
RH
8641 fprintf (dump, "PPRO packet: %d",
8642 INSN_UID (ix86_sched_data.ppro.decode[0]));
8643 if (ix86_sched_data.ppro.decode[1])
8644 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
8645 if (ix86_sched_data.ppro.decode[2])
8646 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
8647 fputc ('\n', dump);
8648 }
8649}
0a726ef1 8650
e075ae69 8651/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 8652
e075ae69
RH
8653void
8654ix86_sched_init (dump, sched_verbose)
8655 FILE *dump ATTRIBUTE_UNUSED;
8656 int sched_verbose ATTRIBUTE_UNUSED;
8657{
8658 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
8659}
8660
8661/* Shift INSN to SLOT, and shift everything else down. */
8662
8663static void
8664ix86_reorder_insn (insnp, slot)
8665 rtx *insnp, *slot;
8666{
8667 if (insnp != slot)
8668 {
8669 rtx insn = *insnp;
0f290768 8670 do
e075ae69
RH
8671 insnp[0] = insnp[1];
8672 while (++insnp != slot);
8673 *insnp = insn;
0a726ef1 8674 }
e075ae69
RH
8675}
8676
8677/* Find an instruction with given pairability and minimal amount of cycles
8678 lost by the fact that the CPU waits for both pipelines to finish before
8679 reading next instructions. Also take care that both instructions together
8680 can not exceed 7 bytes. */
8681
8682static rtx *
8683ix86_pent_find_pair (e_ready, ready, type, first)
8684 rtx *e_ready;
8685 rtx *ready;
8686 enum attr_pent_pair type;
8687 rtx first;
8688{
8689 int mincycles, cycles;
8690 enum attr_pent_pair tmp;
8691 enum attr_memory memory;
8692 rtx *insnp, *bestinsnp = NULL;
0a726ef1 8693
e075ae69
RH
8694 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
8695 return NULL;
0a726ef1 8696
e075ae69
RH
8697 memory = ix86_safe_memory (first);
8698 cycles = result_ready_cost (first);
8699 mincycles = INT_MAX;
8700
8701 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
8702 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
8703 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
6ec6d558 8704 {
e075ae69
RH
8705 enum attr_memory second_memory;
8706 int secondcycles, currentcycles;
8707
8708 second_memory = ix86_safe_memory (*insnp);
8709 secondcycles = result_ready_cost (*insnp);
8710 currentcycles = abs (cycles - secondcycles);
8711
8712 if (secondcycles >= 1 && cycles >= 1)
6ec6d558 8713 {
e075ae69
RH
8714 /* Two read/modify/write instructions together takes two
8715 cycles longer. */
8716 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
8717 currentcycles += 2;
0f290768 8718
e075ae69
RH
8719 /* Read modify/write instruction followed by read/modify
8720 takes one cycle longer. */
8721 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
8722 && tmp != PENT_PAIR_UV
8723 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
8724 currentcycles += 1;
6ec6d558 8725 }
e075ae69
RH
8726 if (currentcycles < mincycles)
8727 bestinsnp = insnp, mincycles = currentcycles;
6ec6d558 8728 }
0a726ef1 8729
e075ae69
RH
8730 return bestinsnp;
8731}
8732
78a0d70c 8733/* Subroutines of ix86_sched_reorder. */
e075ae69 8734
c6991660 8735static void
78a0d70c 8736ix86_sched_reorder_pentium (ready, e_ready)
e075ae69 8737 rtx *ready;
78a0d70c 8738 rtx *e_ready;
e075ae69 8739{
78a0d70c 8740 enum attr_pent_pair pair1, pair2;
e075ae69 8741 rtx *insnp;
e075ae69 8742
78a0d70c
ZW
8743 /* This wouldn't be necessary if Haifa knew that static insn ordering
8744 is important to which pipe an insn is issued to. So we have to make
8745 some minor rearrangements. */
e075ae69 8746
78a0d70c
ZW
8747 pair1 = ix86_safe_pent_pair (*e_ready);
8748
8749 /* If the first insn is non-pairable, let it be. */
8750 if (pair1 == PENT_PAIR_NP)
8751 return;
8752
8753 pair2 = PENT_PAIR_NP;
8754 insnp = 0;
8755
8756 /* If the first insn is UV or PV pairable, search for a PU
8757 insn to go with. */
8758 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
e075ae69 8759 {
78a0d70c
ZW
8760 insnp = ix86_pent_find_pair (e_ready-1, ready,
8761 PENT_PAIR_PU, *e_ready);
8762 if (insnp)
8763 pair2 = PENT_PAIR_PU;
8764 }
e075ae69 8765
78a0d70c
ZW
8766 /* If the first insn is PU or UV pairable, search for a PV
8767 insn to go with. */
8768 if (pair2 == PENT_PAIR_NP
8769 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
8770 {
8771 insnp = ix86_pent_find_pair (e_ready-1, ready,
8772 PENT_PAIR_PV, *e_ready);
8773 if (insnp)
8774 pair2 = PENT_PAIR_PV;
8775 }
e075ae69 8776
78a0d70c
ZW
8777 /* If the first insn is pairable, search for a UV
8778 insn to go with. */
8779 if (pair2 == PENT_PAIR_NP)
8780 {
8781 insnp = ix86_pent_find_pair (e_ready-1, ready,
8782 PENT_PAIR_UV, *e_ready);
8783 if (insnp)
8784 pair2 = PENT_PAIR_UV;
8785 }
e075ae69 8786
78a0d70c
ZW
8787 if (pair2 == PENT_PAIR_NP)
8788 return;
e075ae69 8789
78a0d70c
ZW
8790 /* Found something! Decide if we need to swap the order. */
8791 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
8792 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
8793 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
8794 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
8795 ix86_reorder_insn (insnp, e_ready);
8796 else
8797 ix86_reorder_insn (insnp, e_ready - 1);
8798}
e075ae69 8799
c6991660 8800static void
78a0d70c
ZW
8801ix86_sched_reorder_ppro (ready, e_ready)
8802 rtx *ready;
8803 rtx *e_ready;
8804{
8805 rtx decode[3];
8806 enum attr_ppro_uops cur_uops;
8807 int issued_this_cycle;
8808 rtx *insnp;
8809 int i;
e075ae69 8810
0f290768 8811 /* At this point .ppro.decode contains the state of the three
78a0d70c 8812 decoders from last "cycle". That is, those insns that were
0f290768 8813 actually independent. But here we're scheduling for the
78a0d70c
ZW
8814 decoder, and we may find things that are decodable in the
8815 same cycle. */
e075ae69 8816
0f290768 8817 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
78a0d70c 8818 issued_this_cycle = 0;
e075ae69 8819
78a0d70c
ZW
8820 insnp = e_ready;
8821 cur_uops = ix86_safe_ppro_uops (*insnp);
0a726ef1 8822
78a0d70c
ZW
8823 /* If the decoders are empty, and we've a complex insn at the
8824 head of the priority queue, let it issue without complaint. */
8825 if (decode[0] == NULL)
8826 {
8827 if (cur_uops == PPRO_UOPS_MANY)
8828 {
8829 decode[0] = *insnp;
8830 goto ppro_done;
8831 }
8832
8833 /* Otherwise, search for a 2-4 uop unsn to issue. */
8834 while (cur_uops != PPRO_UOPS_FEW)
8835 {
8836 if (insnp == ready)
8837 break;
8838 cur_uops = ix86_safe_ppro_uops (*--insnp);
8839 }
8840
8841 /* If so, move it to the head of the line. */
8842 if (cur_uops == PPRO_UOPS_FEW)
8843 ix86_reorder_insn (insnp, e_ready);
0a726ef1 8844
78a0d70c
ZW
8845 /* Issue the head of the queue. */
8846 issued_this_cycle = 1;
8847 decode[0] = *e_ready--;
8848 }
fb693d44 8849
78a0d70c
ZW
8850 /* Look for simple insns to fill in the other two slots. */
8851 for (i = 1; i < 3; ++i)
8852 if (decode[i] == NULL)
8853 {
8854 if (ready >= e_ready)
8855 goto ppro_done;
fb693d44 8856
e075ae69
RH
8857 insnp = e_ready;
8858 cur_uops = ix86_safe_ppro_uops (*insnp);
78a0d70c
ZW
8859 while (cur_uops != PPRO_UOPS_ONE)
8860 {
8861 if (insnp == ready)
8862 break;
8863 cur_uops = ix86_safe_ppro_uops (*--insnp);
8864 }
fb693d44 8865
78a0d70c
ZW
8866 /* Found one. Move it to the head of the queue and issue it. */
8867 if (cur_uops == PPRO_UOPS_ONE)
e075ae69 8868 {
78a0d70c
ZW
8869 ix86_reorder_insn (insnp, e_ready);
8870 decode[i] = *e_ready--;
8871 issued_this_cycle++;
8872 continue;
8873 }
fb693d44 8874
78a0d70c
ZW
8875 /* ??? Didn't find one. Ideally, here we would do a lazy split
8876 of 2-uop insns, issue one and queue the other. */
8877 }
fb693d44 8878
78a0d70c
ZW
8879 ppro_done:
8880 if (issued_this_cycle == 0)
8881 issued_this_cycle = 1;
8882 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
8883}
fb693d44 8884
0f290768 8885/* We are about to being issuing insns for this clock cycle.
78a0d70c
ZW
8886 Override the default sort algorithm to better slot instructions. */
8887int
8888ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
8889 FILE *dump ATTRIBUTE_UNUSED;
8890 int sched_verbose ATTRIBUTE_UNUSED;
8891 rtx *ready;
8892 int n_ready;
8893 int clock_var ATTRIBUTE_UNUSED;
8894{
8895 rtx *e_ready = ready + n_ready - 1;
fb693d44 8896
78a0d70c
ZW
8897 if (n_ready < 2)
8898 goto out;
e075ae69 8899
78a0d70c
ZW
8900 switch (ix86_cpu)
8901 {
8902 default:
8903 break;
e075ae69 8904
78a0d70c
ZW
8905 case PROCESSOR_PENTIUM:
8906 ix86_sched_reorder_pentium (ready, e_ready);
8907 break;
e075ae69 8908
78a0d70c
ZW
8909 case PROCESSOR_PENTIUMPRO:
8910 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 8911 break;
fb693d44
RH
8912 }
8913
e075ae69
RH
8914out:
8915 return ix86_issue_rate ();
8916}
fb693d44 8917
e075ae69
RH
8918/* We are about to issue INSN. Return the number of insns left on the
8919 ready queue that can be issued this cycle. */
b222082e 8920
e075ae69
RH
8921int
8922ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
8923 FILE *dump;
8924 int sched_verbose;
8925 rtx insn;
8926 int can_issue_more;
8927{
8928 int i;
8929 switch (ix86_cpu)
fb693d44 8930 {
e075ae69
RH
8931 default:
8932 return can_issue_more - 1;
fb693d44 8933
e075ae69
RH
8934 case PROCESSOR_PENTIUMPRO:
8935 {
8936 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
fb693d44 8937
e075ae69
RH
8938 if (uops == PPRO_UOPS_MANY)
8939 {
8940 if (sched_verbose)
8941 ix86_dump_ppro_packet (dump);
8942 ix86_sched_data.ppro.decode[0] = insn;
8943 ix86_sched_data.ppro.decode[1] = NULL;
8944 ix86_sched_data.ppro.decode[2] = NULL;
8945 if (sched_verbose)
8946 ix86_dump_ppro_packet (dump);
8947 ix86_sched_data.ppro.decode[0] = NULL;
8948 }
8949 else if (uops == PPRO_UOPS_FEW)
8950 {
8951 if (sched_verbose)
8952 ix86_dump_ppro_packet (dump);
8953 ix86_sched_data.ppro.decode[0] = insn;
8954 ix86_sched_data.ppro.decode[1] = NULL;
8955 ix86_sched_data.ppro.decode[2] = NULL;
8956 }
8957 else
8958 {
8959 for (i = 0; i < 3; ++i)
8960 if (ix86_sched_data.ppro.decode[i] == NULL)
8961 {
8962 ix86_sched_data.ppro.decode[i] = insn;
8963 break;
8964 }
8965 if (i == 3)
8966 abort ();
8967 if (i == 2)
8968 {
8969 if (sched_verbose)
8970 ix86_dump_ppro_packet (dump);
8971 ix86_sched_data.ppro.decode[0] = NULL;
8972 ix86_sched_data.ppro.decode[1] = NULL;
8973 ix86_sched_data.ppro.decode[2] = NULL;
8974 }
8975 }
8976 }
8977 return --ix86_sched_data.ppro.issued_this_cycle;
8978 }
fb693d44 8979}
a7180f70 8980\f
0e4970d7
RK
8981/* Walk through INSNS and look for MEM references whose address is DSTREG or
8982 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
8983 appropriate. */
8984
8985void
8986ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
8987 rtx insns;
8988 rtx dstref, srcref, dstreg, srcreg;
8989{
8990 rtx insn;
8991
8992 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
8993 if (INSN_P (insn))
8994 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
8995 dstreg, srcreg);
8996}
8997
8998/* Subroutine of above to actually do the updating by recursively walking
8999 the rtx. */
9000
9001static void
9002ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
9003 rtx x;
9004 rtx dstref, srcref, dstreg, srcreg;
9005{
9006 enum rtx_code code = GET_CODE (x);
9007 const char *format_ptr = GET_RTX_FORMAT (code);
9008 int i, j;
9009
9010 if (code == MEM && XEXP (x, 0) == dstreg)
9011 MEM_COPY_ATTRIBUTES (x, dstref);
9012 else if (code == MEM && XEXP (x, 0) == srcreg)
9013 MEM_COPY_ATTRIBUTES (x, srcref);
9014
9015 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
9016 {
9017 if (*format_ptr == 'e')
9018 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
9019 dstreg, srcreg);
9020 else if (*format_ptr == 'E')
9021 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
397be6cf 9022 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
0e4970d7
RK
9023 dstreg, srcreg);
9024 }
9025}
9026\f
a7180f70
BS
9027/* Compute the alignment given to a constant that is being placed in memory.
9028 EXP is the constant and ALIGN is the alignment that the object would
9029 ordinarily have.
9030 The value of this function is used instead of that alignment to align
9031 the object. */
9032
9033int
9034ix86_constant_alignment (exp, align)
9035 tree exp;
9036 int align;
9037{
9038 if (TREE_CODE (exp) == REAL_CST)
9039 {
9040 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
9041 return 64;
9042 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
9043 return 128;
9044 }
9045 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
9046 && align < 256)
9047 return 256;
9048
9049 return align;
9050}
9051
9052/* Compute the alignment for a static variable.
9053 TYPE is the data type, and ALIGN is the alignment that
9054 the object would ordinarily have. The value of this function is used
9055 instead of that alignment to align the object. */
9056
9057int
9058ix86_data_alignment (type, align)
9059 tree type;
9060 int align;
9061{
9062 if (AGGREGATE_TYPE_P (type)
9063 && TYPE_SIZE (type)
9064 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
9065 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
9066 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
9067 return 256;
9068
0d7d98ee
JH
9069 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
9070 to 16byte boundary. */
9071 if (TARGET_64BIT)
9072 {
9073 if (AGGREGATE_TYPE_P (type)
9074 && TYPE_SIZE (type)
9075 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
9076 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
9077 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
9078 return 128;
9079 }
9080
a7180f70
BS
9081 if (TREE_CODE (type) == ARRAY_TYPE)
9082 {
9083 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
9084 return 64;
9085 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
9086 return 128;
9087 }
9088 else if (TREE_CODE (type) == COMPLEX_TYPE)
9089 {
0f290768 9090
a7180f70
BS
9091 if (TYPE_MODE (type) == DCmode && align < 64)
9092 return 64;
9093 if (TYPE_MODE (type) == XCmode && align < 128)
9094 return 128;
9095 }
9096 else if ((TREE_CODE (type) == RECORD_TYPE
9097 || TREE_CODE (type) == UNION_TYPE
9098 || TREE_CODE (type) == QUAL_UNION_TYPE)
9099 && TYPE_FIELDS (type))
9100 {
9101 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
9102 return 64;
9103 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
9104 return 128;
9105 }
9106 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
9107 || TREE_CODE (type) == INTEGER_TYPE)
9108 {
9109 if (TYPE_MODE (type) == DFmode && align < 64)
9110 return 64;
9111 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
9112 return 128;
9113 }
9114
9115 return align;
9116}
9117
9118/* Compute the alignment for a local variable.
9119 TYPE is the data type, and ALIGN is the alignment that
9120 the object would ordinarily have. The value of this macro is used
9121 instead of that alignment to align the object. */
9122
9123int
9124ix86_local_alignment (type, align)
9125 tree type;
9126 int align;
9127{
0d7d98ee
JH
9128 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
9129 to 16byte boundary. */
9130 if (TARGET_64BIT)
9131 {
9132 if (AGGREGATE_TYPE_P (type)
9133 && TYPE_SIZE (type)
9134 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
9135 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
9136 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
9137 return 128;
9138 }
a7180f70
BS
9139 if (TREE_CODE (type) == ARRAY_TYPE)
9140 {
9141 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
9142 return 64;
9143 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
9144 return 128;
9145 }
9146 else if (TREE_CODE (type) == COMPLEX_TYPE)
9147 {
9148 if (TYPE_MODE (type) == DCmode && align < 64)
9149 return 64;
9150 if (TYPE_MODE (type) == XCmode && align < 128)
9151 return 128;
9152 }
9153 else if ((TREE_CODE (type) == RECORD_TYPE
9154 || TREE_CODE (type) == UNION_TYPE
9155 || TREE_CODE (type) == QUAL_UNION_TYPE)
9156 && TYPE_FIELDS (type))
9157 {
9158 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
9159 return 64;
9160 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
9161 return 128;
9162 }
9163 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
9164 || TREE_CODE (type) == INTEGER_TYPE)
9165 {
0f290768 9166
a7180f70
BS
9167 if (TYPE_MODE (type) == DFmode && align < 64)
9168 return 64;
9169 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
9170 return 128;
9171 }
9172 return align;
9173}
0ed08620
JH
9174\f
9175/* Emit RTL insns to initialize the variable parts of a trampoline.
9176 FNADDR is an RTX for the address of the function's pure code.
9177 CXT is an RTX for the static chain value for the function. */
9178void
9179x86_initialize_trampoline (tramp, fnaddr, cxt)
9180 rtx tramp, fnaddr, cxt;
9181{
9182 if (!TARGET_64BIT)
9183 {
9184 /* Compute offset from the end of the jmp to the target function. */
9185 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
9186 plus_constant (tramp, 10),
9187 NULL_RTX, 1, OPTAB_DIRECT);
9188 emit_move_insn (gen_rtx_MEM (QImode, tramp),
9189 GEN_INT (trunc_int_for_mode (0xb9, QImode)));
9190 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
9191 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
9192 GEN_INT (trunc_int_for_mode (0xe9, QImode)));
9193 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
9194 }
9195 else
9196 {
9197 int offset = 0;
9198 /* Try to load address using shorter movl instead of movabs.
9199 We may want to support movq for kernel mode, but kernel does not use
9200 trampolines at the moment. */
9201 if (x86_64_zero_extended_value (fnaddr))
9202 {
9203 fnaddr = copy_to_mode_reg (DImode, fnaddr);
9204 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9205 GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
9206 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
9207 gen_lowpart (SImode, fnaddr));
9208 offset += 6;
9209 }
9210 else
9211 {
9212 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9213 GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
9214 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
9215 fnaddr);
9216 offset += 10;
9217 }
9218 /* Load static chain using movabs to r10. */
9219 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9220 GEN_INT (trunc_int_for_mode (0xba49, HImode)));
9221 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
9222 cxt);
9223 offset += 10;
9224 /* Jump to the r11 */
9225 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9226 GEN_INT (trunc_int_for_mode (0xff49, HImode)));
9227 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
9228 GEN_INT (trunc_int_for_mode (0xe3, HImode)));
9229 offset += 3;
9230 if (offset > TRAMPOLINE_SIZE)
9231 abort();
9232 }
9233}
bd793c65
BS
9234
/* Register the builtin NAME with function type TYPE and builtin code
   CODE.  Thin wrapper around builtin_function that always passes
   BUILT_IN_MD as the class and NULL as the last argument.  */
9235#define def_builtin(NAME, TYPE, CODE) \
df4ae160 9236 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL)
bd793c65
BS
/* One entry of the builtin tables below (bdesc_comi, bdesc_2arg,
   bdesc_1arg).  */
9237struct builtin_description
9238{
9239 enum insn_code icode; /* CODE_FOR_* insn pattern behind the builtin. */
9240 const char * name; /* "__builtin_ia32_*" name, or 0 when the table gives no name. */
9241 enum ix86_builtins code; /* IX86_BUILTIN_* code of the builtin. */
9242 enum rtx_code comparison; /* Comparison code for compare builtins, 0 otherwise. */
9243 unsigned int flag; /* 1 only on GT/GE entries written as LT/LE -- presumably
 "swap the operands"; TODO confirm against the expander. */
9244};
9245
/* Builtins based on the sse_comi and sse_ucomi patterns.  Each entry
   names the comparison code to use.  The "gt" and "ge" variants are
   listed as LT and LE with the flag field set to 1 -- presumably the
   expander swaps the operands for them; TODO confirm.  */
9246static struct builtin_description bdesc_comi[] =
9247{
9248 { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
9249 { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
9250 { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
9251 { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
9252 { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
9253 { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
9254 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
9255 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
9256 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
9257 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
9258 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
9259 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
9260};
9261
/* Builtins that are more or less simple operations on two operands.
   ix86_init_mmx_sse_builtins walks this table and registers every entry
   that has a name, choosing the function type from the mode of the
   insn's operand 1 (with an override for the mask-generating
   compares).  Entries whose name is 0 are skipped by that loop.  */
9262static struct builtin_description bdesc_2arg[] =
9263{
9264 /* SSE */
9265 { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
9266 { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
9267 { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
9268 { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
9269 { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
9270 { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
9271 { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
9272 { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
9273
 /* Mask-generating compares.  The "gt"/"ge" forms are listed as LT/LE
    with flag 1, and the negated forms use the maskncmp patterns.  */
9274 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
9275 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
9276 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
9277 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
9278 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
9279 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
9280 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
9281 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
9282 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
9283 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
9284 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
9285 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
9286 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
9287 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
9288 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
9289 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
9290 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
9291 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
9292 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
9293 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
9294 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
9295 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
9296 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
9297 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
9298
9299 { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
9300 { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
9301 { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
9302 { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
9303
9304 { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
9305 { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
9306 { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
9307 { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
9308
9309 { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
9310 { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
9311 { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
9312 { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
9313 { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
9314
9315 /* MMX */
9316 { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
9317 { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
9318 { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
9319 { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
9320 { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
9321 { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
9322
9323 { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
9324 { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
9325 { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
9326 { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
9327 { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
9328 { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
9329 { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
9330 { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
9331
9332 { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
9333 { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
9334 { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
9335
9336 { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
9337 { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
9338 { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
9339 { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
9340
9341 { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
9342 { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
9343
9344 { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
9345 { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
9346 { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
9347 { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
9348 { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
9349 { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
9350
9351 { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
9352 { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
9353 { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
9354 { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
9355
9356 { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
9357 { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
9358 { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
9359 { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
9360 { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
9361 { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
9362
9363 /* Special.  These entries carry no name, so the generic registration
 loop skips them; they are given their names and function types by
 individual def_builtin calls instead (e.g. __builtin_ia32_psllw). */
9364 { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
9365 { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
9366 { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
9367
9368 { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
9369 { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
9370
9371 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
9372 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
9373 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
9374 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
9375 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
9376 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
9377
9378 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
9379 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
9380 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
9381 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
9382 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
9383 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
9384
9385 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
9386 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
9387 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
9388 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
9389
9390 { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
9391 { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
9392
9393};
9394
/* Builtins that take a single operand.  No entry carries a name here,
   so none of them can be registered from this table by name;
   presumably they are declared individually elsewhere -- TODO
   confirm.  */
9395static struct builtin_description bdesc_1arg[] =
9396{
9397 { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
9398 { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
9399
9400 { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
9401 { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
9402 { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
9403
9404 { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
9405 { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
9406 { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
9407 { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
9408
9409};
9410
f6155fda
SS
9411/* Set up all the target-specific builtins. */
9412void
9413ix86_init_builtins ()
9414{
9415 if (TARGET_MMX)
9416 ix86_init_mmx_sse_builtins ();
9417}
9418
9419/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
bd793c65
BS
9420 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
9421 builtins. */
9422void
f6155fda 9423ix86_init_mmx_sse_builtins ()
bd793c65
BS
9424{
9425 struct builtin_description * d;
77ebd435 9426 size_t i;
cbd5937a 9427 tree endlink = void_list_node;
bd793c65
BS
9428
9429 tree pchar_type_node = build_pointer_type (char_type_node);
9430 tree pfloat_type_node = build_pointer_type (float_type_node);
9431 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
9432 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
9433
9434 /* Comparisons. */
9435 tree int_ftype_v4sf_v4sf
9436 = build_function_type (integer_type_node,
9437 tree_cons (NULL_TREE, V4SF_type_node,
9438 tree_cons (NULL_TREE,
9439 V4SF_type_node,
9440 endlink)));
9441 tree v4si_ftype_v4sf_v4sf
9442 = build_function_type (V4SI_type_node,
9443 tree_cons (NULL_TREE, V4SF_type_node,
9444 tree_cons (NULL_TREE,
9445 V4SF_type_node,
9446 endlink)));
9447 /* MMX/SSE/integer conversions. */
9448 tree int_ftype_v4sf_int
9449 = build_function_type (integer_type_node,
9450 tree_cons (NULL_TREE, V4SF_type_node,
9451 tree_cons (NULL_TREE,
9452 integer_type_node,
9453 endlink)));
9454 tree int_ftype_v4sf
9455 = build_function_type (integer_type_node,
9456 tree_cons (NULL_TREE, V4SF_type_node,
9457 endlink));
9458 tree int_ftype_v8qi
9459 = build_function_type (integer_type_node,
9460 tree_cons (NULL_TREE, V8QI_type_node,
9461 endlink));
9462 tree int_ftype_v2si
9463 = build_function_type (integer_type_node,
9464 tree_cons (NULL_TREE, V2SI_type_node,
9465 endlink));
9466 tree v2si_ftype_int
9467 = build_function_type (V2SI_type_node,
9468 tree_cons (NULL_TREE, integer_type_node,
9469 endlink));
9470 tree v4sf_ftype_v4sf_int
9471 = build_function_type (integer_type_node,
9472 tree_cons (NULL_TREE, V4SF_type_node,
9473 tree_cons (NULL_TREE, integer_type_node,
9474 endlink)));
9475 tree v4sf_ftype_v4sf_v2si
9476 = build_function_type (V4SF_type_node,
9477 tree_cons (NULL_TREE, V4SF_type_node,
9478 tree_cons (NULL_TREE, V2SI_type_node,
9479 endlink)));
9480 tree int_ftype_v4hi_int
9481 = build_function_type (integer_type_node,
9482 tree_cons (NULL_TREE, V4HI_type_node,
9483 tree_cons (NULL_TREE, integer_type_node,
9484 endlink)));
9485 tree v4hi_ftype_v4hi_int_int
332316cd 9486 = build_function_type (V4HI_type_node,
bd793c65
BS
9487 tree_cons (NULL_TREE, V4HI_type_node,
9488 tree_cons (NULL_TREE, integer_type_node,
9489 tree_cons (NULL_TREE,
9490 integer_type_node,
9491 endlink))));
9492 /* Miscellaneous. */
9493 tree v8qi_ftype_v4hi_v4hi
9494 = build_function_type (V8QI_type_node,
9495 tree_cons (NULL_TREE, V4HI_type_node,
9496 tree_cons (NULL_TREE, V4HI_type_node,
9497 endlink)));
9498 tree v4hi_ftype_v2si_v2si
9499 = build_function_type (V4HI_type_node,
9500 tree_cons (NULL_TREE, V2SI_type_node,
9501 tree_cons (NULL_TREE, V2SI_type_node,
9502 endlink)));
9503 tree v4sf_ftype_v4sf_v4sf_int
9504 = build_function_type (V4SF_type_node,
9505 tree_cons (NULL_TREE, V4SF_type_node,
9506 tree_cons (NULL_TREE, V4SF_type_node,
9507 tree_cons (NULL_TREE,
9508 integer_type_node,
9509 endlink))));
9510 tree v4hi_ftype_v8qi_v8qi
9511 = build_function_type (V4HI_type_node,
9512 tree_cons (NULL_TREE, V8QI_type_node,
9513 tree_cons (NULL_TREE, V8QI_type_node,
9514 endlink)));
9515 tree v2si_ftype_v4hi_v4hi
9516 = build_function_type (V2SI_type_node,
9517 tree_cons (NULL_TREE, V4HI_type_node,
9518 tree_cons (NULL_TREE, V4HI_type_node,
9519 endlink)));
9520 tree v4hi_ftype_v4hi_int
9521 = build_function_type (V4HI_type_node,
9522 tree_cons (NULL_TREE, V4HI_type_node,
9523 tree_cons (NULL_TREE, integer_type_node,
9524 endlink)));
9525 tree di_ftype_di_int
9526 = build_function_type (long_long_unsigned_type_node,
9527 tree_cons (NULL_TREE, long_long_unsigned_type_node,
9528 tree_cons (NULL_TREE, integer_type_node,
9529 endlink)));
9530 tree v8qi_ftype_v8qi_di
9531 = build_function_type (V8QI_type_node,
9532 tree_cons (NULL_TREE, V8QI_type_node,
9533 tree_cons (NULL_TREE,
9534 long_long_integer_type_node,
9535 endlink)));
9536 tree v4hi_ftype_v4hi_di
9537 = build_function_type (V4HI_type_node,
9538 tree_cons (NULL_TREE, V4HI_type_node,
9539 tree_cons (NULL_TREE,
9540 long_long_integer_type_node,
9541 endlink)));
9542 tree v2si_ftype_v2si_di
9543 = build_function_type (V2SI_type_node,
9544 tree_cons (NULL_TREE, V2SI_type_node,
9545 tree_cons (NULL_TREE,
9546 long_long_integer_type_node,
9547 endlink)));
9548 tree void_ftype_void
9549 = build_function_type (void_type_node, endlink);
9550 tree void_ftype_pchar_int
9551 = build_function_type (void_type_node,
9552 tree_cons (NULL_TREE, pchar_type_node,
9553 tree_cons (NULL_TREE, integer_type_node,
9554 endlink)));
9555 tree void_ftype_unsigned
9556 = build_function_type (void_type_node,
9557 tree_cons (NULL_TREE, unsigned_type_node,
9558 endlink));
9559 tree unsigned_ftype_void
9560 = build_function_type (unsigned_type_node, endlink);
9561 tree di_ftype_void
9562 = build_function_type (long_long_unsigned_type_node, endlink);
9563 tree ti_ftype_void
9564 = build_function_type (intTI_type_node, endlink);
9565 tree v2si_ftype_v4sf
9566 = build_function_type (V2SI_type_node,
9567 tree_cons (NULL_TREE, V4SF_type_node,
9568 endlink));
9569 /* Loads/stores. */
9570 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
9571 tree_cons (NULL_TREE, V8QI_type_node,
9572 tree_cons (NULL_TREE,
9573 pchar_type_node,
9574 endlink)));
9575 tree void_ftype_v8qi_v8qi_pchar
9576 = build_function_type (void_type_node, maskmovq_args);
9577 tree v4sf_ftype_pfloat
9578 = build_function_type (V4SF_type_node,
9579 tree_cons (NULL_TREE, pfloat_type_node,
9580 endlink));
9581 tree v4sf_ftype_float
9582 = build_function_type (V4SF_type_node,
9583 tree_cons (NULL_TREE, float_type_node,
9584 endlink));
9585 tree v4sf_ftype_float_float_float_float
9586 = build_function_type (V4SF_type_node,
9587 tree_cons (NULL_TREE, float_type_node,
9588 tree_cons (NULL_TREE, float_type_node,
9589 tree_cons (NULL_TREE,
9590 float_type_node,
9591 tree_cons (NULL_TREE,
9592 float_type_node,
9593 endlink)))));
9594 /* @@@ the type is bogus */
9595 tree v4sf_ftype_v4sf_pv2si
9596 = build_function_type (V4SF_type_node,
9597 tree_cons (NULL_TREE, V4SF_type_node,
9598 tree_cons (NULL_TREE, pv2si_type_node,
9599 endlink)));
9600 tree v4sf_ftype_pv2si_v4sf
9601 = build_function_type (V4SF_type_node,
9602 tree_cons (NULL_TREE, V4SF_type_node,
9603 tree_cons (NULL_TREE, pv2si_type_node,
9604 endlink)));
9605 tree void_ftype_pfloat_v4sf
9606 = build_function_type (void_type_node,
9607 tree_cons (NULL_TREE, pfloat_type_node,
9608 tree_cons (NULL_TREE, V4SF_type_node,
9609 endlink)));
9610 tree void_ftype_pdi_di
9611 = build_function_type (void_type_node,
9612 tree_cons (NULL_TREE, pdi_type_node,
9613 tree_cons (NULL_TREE,
9614 long_long_unsigned_type_node,
9615 endlink)));
9616 /* Normal vector unops. */
9617 tree v4sf_ftype_v4sf
9618 = build_function_type (V4SF_type_node,
9619 tree_cons (NULL_TREE, V4SF_type_node,
9620 endlink));
0f290768 9621
bd793c65
BS
9622 /* Normal vector binops. */
9623 tree v4sf_ftype_v4sf_v4sf
9624 = build_function_type (V4SF_type_node,
9625 tree_cons (NULL_TREE, V4SF_type_node,
9626 tree_cons (NULL_TREE, V4SF_type_node,
9627 endlink)));
9628 tree v8qi_ftype_v8qi_v8qi
9629 = build_function_type (V8QI_type_node,
9630 tree_cons (NULL_TREE, V8QI_type_node,
9631 tree_cons (NULL_TREE, V8QI_type_node,
9632 endlink)));
9633 tree v4hi_ftype_v4hi_v4hi
9634 = build_function_type (V4HI_type_node,
9635 tree_cons (NULL_TREE, V4HI_type_node,
9636 tree_cons (NULL_TREE, V4HI_type_node,
9637 endlink)));
9638 tree v2si_ftype_v2si_v2si
9639 = build_function_type (V2SI_type_node,
9640 tree_cons (NULL_TREE, V2SI_type_node,
9641 tree_cons (NULL_TREE, V2SI_type_node,
9642 endlink)));
9643 tree ti_ftype_ti_ti
9644 = build_function_type (intTI_type_node,
9645 tree_cons (NULL_TREE, intTI_type_node,
9646 tree_cons (NULL_TREE, intTI_type_node,
9647 endlink)));
9648 tree di_ftype_di_di
9649 = build_function_type (long_long_unsigned_type_node,
9650 tree_cons (NULL_TREE, long_long_unsigned_type_node,
9651 tree_cons (NULL_TREE,
9652 long_long_unsigned_type_node,
9653 endlink)));
9654
9655 /* Add all builtins that are more or less simple operations on two
9656 operands. */
9657 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
9658 {
9659 /* Use one of the operands; the target can have a different mode for
9660 mask-generating compares. */
9661 enum machine_mode mode;
9662 tree type;
9663
9664 if (d->name == 0)
9665 continue;
9666 mode = insn_data[d->icode].operand[1].mode;
9667
9668 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
9669 continue;
9670
9671 switch (mode)
9672 {
9673 case V4SFmode:
9674 type = v4sf_ftype_v4sf_v4sf;
9675 break;
9676 case V8QImode:
9677 type = v8qi_ftype_v8qi_v8qi;
9678 break;
9679 case V4HImode:
9680 type = v4hi_ftype_v4hi_v4hi;
9681 break;
9682 case V2SImode:
9683 type = v2si_ftype_v2si_v2si;
9684 break;
9685 case TImode:
9686 type = ti_ftype_ti_ti;
9687 break;
9688 case DImode:
9689 type = di_ftype_di_di;
9690 break;
9691
9692 default:
9693 abort ();
9694 }
0f290768 9695
bd793c65
BS
9696 /* Override for comparisons. */
9697 if (d->icode == CODE_FOR_maskcmpv4sf3
9698 || d->icode == CODE_FOR_maskncmpv4sf3
9699 || d->icode == CODE_FOR_vmmaskcmpv4sf3
9700 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
9701 type = v4si_ftype_v4sf_v4sf;
9702
9703 def_builtin (d->name, type, d->code);
9704 }
9705
9706 /* Add the remaining MMX insns with somewhat more complicated types. */
9707 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
9708 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
9709 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
9710 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
9711 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
9712 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
9713 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
9714 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
9715 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
9716
9717 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
9718 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
9719 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
9720
9721 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
9722 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
9723
9724 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
9725 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
9726
9727 /* Everything beyond this point is SSE only. */
9728 if (! TARGET_SSE)
9729 return;
0f290768 9730
bd793c65
BS
9731 /* comi/ucomi insns. */
9732 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
9733 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
9734
9735 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
9736 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
9737 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
9738
9739 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
9740 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
9741 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
9742 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
9743 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
9744 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
9745
9746 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
9747 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
9748
9749 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
9750
9751 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
9752 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
9753 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
9754 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
9755 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
9756 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
9757
9758 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
9759 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
9760 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
9761 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
9762
9763 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
9764 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
9765 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
9766 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
9767
9768 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
9769 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
9770
9771 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
9772
9773 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
9774 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
9775 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
9776 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
9777 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
9778 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
9779
9780 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
9781
9782 /* Composite intrinsics. */
9783 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
9784 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
9785 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
9786 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
9787 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
9788 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
9789 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
9790}
9791
9792/* Errors in the source file can cause expand_expr to return const0_rtx
9793 where we expect a vector. To avoid crashing, use one of the vector
9794 clear instructions. */
9795static rtx
9796safe_vector_operand (x, mode)
9797 rtx x;
9798 enum machine_mode mode;
9799{
9800 if (x != const0_rtx)
9801 return x;
9802 x = gen_reg_rtx (mode);
9803
9804 if (VALID_MMX_REG_MODE (mode))
9805 emit_insn (gen_mmx_clrdi (mode == DImode ? x
9806 : gen_rtx_SUBREG (DImode, x, 0)));
9807 else
9808 emit_insn (gen_sse_clrti (mode == TImode ? x
9809 : gen_rtx_SUBREG (TImode, x, 0)));
9810 return x;
9811}
9812
9813/* Subroutine of ix86_expand_builtin to take care of binop insns. */
9814
9815static rtx
9816ix86_expand_binop_builtin (icode, arglist, target)
9817 enum insn_code icode;
9818 tree arglist;
9819 rtx target;
9820{
9821 rtx pat;
9822 tree arg0 = TREE_VALUE (arglist);
9823 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9824 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9825 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9826 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9827 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
9828 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
9829
9830 if (VECTOR_MODE_P (mode0))
9831 op0 = safe_vector_operand (op0, mode0);
9832 if (VECTOR_MODE_P (mode1))
9833 op1 = safe_vector_operand (op1, mode1);
9834
9835 if (! target
9836 || GET_MODE (target) != tmode
9837 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9838 target = gen_reg_rtx (tmode);
9839
9840 /* In case the insn wants input operands in modes different from
9841 the result, abort. */
9842 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
9843 abort ();
9844
9845 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9846 op0 = copy_to_mode_reg (mode0, op0);
9847 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
9848 op1 = copy_to_mode_reg (mode1, op1);
9849
9850 pat = GEN_FCN (icode) (target, op0, op1);
9851 if (! pat)
9852 return 0;
9853 emit_insn (pat);
9854 return target;
9855}
9856
9857/* Subroutine of ix86_expand_builtin to take care of stores. */
9858
9859static rtx
9860ix86_expand_store_builtin (icode, arglist, shuffle)
9861 enum insn_code icode;
9862 tree arglist;
9863 int shuffle;
9864{
9865 rtx pat;
9866 tree arg0 = TREE_VALUE (arglist);
9867 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9868 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9869 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9870 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
9871 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
9872
9873 if (VECTOR_MODE_P (mode1))
9874 op1 = safe_vector_operand (op1, mode1);
9875
9876 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
9877 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
9878 op1 = copy_to_mode_reg (mode1, op1);
9879 if (shuffle >= 0)
9880 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
9881 pat = GEN_FCN (icode) (op0, op1);
9882 if (pat)
9883 emit_insn (pat);
9884 return 0;
9885}
9886
9887/* Subroutine of ix86_expand_builtin to take care of unop insns. */
9888
9889static rtx
9890ix86_expand_unop_builtin (icode, arglist, target, do_load)
9891 enum insn_code icode;
9892 tree arglist;
9893 rtx target;
9894 int do_load;
9895{
9896 rtx pat;
9897 tree arg0 = TREE_VALUE (arglist);
9898 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9899 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9900 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
9901
9902 if (! target
9903 || GET_MODE (target) != tmode
9904 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9905 target = gen_reg_rtx (tmode);
9906 if (do_load)
9907 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
9908 else
9909 {
9910 if (VECTOR_MODE_P (mode0))
9911 op0 = safe_vector_operand (op0, mode0);
9912
9913 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9914 op0 = copy_to_mode_reg (mode0, op0);
9915 }
9916
9917 pat = GEN_FCN (icode) (target, op0);
9918 if (! pat)
9919 return 0;
9920 emit_insn (pat);
9921 return target;
9922}
9923
9924/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
9925 sqrtss, rsqrtss, rcpss. */
9926
9927static rtx
9928ix86_expand_unop1_builtin (icode, arglist, target)
9929 enum insn_code icode;
9930 tree arglist;
9931 rtx target;
9932{
9933 rtx pat;
9934 tree arg0 = TREE_VALUE (arglist);
9935 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9936 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9937 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
9938
9939 if (! target
9940 || GET_MODE (target) != tmode
9941 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9942 target = gen_reg_rtx (tmode);
9943
9944 if (VECTOR_MODE_P (mode0))
9945 op0 = safe_vector_operand (op0, mode0);
9946
9947 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9948 op0 = copy_to_mode_reg (mode0, op0);
9949
9950 pat = GEN_FCN (icode) (target, op0, op0);
9951 if (! pat)
9952 return 0;
9953 emit_insn (pat);
9954 return target;
9955}
9956
9957/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
9958
9959static rtx
9960ix86_expand_sse_compare (d, arglist, target)
9961 struct builtin_description *d;
9962 tree arglist;
9963 rtx target;
9964{
9965 rtx pat;
9966 tree arg0 = TREE_VALUE (arglist);
9967 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9968 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9969 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9970 rtx op2;
9971 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
9972 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
9973 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
9974 enum rtx_code comparison = d->comparison;
9975
9976 if (VECTOR_MODE_P (mode0))
9977 op0 = safe_vector_operand (op0, mode0);
9978 if (VECTOR_MODE_P (mode1))
9979 op1 = safe_vector_operand (op1, mode1);
9980
9981 /* Swap operands if we have a comparison that isn't available in
9982 hardware. */
9983 if (d->flag)
9984 {
9985 target = gen_reg_rtx (tmode);
9986 emit_move_insn (target, op1);
9987 op1 = op0;
9988 op0 = target;
9989 comparison = swap_condition (comparison);
9990 }
9991 else if (! target
9992 || GET_MODE (target) != tmode
9993 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
9994 target = gen_reg_rtx (tmode);
9995
9996 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
9997 op0 = copy_to_mode_reg (mode0, op0);
9998 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
9999 op1 = copy_to_mode_reg (mode1, op1);
10000
10001 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
10002 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
10003 if (! pat)
10004 return 0;
10005 emit_insn (pat);
10006 return target;
10007}
10008
10009/* Subroutine of ix86_expand_builtin to take care of comi insns. */
10010
10011static rtx
10012ix86_expand_sse_comi (d, arglist, target)
10013 struct builtin_description *d;
10014 tree arglist;
10015 rtx target;
10016{
10017 rtx pat;
10018 tree arg0 = TREE_VALUE (arglist);
10019 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10020 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10021 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10022 rtx op2;
10023 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
10024 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
10025 enum rtx_code comparison = d->comparison;
10026
10027 if (VECTOR_MODE_P (mode0))
10028 op0 = safe_vector_operand (op0, mode0);
10029 if (VECTOR_MODE_P (mode1))
10030 op1 = safe_vector_operand (op1, mode1);
10031
10032 /* Swap operands if we have a comparison that isn't available in
10033 hardware. */
10034 if (d->flag)
10035 {
10036 rtx tmp = op1;
10037 op1 = op0;
10038 op0 = tmp;
10039 comparison = swap_condition (comparison);
10040 }
10041
10042 target = gen_reg_rtx (SImode);
10043 emit_move_insn (target, const0_rtx);
10044 target = gen_rtx_SUBREG (QImode, target, 0);
10045
10046 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
10047 op0 = copy_to_mode_reg (mode0, op0);
10048 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
10049 op1 = copy_to_mode_reg (mode1, op1);
10050
10051 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
10052 pat = GEN_FCN (d->icode) (op0, op1, op2);
10053 if (! pat)
10054 return 0;
10055 emit_insn (pat);
10056 emit_insn (gen_setcc_2 (target, op2));
10057
10058 return target;
10059}
10060
10061/* Expand an expression EXP that calls a built-in function,
10062 with result going to TARGET if that's convenient
10063 (and in mode MODE if that's convenient).
10064 SUBTARGET may be used as the target for computing one of EXP's operands.
10065 IGNORE is nonzero if the value is to be ignored. */
10066
10067rtx
10068ix86_expand_builtin (exp, target, subtarget, mode, ignore)
10069 tree exp;
10070 rtx target;
10071 rtx subtarget ATTRIBUTE_UNUSED;
10072 enum machine_mode mode ATTRIBUTE_UNUSED;
10073 int ignore ATTRIBUTE_UNUSED;
10074{
10075 struct builtin_description *d;
77ebd435 10076 size_t i;
bd793c65
BS
10077 enum insn_code icode;
10078 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
10079 tree arglist = TREE_OPERAND (exp, 1);
10080 tree arg0, arg1, arg2, arg3;
10081 rtx op0, op1, op2, pat;
10082 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 10083 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
bd793c65
BS
10084
10085 switch (fcode)
10086 {
10087 case IX86_BUILTIN_EMMS:
10088 emit_insn (gen_emms ());
10089 return 0;
10090
10091 case IX86_BUILTIN_SFENCE:
10092 emit_insn (gen_sfence ());
10093 return 0;
10094
10095 case IX86_BUILTIN_M_FROM_INT:
10096 target = gen_reg_rtx (DImode);
10097 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
10098 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
10099 return target;
10100
10101 case IX86_BUILTIN_M_TO_INT:
10102 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
10103 op0 = copy_to_mode_reg (DImode, op0);
10104 target = gen_reg_rtx (SImode);
10105 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
10106 return target;
10107
10108 case IX86_BUILTIN_PEXTRW:
10109 icode = CODE_FOR_mmx_pextrw;
10110 arg0 = TREE_VALUE (arglist);
10111 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10112 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10113 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10114 tmode = insn_data[icode].operand[0].mode;
10115 mode0 = insn_data[icode].operand[1].mode;
10116 mode1 = insn_data[icode].operand[2].mode;
10117
10118 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
10119 op0 = copy_to_mode_reg (mode0, op0);
10120 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
10121 {
10122 /* @@@ better error message */
10123 error ("selector must be an immediate");
10124 return const0_rtx;
10125 }
10126 if (target == 0
10127 || GET_MODE (target) != tmode
10128 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10129 target = gen_reg_rtx (tmode);
10130 pat = GEN_FCN (icode) (target, op0, op1);
10131 if (! pat)
10132 return 0;
10133 emit_insn (pat);
10134 return target;
10135
10136 case IX86_BUILTIN_PINSRW:
10137 icode = CODE_FOR_mmx_pinsrw;
10138 arg0 = TREE_VALUE (arglist);
10139 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10140 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
10141 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10142 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10143 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
10144 tmode = insn_data[icode].operand[0].mode;
10145 mode0 = insn_data[icode].operand[1].mode;
10146 mode1 = insn_data[icode].operand[2].mode;
10147 mode2 = insn_data[icode].operand[3].mode;
10148
10149 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
10150 op0 = copy_to_mode_reg (mode0, op0);
10151 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
10152 op1 = copy_to_mode_reg (mode1, op1);
10153 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
10154 {
10155 /* @@@ better error message */
10156 error ("selector must be an immediate");
10157 return const0_rtx;
10158 }
10159 if (target == 0
10160 || GET_MODE (target) != tmode
10161 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10162 target = gen_reg_rtx (tmode);
10163 pat = GEN_FCN (icode) (target, op0, op1, op2);
10164 if (! pat)
10165 return 0;
10166 emit_insn (pat);
10167 return target;
10168
10169 case IX86_BUILTIN_MASKMOVQ:
10170 icode = CODE_FOR_mmx_maskmovq;
10171 /* Note the arg order is different from the operand order. */
10172 arg1 = TREE_VALUE (arglist);
10173 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
10174 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
10175 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10176 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10177 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
10178 mode0 = insn_data[icode].operand[0].mode;
10179 mode1 = insn_data[icode].operand[1].mode;
10180 mode2 = insn_data[icode].operand[2].mode;
10181
10182 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
10183 op0 = copy_to_mode_reg (mode0, op0);
10184 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
10185 op1 = copy_to_mode_reg (mode1, op1);
10186 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
10187 op2 = copy_to_mode_reg (mode2, op2);
10188 pat = GEN_FCN (icode) (op0, op1, op2);
10189 if (! pat)
10190 return 0;
10191 emit_insn (pat);
10192 return 0;
10193
10194 case IX86_BUILTIN_SQRTSS:
10195 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
10196 case IX86_BUILTIN_RSQRTSS:
10197 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
10198 case IX86_BUILTIN_RCPSS:
10199 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
10200
10201 case IX86_BUILTIN_LOADAPS:
10202 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
10203
10204 case IX86_BUILTIN_LOADUPS:
10205 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
10206
10207 case IX86_BUILTIN_STOREAPS:
10208 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
10209 case IX86_BUILTIN_STOREUPS:
10210 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
10211
10212 case IX86_BUILTIN_LOADSS:
10213 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
10214
10215 case IX86_BUILTIN_STORESS:
10216 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
10217
0f290768 10218 case IX86_BUILTIN_LOADHPS:
bd793c65
BS
10219 case IX86_BUILTIN_LOADLPS:
10220 icode = (fcode == IX86_BUILTIN_LOADHPS
10221 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
10222 arg0 = TREE_VALUE (arglist);
10223 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10224 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10225 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10226 tmode = insn_data[icode].operand[0].mode;
10227 mode0 = insn_data[icode].operand[1].mode;
10228 mode1 = insn_data[icode].operand[2].mode;
10229
10230 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
10231 op0 = copy_to_mode_reg (mode0, op0);
10232 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
10233 if (target == 0
10234 || GET_MODE (target) != tmode
10235 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10236 target = gen_reg_rtx (tmode);
10237 pat = GEN_FCN (icode) (target, op0, op1);
10238 if (! pat)
10239 return 0;
10240 emit_insn (pat);
10241 return target;
0f290768 10242
bd793c65
BS
10243 case IX86_BUILTIN_STOREHPS:
10244 case IX86_BUILTIN_STORELPS:
10245 icode = (fcode == IX86_BUILTIN_STOREHPS
10246 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
10247 arg0 = TREE_VALUE (arglist);
10248 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10249 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10250 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10251 mode0 = insn_data[icode].operand[1].mode;
10252 mode1 = insn_data[icode].operand[2].mode;
10253
10254 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
10255 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
10256 op1 = copy_to_mode_reg (mode1, op1);
10257
10258 pat = GEN_FCN (icode) (op0, op0, op1);
10259 if (! pat)
10260 return 0;
10261 emit_insn (pat);
10262 return 0;
10263
10264 case IX86_BUILTIN_MOVNTPS:
10265 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
10266 case IX86_BUILTIN_MOVNTQ:
10267 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
10268
10269 case IX86_BUILTIN_LDMXCSR:
10270 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
10271 target = assign_386_stack_local (SImode, 0);
10272 emit_move_insn (target, op0);
10273 emit_insn (gen_ldmxcsr (target));
10274 return 0;
10275
10276 case IX86_BUILTIN_STMXCSR:
10277 target = assign_386_stack_local (SImode, 0);
10278 emit_insn (gen_stmxcsr (target));
10279 return copy_to_mode_reg (SImode, target);
10280
10281 case IX86_BUILTIN_PREFETCH:
10282 icode = CODE_FOR_prefetch;
10283 arg0 = TREE_VALUE (arglist);
10284 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10285 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10286 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
332316cd
BS
10287 mode0 = insn_data[icode].operand[0].mode;
10288 mode1 = insn_data[icode].operand[1].mode;
bd793c65 10289
332316cd 10290 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
bd793c65
BS
10291 {
10292 /* @@@ better error message */
10293 error ("selector must be an immediate");
10294 return const0_rtx;
10295 }
10296
332316cd 10297 op0 = copy_to_mode_reg (Pmode, op0);
bd793c65
BS
10298 pat = GEN_FCN (icode) (op0, op1);
10299 if (! pat)
10300 return 0;
10301 emit_insn (pat);
10302 return target;
0f290768 10303
bd793c65
BS
10304 case IX86_BUILTIN_SHUFPS:
10305 icode = CODE_FOR_sse_shufps;
10306 arg0 = TREE_VALUE (arglist);
10307 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10308 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
10309 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10310 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10311 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
10312 tmode = insn_data[icode].operand[0].mode;
10313 mode0 = insn_data[icode].operand[1].mode;
10314 mode1 = insn_data[icode].operand[2].mode;
10315 mode2 = insn_data[icode].operand[3].mode;
10316
10317 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
10318 op0 = copy_to_mode_reg (mode0, op0);
10319 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
10320 op1 = copy_to_mode_reg (mode1, op1);
10321 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
10322 {
10323 /* @@@ better error message */
10324 error ("mask must be an immediate");
10325 return const0_rtx;
10326 }
10327 if (target == 0
10328 || GET_MODE (target) != tmode
10329 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10330 target = gen_reg_rtx (tmode);
10331 pat = GEN_FCN (icode) (target, op0, op1, op2);
10332 if (! pat)
10333 return 0;
10334 emit_insn (pat);
10335 return target;
10336
10337 case IX86_BUILTIN_PSHUFW:
10338 icode = CODE_FOR_mmx_pshufw;
10339 arg0 = TREE_VALUE (arglist);
10340 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10341 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10342 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10343 tmode = insn_data[icode].operand[0].mode;
10344 mode0 = insn_data[icode].operand[2].mode;
10345 mode1 = insn_data[icode].operand[3].mode;
10346
10347 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
10348 op0 = copy_to_mode_reg (mode0, op0);
10349 if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
10350 {
10351 /* @@@ better error message */
10352 error ("mask must be an immediate");
10353 return const0_rtx;
10354 }
10355 if (target == 0
10356 || GET_MODE (target) != tmode
10357 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10358 target = gen_reg_rtx (tmode);
10359 pat = GEN_FCN (icode) (target, target, op0, op1);
10360 if (! pat)
10361 return 0;
10362 emit_insn (pat);
10363 return target;
10364
10365 /* Composite intrinsics. */
10366 case IX86_BUILTIN_SETPS1:
10367 target = assign_386_stack_local (SFmode, 0);
10368 arg0 = TREE_VALUE (arglist);
f4ef873c 10369 emit_move_insn (adjust_address (target, SFmode, 0),
bd793c65
BS
10370 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
10371 op0 = gen_reg_rtx (V4SFmode);
f4ef873c 10372 emit_insn (gen_sse_loadss (op0, adjust_address (target, V4SFmode, 0)));
bd793c65
BS
10373 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
10374 return op0;
0f290768 10375
bd793c65
BS
10376 case IX86_BUILTIN_SETPS:
10377 target = assign_386_stack_local (V4SFmode, 0);
bd793c65
BS
10378 arg0 = TREE_VALUE (arglist);
10379 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10380 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
10381 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
d2037d01 10382 emit_move_insn (adjust_address (target, SFmode, 0),
bd793c65 10383 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
d2037d01 10384 emit_move_insn (adjust_address (target, SFmode, 4),
bd793c65 10385 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
d2037d01 10386 emit_move_insn (adjust_address (target, SFmode, 8),
bd793c65 10387 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
d2037d01 10388 emit_move_insn (adjust_address (target, SFmode, 12),
bd793c65
BS
10389 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
10390 op0 = gen_reg_rtx (V4SFmode);
10391 emit_insn (gen_sse_movaps (op0, target));
10392 return op0;
10393
10394 case IX86_BUILTIN_CLRPS:
10395 target = gen_reg_rtx (TImode);
10396 emit_insn (gen_sse_clrti (target));
10397 return target;
10398
10399 case IX86_BUILTIN_LOADRPS:
10400 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
10401 gen_reg_rtx (V4SFmode), 1);
10402 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
10403 return target;
10404
10405 case IX86_BUILTIN_LOADPS1:
10406 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
10407 gen_reg_rtx (V4SFmode), 1);
10408 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
10409 return target;
10410
10411 case IX86_BUILTIN_STOREPS1:
10412 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
10413 case IX86_BUILTIN_STORERPS:
10414 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
10415
10416 case IX86_BUILTIN_MMX_ZERO:
10417 target = gen_reg_rtx (DImode);
10418 emit_insn (gen_mmx_clrdi (target));
10419 return target;
10420
10421 default:
10422 break;
10423 }
10424
10425 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
10426 if (d->code == fcode)
10427 {
10428 /* Compares are treated specially. */
10429 if (d->icode == CODE_FOR_maskcmpv4sf3
10430 || d->icode == CODE_FOR_vmmaskcmpv4sf3
10431 || d->icode == CODE_FOR_maskncmpv4sf3
10432 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
10433 return ix86_expand_sse_compare (d, arglist, target);
10434
10435 return ix86_expand_binop_builtin (d->icode, arglist, target);
10436 }
10437
10438 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
10439 if (d->code == fcode)
10440 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
0f290768 10441
bd793c65
BS
10442 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
10443 if (d->code == fcode)
10444 return ix86_expand_sse_comi (d, arglist, target);
0f290768 10445
bd793c65
BS
10446 /* @@@ Should really do something sensible here. */
10447 return 0;
bd793c65 10448}
4211a8fb
JH
10449
10450/* Store OPERAND to the memory after reload is completed. This means
10451 that we can't easilly use assign_stack_local. */
10452rtx
10453ix86_force_to_memory (mode, operand)
10454 enum machine_mode mode;
10455 rtx operand;
10456{
898d374d 10457 rtx result;
4211a8fb
JH
10458 if (!reload_completed)
10459 abort ();
898d374d
JH
10460 if (TARGET_64BIT && TARGET_RED_ZONE)
10461 {
10462 result = gen_rtx_MEM (mode,
10463 gen_rtx_PLUS (Pmode,
10464 stack_pointer_rtx,
10465 GEN_INT (-RED_ZONE_SIZE)));
10466 emit_move_insn (result, operand);
10467 }
10468 else if (TARGET_64BIT && !TARGET_RED_ZONE)
4211a8fb 10469 {
898d374d 10470 switch (mode)
4211a8fb 10471 {
898d374d
JH
10472 case HImode:
10473 case SImode:
10474 operand = gen_lowpart (DImode, operand);
10475 /* FALLTHRU */
10476 case DImode:
4211a8fb 10477 emit_insn (
898d374d
JH
10478 gen_rtx_SET (VOIDmode,
10479 gen_rtx_MEM (DImode,
10480 gen_rtx_PRE_DEC (DImode,
10481 stack_pointer_rtx)),
10482 operand));
10483 break;
10484 default:
10485 abort ();
10486 }
10487 result = gen_rtx_MEM (mode, stack_pointer_rtx);
10488 }
10489 else
10490 {
10491 switch (mode)
10492 {
10493 case DImode:
10494 {
10495 rtx operands[2];
10496 split_di (&operand, 1, operands, operands + 1);
10497 emit_insn (
10498 gen_rtx_SET (VOIDmode,
10499 gen_rtx_MEM (SImode,
10500 gen_rtx_PRE_DEC (Pmode,
10501 stack_pointer_rtx)),
10502 operands[1]));
10503 emit_insn (
10504 gen_rtx_SET (VOIDmode,
10505 gen_rtx_MEM (SImode,
10506 gen_rtx_PRE_DEC (Pmode,
10507 stack_pointer_rtx)),
10508 operands[0]));
10509 }
10510 break;
10511 case HImode:
10512 /* It is better to store HImodes as SImodes. */
10513 if (!TARGET_PARTIAL_REG_STALL)
10514 operand = gen_lowpart (SImode, operand);
10515 /* FALLTHRU */
10516 case SImode:
4211a8fb 10517 emit_insn (
898d374d
JH
10518 gen_rtx_SET (VOIDmode,
10519 gen_rtx_MEM (GET_MODE (operand),
10520 gen_rtx_PRE_DEC (SImode,
10521 stack_pointer_rtx)),
10522 operand));
10523 break;
10524 default:
10525 abort ();
4211a8fb 10526 }
898d374d 10527 result = gen_rtx_MEM (mode, stack_pointer_rtx);
4211a8fb 10528 }
898d374d 10529 return result;
4211a8fb
JH
10530}
10531
10532/* Free operand from the memory. */
10533void
10534ix86_free_from_memory (mode)
10535 enum machine_mode mode;
10536{
898d374d
JH
10537 if (!TARGET_64BIT || !TARGET_RED_ZONE)
10538 {
10539 int size;
10540
10541 if (mode == DImode || TARGET_64BIT)
10542 size = 8;
10543 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
10544 size = 2;
10545 else
10546 size = 4;
10547 /* Use LEA to deallocate stack space. In peephole2 it will be converted
10548 to pop or add instruction if registers are available. */
10549 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10550 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10551 GEN_INT (size))));
10552 }
4211a8fb 10553}
a946dd00 10554
f84aa48a
JH
10555/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
10556 QImode must go into class Q_REGS.
10557 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
10558 movdf to do mem-to-mem moves through integer regs. */
10559enum reg_class
10560ix86_preferred_reload_class (x, class)
10561 rtx x;
10562 enum reg_class class;
10563{
10564 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
10565 {
10566 /* SSE can't load any constant directly yet. */
10567 if (SSE_CLASS_P (class))
10568 return NO_REGS;
10569 /* Floats can load 0 and 1. */
10570 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
10571 {
10572 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
10573 if (MAYBE_SSE_CLASS_P (class))
10574 return (reg_class_subset_p (class, GENERAL_REGS)
10575 ? GENERAL_REGS : FLOAT_REGS);
10576 else
10577 return class;
10578 }
10579 /* General regs can load everything. */
10580 if (reg_class_subset_p (class, GENERAL_REGS))
10581 return GENERAL_REGS;
10582 /* In case we haven't resolved FLOAT or SSE yet, give up. */
10583 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
10584 return NO_REGS;
10585 }
10586 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
10587 return NO_REGS;
10588 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
10589 return Q_REGS;
10590 return class;
10591}
10592
10593/* If we are copying between general and FP registers, we need a memory
10594 location. The same is true for SSE and MMX registers.
10595
10596 The macro can't work reliably when one of the CLASSES is class containing
10597 registers from multiple units (SSE, MMX, integer). We avoid this by never
10598 combining those units in single alternative in the machine description.
10599 Ensure that this constraint holds to avoid unexpected surprises.
10600
10601 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
10602 enforce these sanity checks. */
10603int
10604ix86_secondary_memory_needed (class1, class2, mode, strict)
10605 enum reg_class class1, class2;
10606 enum machine_mode mode;
10607 int strict;
10608{
10609 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
10610 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
10611 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
10612 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
10613 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
10614 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
10615 {
10616 if (strict)
10617 abort ();
10618 else
10619 return 1;
10620 }
10621 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
10622 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
10623 && (mode) != SImode)
10624 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
10625 && (mode) != SImode));
10626}
10627/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 10628 one in class CLASS2.
f84aa48a
JH
10629
10630 It is not required that the cost always equal 2 when FROM is the same as TO;
10631 on some machines it is expensive to move between registers if they are not
10632 general registers. */
10633int
10634ix86_register_move_cost (mode, class1, class2)
10635 enum machine_mode mode;
10636 enum reg_class class1, class2;
10637{
10638 /* In case we require secondary memory, compute cost of the store followed
10639 by load. In case of copying from general_purpose_register we may emit
10640 multiple stores followed by single load causing memory size mismatch
10641 stall. Count this as arbitarily high cost of 20. */
10642 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
10643 {
92d0fb09 10644 int add_cost = 0;
62415523 10645 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
92d0fb09 10646 add_cost = 20;
62415523 10647 return (MEMORY_MOVE_COST (mode, class1, 0)
92d0fb09 10648 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
f84aa48a 10649 }
92d0fb09 10650 /* Moves between SSE/MMX and integer unit are expensive. */
62415523
JH
10651 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
10652 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
fa79946e
JH
10653 return ix86_cost->mmxsse_to_integer;
10654 if (MAYBE_FLOAT_CLASS_P (class1))
10655 return ix86_cost->fp_move;
10656 if (MAYBE_SSE_CLASS_P (class1))
10657 return ix86_cost->sse_move;
10658 if (MAYBE_MMX_CLASS_P (class1))
10659 return ix86_cost->mmx_move;
f84aa48a
JH
10660 return 2;
10661}
10662
a946dd00
JH
10663/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
10664int
10665ix86_hard_regno_mode_ok (regno, mode)
10666 int regno;
10667 enum machine_mode mode;
10668{
10669 /* Flags and only flags can only hold CCmode values. */
10670 if (CC_REGNO_P (regno))
10671 return GET_MODE_CLASS (mode) == MODE_CC;
10672 if (GET_MODE_CLASS (mode) == MODE_CC
10673 || GET_MODE_CLASS (mode) == MODE_RANDOM
10674 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
10675 return 0;
10676 if (FP_REGNO_P (regno))
10677 return VALID_FP_MODE_P (mode);
10678 if (SSE_REGNO_P (regno))
10679 return VALID_SSE_REG_MODE (mode);
10680 if (MMX_REGNO_P (regno))
10681 return VALID_MMX_REG_MODE (mode);
10682 /* We handle both integer and floats in the general purpose registers.
10683 In future we should be able to handle vector modes as well. */
10684 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
10685 return 0;
10686 /* Take care for QImode values - they can be in non-QI regs, but then
10687 they do cause partial register stalls. */
d2836273 10688 if (regno < 4 || mode != QImode || TARGET_64BIT)
a946dd00
JH
10689 return 1;
10690 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
10691}
fa79946e
JH
10692
10693/* Return the cost of moving data of mode M between a
10694 register and memory. A value of 2 is the default; this cost is
10695 relative to those in `REGISTER_MOVE_COST'.
10696
10697 If moving between registers and memory is more expensive than
10698 between two registers, you should define this macro to express the
a4f31c00
AJ
10699 relative cost.
10700
fa79946e
JH
10701 Model also increased moving costs of QImode registers in non
10702 Q_REGS classes.
10703 */
10704int
10705ix86_memory_move_cost (mode, class, in)
10706 enum machine_mode mode;
10707 enum reg_class class;
10708 int in;
10709{
10710 if (FLOAT_CLASS_P (class))
10711 {
10712 int index;
10713 switch (mode)
10714 {
10715 case SFmode:
10716 index = 0;
10717 break;
10718 case DFmode:
10719 index = 1;
10720 break;
10721 case XFmode:
10722 case TFmode:
10723 index = 2;
10724 break;
10725 default:
10726 return 100;
10727 }
10728 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
10729 }
10730 if (SSE_CLASS_P (class))
10731 {
10732 int index;
10733 switch (GET_MODE_SIZE (mode))
10734 {
10735 case 4:
10736 index = 0;
10737 break;
10738 case 8:
10739 index = 1;
10740 break;
10741 case 16:
10742 index = 2;
10743 break;
10744 default:
10745 return 100;
10746 }
10747 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
10748 }
10749 if (MMX_CLASS_P (class))
10750 {
10751 int index;
10752 switch (GET_MODE_SIZE (mode))
10753 {
10754 case 4:
10755 index = 0;
10756 break;
10757 case 8:
10758 index = 1;
10759 break;
10760 default:
10761 return 100;
10762 }
10763 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
10764 }
10765 switch (GET_MODE_SIZE (mode))
10766 {
10767 case 1:
10768 if (in)
10769 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
10770 : ix86_cost->movzbl_load);
10771 else
10772 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
10773 : ix86_cost->int_store[0] + 4);
10774 break;
10775 case 2:
10776 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
10777 default:
10778 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
10779 if (mode == TFmode)
10780 mode = XFmode;
3bb7e126 10781 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
fa79946e
JH
10782 * (int) GET_MODE_SIZE (mode) / 4);
10783 }
10784}
0ecf09f9 10785
7c262518
RH
#if defined(TARGET_ELF) && defined(TARGET_COFF)
/* Switch to the section NAME with the given FLAGS and ALIGN, using the
   default ELF handler when TARGET_ELF is nonzero and the default COFF
   handler otherwise.  Only compiled when both object formats are
   configured.  */
static void
sco_asm_named_section (name, flags, align)
     const char *name;
     unsigned int flags;
     unsigned int align;
{
  if (!TARGET_ELF)
    {
      default_coff_asm_named_section (name, flags, align);
      return;
    }

  default_elf_asm_named_section (name, flags, align);
}
#endif
This page took 2.691815 seconds and 5 git commands to generate.