]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
varasm.c (named_section_flags): Remove align parameter.
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
e075ae69 1/* Subroutines used for code generation on IA-32.
8752c357 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
4592bdcb 3 Free Software Foundation, Inc.
2a2ab3f9
JVA
4
5This file is part of GNU CC.
6
7GNU CC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU CC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU CC; see the file COPYING. If not, write to
97aadbb9 19the Free Software Foundation, 59 Temple Place - Suite 330,
0f290768 20Boston, MA 02111-1307, USA. */
2a2ab3f9 21
2a2ab3f9 22#include "config.h"
bb5177ac 23#include "system.h"
2a2ab3f9 24#include "rtl.h"
6baf1cc8
BS
25#include "tree.h"
26#include "tm_p.h"
2a2ab3f9
JVA
27#include "regs.h"
28#include "hard-reg-set.h"
29#include "real.h"
30#include "insn-config.h"
31#include "conditions.h"
2a2ab3f9
JVA
32#include "output.h"
33#include "insn-attr.h"
2a2ab3f9 34#include "flags.h"
a8ffcc81 35#include "except.h"
ecbc4695 36#include "function.h"
00c79232 37#include "recog.h"
ced8dd8c 38#include "expr.h"
e78d8e51 39#include "optabs.h"
f103890b 40#include "toplev.h"
e075ae69 41#include "basic-block.h"
1526a060 42#include "ggc.h"
672a6f42
NB
43#include "target.h"
44#include "target-def.h"
2a2ab3f9 45
/* Default stack-probe limit: -1 means no limit, i.e. stack checking
   is effectively disabled unless the target configuration overrides it.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif
49
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  6,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  23,					/* cost of a divide/mod */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode
					   (field follows the fp-load entry;
					   old comment said "loading integer
					   registers", which was a copy-paste
					   error)  */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
};
82
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  12,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  40,					/* cost of a divide/mod */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3					/* MMX or SSE register to integer */
};
114
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  11,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  25,					/* cost of a divide/mod */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3					/* MMX or SSE register to integer */
};
146
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  4,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  17,					/* cost of a divide/mod */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3					/* MMX or SSE register to integer */
};
178
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  18,					/* cost of a divide/mod */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6					/* MMX or SSE register to integer */
};
210
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  5,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  42,					/* cost of a divide/mod */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 20},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 16},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6					/* MMX or SSE register to integer */
};
242
struct processor_costs pentium4_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  8,					/* variable shift costs */
  8,					/* constant shift costs */
  30,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  112,					/* cost of a divide/mod */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
};
274
/* Cost table for the processor currently being tuned for; reassigned in
   override_options from processor_target_table.  Pentium is the default.  */
struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  Each m_* macro is the bit for
   one PROCESSOR_* enumerator; each x86_* constant below is the set of
   processors for which the named tuning/optimization should be applied
   (tested against the bit for ix86_cpu).  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)
#define m_PENT4  (1<<PROCESSOR_PENTIUM4)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
/* movzx/movsx preferred; 386 and K6 deliberately excluded (see comment).  */
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
/* Convenience wrapper: a MEM in MODE addressed via the hard frame
   pointer (%ebp).  */
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))

/* Register-name tables (initializers come from i386.h) used when
   printing operands in the various access widths.  */
const char * const hi_reg_name[] = HI_REGISTER_NAMES;
const char * const qi_reg_name[] = QI_REGISTER_NAMES;
const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers xmm0-xmm7 */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers mm0-mm7 */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* extended (64-bit mode) integer registers r8-r15 */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* extended (64-bit mode) SSE registers xmm8-xmm15 */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
c572e5ba 353
/* The "default" register map used in 32bit mode.  Maps gcc hard register
   numbers to the numbers emitted in debugging information; -1 marks
   registers that have no debug encoding.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
366
0f7fa3d0
JH
367/* The "default" register map used in 64bit mode. */
368int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
369{
370 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
371 33, 34, 35, 36, 37, 38, 39, 40 /* fp regs */
372 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
373 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
374 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
375 8,9,10,11,12,13,14,15, /* extended integer registers */
376 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
377};
378
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8 for %eip (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
443
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;

/* Maximum number of per-mode scratch stack slots cached per function.  */
#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  /* Cached stack slots, indexed by machine mode and slot number.  */
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  /* Nonzero if the varargs register save area must be emitted.
     NOTE: "varrargs" is a long-standing spelling in this identifier;
     it is referenced elsewhere, so it is preserved here.  */
  int save_varrargs_registers;
  /* Nonzero if the function accesses its caller's frame
     (e.g. via __builtin_return_address with nonzero count).  */
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
		        )
   [va_arg registers]  (
		        > to_allocate	      <- FRAME_POINTER
   [frame]	       (
		        )
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;				/* number of saved registers */
  int padding1;				/* padding between regs and va_arg area */
  int va_arg_size;			/* size of the varargs register save area */
  HOST_WIDE_INT frame;			/* size of local variables */
  int padding2;				/* padding below the frame */
  int outgoing_arguments_size;		/* space for outgoing arguments */
  int red_zone_size;			/* red zone usable without adjusting sp */

  /* Total stack space the prologue must allocate.  */
  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};
500
/* Command-line option state and derived tuning values, filled in by
   override_options.  The *_string variables hold the raw user text;
   the companion variables hold the parsed results.  */

/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;

/* which cpu are we scheduling for */
enum processor_type ix86_cpu;

/* which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;
e075ae69 542\f
/* Forward declarations of file-local helpers.  PARAMS is the K&R/ANSI
   prototype compatibility macro used throughout GCC of this vintage.  */
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					  rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_free_machine_status PARAMS ((struct function *));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static int ix86_nsaved_regs PARAMS((void));
static void ix86_emit_save_regs PARAMS((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));

/* Decomposed x86 effective address: base + index*scale + disp.  */
struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));

/* Builtin expansion helpers (builtin_description is defined later).  */
struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
					 rtx));
static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
					    rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					   rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static int ix86_save_reg PARAMS ((int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));

/* Target-specific assembly output helpers, conditionally compiled.  */
#ifdef DO_GLOBAL_CTORS_BODY
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif
#if defined(TARGET_ELF) && defined(TARGET_COFF)
static void sco_asm_named_section PARAMS ((const char *, unsigned int));
static void sco_asm_out_constructor PARAMS ((rtx, int));
#endif
/* Initialize the GCC target structure.  Each TARGET_* macro below
   overrides a default hook before TARGET_INITIALIZER expands into the
   targetm initializer at the end of this section.  */
#undef TARGET_VALID_TYPE_ATTRIBUTE
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* PE (Windows) targets get dllimport/dllexport attribute handling.  */
# define TARGET_VALID_TYPE_ATTRIBUTE i386_pe_valid_type_attribute_p
# undef TARGET_VALID_DECL_ATTRIBUTE
# define TARGET_VALID_DECL_ATTRIBUTE i386_pe_valid_decl_attribute_p
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#else
# define TARGET_VALID_TYPE_ATTRIBUTE ix86_valid_type_attribute_p
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
  static void ix86_osf_output_function_prologue PARAMS ((FILE *,
							 HOST_WIDE_INT));
# undef TARGET_ASM_FUNCTION_PROLOGUE
# define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
#endif

/* i386 assembler grouping parentheses are not used.  */
#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

struct gcc_target targetm = TARGET_INITIALIZER;
e075ae69 653\f
f5316dfe
MM
654/* Sometimes certain combinations of command options do not make
655 sense on a particular target machine. You can define a macro
656 `OVERRIDE_OPTIONS' to take account of this. This macro, if
657 defined, is executed once just after all the command options have
658 been parsed.
659
660 Don't use this macro to turn on various extra optimizations for
661 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
662
663void
664override_options ()
665{
400500c4 666 int i;
e075ae69
RH
667 /* Comes from final.c -- no real reason to change it. */
668#define MAX_CODE_ALIGN 16
f5316dfe 669
c8c5cb99
SC
670 static struct ptt
671 {
e075ae69
RH
672 struct processor_costs *cost; /* Processor costs */
673 int target_enable; /* Target flags to enable. */
674 int target_disable; /* Target flags to disable. */
675 int align_loop; /* Default alignments. */
676 int align_jump;
677 int align_func;
678 int branch_cost;
679 }
0f290768 680 const processor_target_table[PROCESSOR_max] =
e075ae69
RH
681 {
682 {&i386_cost, 0, 0, 2, 2, 2, 1},
683 {&i486_cost, 0, 0, 4, 4, 4, 1},
684 {&pentium_cost, 0, 0, -4, -4, -4, 1},
685 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
309ada50 686 {&k6_cost, 0, 0, -5, -5, 4, 1},
b4e89e2d
JH
687 {&athlon_cost, 0, 0, 4, -4, 4, 1},
688 {&pentium4_cost, 0, 0, 2, 2, 2, 1}
e075ae69
RH
689 };
690
691 static struct pta
692 {
0f290768 693 const char *name; /* processor name or nickname. */
e075ae69
RH
694 enum processor_type processor;
695 }
0f290768 696 const processor_alias_table[] =
e075ae69
RH
697 {
698 {"i386", PROCESSOR_I386},
699 {"i486", PROCESSOR_I486},
700 {"i586", PROCESSOR_PENTIUM},
701 {"pentium", PROCESSOR_PENTIUM},
702 {"i686", PROCESSOR_PENTIUMPRO},
703 {"pentiumpro", PROCESSOR_PENTIUMPRO},
e075ae69 704 {"k6", PROCESSOR_K6},
309ada50 705 {"athlon", PROCESSOR_ATHLON},
b4e89e2d 706 {"pentium4", PROCESSOR_PENTIUM4},
3af4bd89 707 };
c8c5cb99 708
0f290768 709 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
c8c5cb99 710
f5316dfe
MM
711#ifdef SUBTARGET_OVERRIDE_OPTIONS
712 SUBTARGET_OVERRIDE_OPTIONS;
713#endif
714
5a6ee819 715 ix86_arch = PROCESSOR_I386;
e075ae69
RH
716 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
717
6189a572
JH
718 if (ix86_cmodel_string != 0)
719 {
720 if (!strcmp (ix86_cmodel_string, "small"))
721 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
722 else if (flag_pic)
723 sorry ("Code model %s not supported in PIC mode", ix86_cmodel_string);
724 else if (!strcmp (ix86_cmodel_string, "32"))
725 ix86_cmodel = CM_32;
726 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
727 ix86_cmodel = CM_KERNEL;
728 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
729 ix86_cmodel = CM_MEDIUM;
730 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
731 ix86_cmodel = CM_LARGE;
732 else
733 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
734 }
735 else
736 {
737 ix86_cmodel = CM_32;
738 if (TARGET_64BIT)
739 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
740 }
741 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
742 error ("Code model `%s' not supported in the %s bit mode.",
743 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
744 if (ix86_cmodel == CM_LARGE)
745 sorry ("Code model `large' not supported yet.");
0c2dc519
JH
746 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
747 sorry ("%i-bit mode not compiled in.",
748 (target_flags & MASK_64BIT) ? 64 : 32);
6189a572 749
e075ae69
RH
750 if (ix86_arch_string != 0)
751 {
e075ae69
RH
752 for (i = 0; i < pta_size; i++)
753 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
754 {
755 ix86_arch = processor_alias_table[i].processor;
756 /* Default cpu tuning to the architecture. */
757 ix86_cpu = ix86_arch;
758 break;
759 }
400500c4 760
e075ae69
RH
761 if (i == pta_size)
762 error ("bad value (%s) for -march= switch", ix86_arch_string);
763 }
764
765 if (ix86_cpu_string != 0)
766 {
e075ae69
RH
767 for (i = 0; i < pta_size; i++)
768 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
769 {
770 ix86_cpu = processor_alias_table[i].processor;
771 break;
772 }
773 if (i == pta_size)
774 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
775 }
776
777 ix86_cost = processor_target_table[ix86_cpu].cost;
778 target_flags |= processor_target_table[ix86_cpu].target_enable;
779 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
780
36edd3cc
BS
781 /* Arrange to set up i386_stack_locals for all functions. */
782 init_machine_status = ix86_init_machine_status;
1526a060 783 mark_machine_status = ix86_mark_machine_status;
37b15744 784 free_machine_status = ix86_free_machine_status;
36edd3cc 785
0f290768 786 /* Validate -mregparm= value. */
e075ae69 787 if (ix86_regparm_string)
b08de47e 788 {
400500c4
RK
789 i = atoi (ix86_regparm_string);
790 if (i < 0 || i > REGPARM_MAX)
791 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
792 else
793 ix86_regparm = i;
b08de47e 794 }
0d7d98ee
JH
795 else
796 if (TARGET_64BIT)
797 ix86_regparm = REGPARM_MAX;
b08de47e 798
3e18fdf6 799 /* If the user has provided any of the -malign-* options,
a4f31c00 800 warn and use that value only if -falign-* is not set.
3e18fdf6 801 Remove this code in GCC 3.2 or later. */
e075ae69 802 if (ix86_align_loops_string)
b08de47e 803 {
3e18fdf6
GK
804 warning ("-malign-loops is obsolete, use -falign-loops");
805 if (align_loops == 0)
806 {
807 i = atoi (ix86_align_loops_string);
808 if (i < 0 || i > MAX_CODE_ALIGN)
809 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
810 else
811 align_loops = 1 << i;
812 }
b08de47e 813 }
3af4bd89 814
e075ae69 815 if (ix86_align_jumps_string)
b08de47e 816 {
3e18fdf6
GK
817 warning ("-malign-jumps is obsolete, use -falign-jumps");
818 if (align_jumps == 0)
819 {
820 i = atoi (ix86_align_jumps_string);
821 if (i < 0 || i > MAX_CODE_ALIGN)
822 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
823 else
824 align_jumps = 1 << i;
825 }
b08de47e 826 }
b08de47e 827
e075ae69 828 if (ix86_align_funcs_string)
b08de47e 829 {
3e18fdf6
GK
830 warning ("-malign-functions is obsolete, use -falign-functions");
831 if (align_functions == 0)
832 {
833 i = atoi (ix86_align_funcs_string);
834 if (i < 0 || i > MAX_CODE_ALIGN)
835 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
836 else
837 align_functions = 1 << i;
838 }
b08de47e 839 }
3af4bd89 840
3e18fdf6
GK
841 /* Default align_* from the processor table. */
842#define abs(n) (n < 0 ? -n : n)
843 if (align_loops == 0)
844 align_loops = 1 << abs (processor_target_table[ix86_cpu].align_loop);
845 if (align_jumps == 0)
846 align_jumps = 1 << abs (processor_target_table[ix86_cpu].align_jump);
847 if (align_functions == 0)
848 align_functions = 1 << abs (processor_target_table[ix86_cpu].align_func);
849
e4c0478d 850 /* Validate -mpreferred-stack-boundary= value, or provide default.
3af4bd89 851 The default of 128 bits is for Pentium III's SSE __m128. */
e075ae69
RH
852 ix86_preferred_stack_boundary = 128;
853 if (ix86_preferred_stack_boundary_string)
3af4bd89 854 {
400500c4 855 i = atoi (ix86_preferred_stack_boundary_string);
0d7d98ee
JH
856 if (i < (TARGET_64BIT ? 3 : 2) || i > 31)
857 error ("-mpreferred-stack-boundary=%d is not between %d and 31", i,
858 TARGET_64BIT ? 3 : 2);
400500c4
RK
859 else
860 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 861 }
77a989d1 862
0f290768 863 /* Validate -mbranch-cost= value, or provide default. */
e075ae69
RH
864 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
865 if (ix86_branch_cost_string)
804a8ee0 866 {
400500c4
RK
867 i = atoi (ix86_branch_cost_string);
868 if (i < 0 || i > 5)
869 error ("-mbranch-cost=%d is not between 0 and 5", i);
870 else
871 ix86_branch_cost = i;
804a8ee0 872 }
804a8ee0 873
e9a25f70
JL
874 /* Keep nonleaf frame pointers. */
875 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 876 flag_omit_frame_pointer = 1;
e075ae69
RH
877
878 /* If we're doing fast math, we don't care about comparison order
879 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 880 if (flag_unsafe_math_optimizations)
e075ae69
RH
881 target_flags &= ~MASK_IEEE_FP;
882
a7180f70
BS
883 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
884 on by -msse. */
885 if (TARGET_SSE)
886 target_flags |= MASK_MMX;
c6036a37
JH
887
888 if ((x86_accumulate_outgoing_args & CPUMASK)
889 && !(target_flags & MASK_NO_ACCUMULATE_OUTGOING_ARGS)
890 && !optimize_size)
891 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
f5316dfe
MM
892}
893\f
32b5b1aa 894void
c6aded7c 895optimization_options (level, size)
32b5b1aa 896 int level;
bb5177ac 897 int size ATTRIBUTE_UNUSED;
32b5b1aa 898{
e9a25f70
JL
899 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
900 make the problem with not enough registers even worse. */
32b5b1aa
SC
901#ifdef INSN_SCHEDULING
902 if (level > 1)
903 flag_schedule_insns = 0;
904#endif
905}
b08de47e 906\f
b08de47e
MM
907/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
908 attribute for TYPE. The attributes in ATTRIBUTES have previously been
909 assigned to TYPE. */
910
9959db6d 911int
e075ae69 912ix86_valid_type_attribute_p (type, attributes, identifier, args)
b08de47e 913 tree type;
bb5177ac 914 tree attributes ATTRIBUTE_UNUSED;
b08de47e
MM
915 tree identifier;
916 tree args;
917{
918 if (TREE_CODE (type) != FUNCTION_TYPE
ac478ac0 919 && TREE_CODE (type) != METHOD_TYPE
b08de47e
MM
920 && TREE_CODE (type) != FIELD_DECL
921 && TREE_CODE (type) != TYPE_DECL)
922 return 0;
923
924 /* Stdcall attribute says callee is responsible for popping arguments
925 if they are not variable. */
0d7d98ee
JH
926 if (is_attribute_p ("stdcall", identifier)
927 && !TARGET_64BIT)
b08de47e
MM
928 return (args == NULL_TREE);
929
0f290768 930 /* Cdecl attribute says the callee is a normal C declaration. */
0d7d98ee
JH
931 if (is_attribute_p ("cdecl", identifier)
932 && !TARGET_64BIT)
b08de47e
MM
933 return (args == NULL_TREE);
934
935 /* Regparm attribute specifies how many integer arguments are to be
0f290768 936 passed in registers. */
b08de47e
MM
937 if (is_attribute_p ("regparm", identifier))
938 {
939 tree cst;
940
e9a25f70 941 if (! args || TREE_CODE (args) != TREE_LIST
b08de47e
MM
942 || TREE_CHAIN (args) != NULL_TREE
943 || TREE_VALUE (args) == NULL_TREE)
944 return 0;
945
946 cst = TREE_VALUE (args);
947 if (TREE_CODE (cst) != INTEGER_CST)
948 return 0;
949
cce097f1 950 if (compare_tree_int (cst, REGPARM_MAX) > 0)
b08de47e
MM
951 return 0;
952
953 return 1;
954 }
955
956 return 0;
957}
958
08c148a8
NB
#if defined (OSF_OS) || defined (TARGET_OSF1ELF)

/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int: how many units of
   temporary storage to allocate.

   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.

   We override it here to allow for the new profiling code to go before
   the prologue and the old mcount code to go after the prologue (and
   after %ebx has been set up for ELF shared library support).  */

static void
ix86_osf_output_function_prologue (file, size)
     FILE *file;
     HOST_WIDE_INT size;
{
  /* Fix: these point at string literals, which must not be modified;
     qualify them const so -Wwrite-strings is clean and accidental
     writes are diagnosed.  */
  const char *prefix = "";
  const char *lprefix = LPREFIX;
  int labelno = profile_label_no;

#ifdef OSF_OS

  if (TARGET_UNDERSCORES)
    prefix = "_";

  if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic && !HALF_PIC_P ())
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else if (HALF_PIC_P ())
	{
	  rtx symref;

	  HALF_PIC_EXTERNAL ("_mcount_ptr");
	  symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
						     "_mcount_ptr"));

	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
		   XSTR (symref, 0));
	  fprintf (file, "\tcall *(%%eax)\n");
	}

      else
	{
	  /* Full PIC: materialize the GOT pointer in %eax by hand,
	     since the prologue has not set up %ebx yet.  */
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }

#else /* !OSF_OS */

  if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic)
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else
	{
	  /* PIC: materialize the GOT pointer in %eax by hand, since the
	     prologue has not set up %ebx yet.  */
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }
#endif /* !OSF_OS */

  /* Emit the ordinary prologue after the profiling code.  */
  function_prologue (file, size);
}

#endif /* OSF_OS || TARGET_OSF1ELF */
1057
b08de47e
MM
1058/* Return 0 if the attributes for two types are incompatible, 1 if they
1059 are compatible, and 2 if they are nearly compatible (which causes a
1060 warning to be generated). */
1061
8d8e52be 1062static int
e075ae69 1063ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
1064 tree type1;
1065 tree type2;
b08de47e 1066{
0f290768 1067 /* Check for mismatch of non-default calling convention. */
69ddee61 1068 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1069
1070 if (TREE_CODE (type1) != FUNCTION_TYPE)
1071 return 1;
1072
1073 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1074 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1075 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 1076 return 0;
b08de47e
MM
1077 return 1;
1078}
b08de47e
MM
1079\f
1080/* Value is the number of bytes of arguments automatically
1081 popped when returning from a subroutine call.
1082 FUNDECL is the declaration node of the function (as a tree),
1083 FUNTYPE is the data type of the function (as a tree),
1084 or for a library call it is an identifier node for the subroutine name.
1085 SIZE is the number of bytes of arguments passed on the stack.
1086
1087 On the 80386, the RTD insn may be used to pop them if the number
1088 of args is fixed, but if the number is variable then the caller
1089 must pop them all. RTD can't be used for library calls now
1090 because the library is compiled with the Unix compiler.
1091 Use of RTD is a selectable option, since it is incompatible with
1092 standard Unix calling sequences. If the option is not selected,
1093 the caller must always pop the args.
1094
1095 The attribute stdcall is equivalent to RTD on a per module basis. */
1096
1097int
e075ae69 1098ix86_return_pops_args (fundecl, funtype, size)
b08de47e
MM
1099 tree fundecl;
1100 tree funtype;
1101 int size;
79325812 1102{
3345ee7d 1103 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1104
0f290768 1105 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1106 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1107
0f290768 1108 /* Stdcall functions will pop the stack if not variable args. */
698cdd84
SC
1109 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1110 rtd = 1;
79325812 1111
698cdd84
SC
1112 if (rtd
1113 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
1114 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1115 == void_type_node)))
698cdd84
SC
1116 return size;
1117 }
79325812 1118
e9a25f70 1119 /* Lose any fake structure return argument. */
0d7d98ee
JH
1120 if (aggregate_value_p (TREE_TYPE (funtype))
1121 && !TARGET_64BIT)
698cdd84 1122 return GET_MODE_SIZE (Pmode);
79325812 1123
2614aac6 1124 return 0;
b08de47e 1125}
b08de47e
MM
1126\f
1127/* Argument support functions. */
1128
1129/* Initialize a variable CUM of type CUMULATIVE_ARGS
1130 for a call to a function whose data type is FNTYPE.
1131 For a library call, FNTYPE is 0. */
1132
1133void
1134init_cumulative_args (cum, fntype, libname)
e9a25f70 1135 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
b08de47e
MM
1136 tree fntype; /* tree ptr for function decl */
1137 rtx libname; /* SYMBOL_REF of library name or 0 */
1138{
1139 static CUMULATIVE_ARGS zero_cum;
1140 tree param, next_param;
1141
1142 if (TARGET_DEBUG_ARG)
1143 {
1144 fprintf (stderr, "\ninit_cumulative_args (");
1145 if (fntype)
e9a25f70
JL
1146 fprintf (stderr, "fntype code = %s, ret code = %s",
1147 tree_code_name[(int) TREE_CODE (fntype)],
1148 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
1149 else
1150 fprintf (stderr, "no fntype");
1151
1152 if (libname)
1153 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1154 }
1155
1156 *cum = zero_cum;
1157
1158 /* Set up the number of registers to use for passing arguments. */
e075ae69 1159 cum->nregs = ix86_regparm;
b08de47e
MM
1160 if (fntype)
1161 {
1162 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 1163
b08de47e
MM
1164 if (attr)
1165 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1166 }
1167
1168 /* Determine if this function has variable arguments. This is
1169 indicated by the last argument being 'void_type_mode' if there
1170 are no variable arguments. If there are variable arguments, then
1171 we won't pass anything in registers */
1172
1173 if (cum->nregs)
1174 {
1175 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 1176 param != 0; param = next_param)
b08de47e
MM
1177 {
1178 next_param = TREE_CHAIN (param);
e9a25f70 1179 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
b08de47e
MM
1180 cum->nregs = 0;
1181 }
1182 }
1183
1184 if (TARGET_DEBUG_ARG)
1185 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1186
1187 return;
1188}
1189
1190/* Update the data in CUM to advance over an argument
1191 of mode MODE and data type TYPE.
1192 (TYPE is null for libcalls where that information may not be available.) */
1193
1194void
1195function_arg_advance (cum, mode, type, named)
1196 CUMULATIVE_ARGS *cum; /* current arg information */
1197 enum machine_mode mode; /* current arg mode */
1198 tree type; /* type of the argument or 0 if lib support */
1199 int named; /* whether or not the argument was named */
1200{
5ac9118e
KG
1201 int bytes =
1202 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
1203 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1204
1205 if (TARGET_DEBUG_ARG)
1206 fprintf (stderr,
e9a25f70 1207 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 1208 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
82a127a9 1209 if (TARGET_SSE && mode == TImode)
b08de47e 1210 {
82a127a9
CM
1211 cum->sse_words += words;
1212 cum->sse_nregs -= 1;
1213 cum->sse_regno += 1;
1214 if (cum->sse_nregs <= 0)
1215 {
1216 cum->sse_nregs = 0;
1217 cum->sse_regno = 0;
1218 }
b08de47e 1219 }
a4f31c00 1220 else
82a127a9
CM
1221 {
1222 cum->words += words;
1223 cum->nregs -= words;
1224 cum->regno += words;
b08de47e 1225
82a127a9
CM
1226 if (cum->nregs <= 0)
1227 {
1228 cum->nregs = 0;
1229 cum->regno = 0;
1230 }
1231 }
b08de47e
MM
1232 return;
1233}
1234
1235/* Define where to put the arguments to a function.
1236 Value is zero to push the argument on the stack,
1237 or a hard register in which to store the argument.
1238
1239 MODE is the argument's machine mode.
1240 TYPE is the data type of the argument (as a tree).
1241 This is null for libcalls where that information may
1242 not be available.
1243 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1244 the preceding args and about the function being called.
1245 NAMED is nonzero if this argument is a named parameter
1246 (otherwise it is an extra parameter matching an ellipsis). */
1247
1248struct rtx_def *
1249function_arg (cum, mode, type, named)
1250 CUMULATIVE_ARGS *cum; /* current arg information */
1251 enum machine_mode mode; /* current arg mode */
1252 tree type; /* type of the argument or 0 if lib support */
1253 int named; /* != 0 for normal args, == 0 for ... args */
1254{
1255 rtx ret = NULL_RTX;
5ac9118e
KG
1256 int bytes =
1257 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
1258 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1259
32ee7d1d
JH
1260 if (mode == VOIDmode)
1261 return constm1_rtx;
1262
b08de47e
MM
1263 switch (mode)
1264 {
0f290768 1265 /* For now, pass fp/complex values on the stack. */
e9a25f70 1266 default:
b08de47e
MM
1267 break;
1268
1269 case BLKmode:
1270 case DImode:
1271 case SImode:
1272 case HImode:
1273 case QImode:
1274 if (words <= cum->nregs)
f64cecad 1275 ret = gen_rtx_REG (mode, cum->regno);
b08de47e 1276 break;
82a127a9
CM
1277 case TImode:
1278 if (cum->sse_nregs)
1279 ret = gen_rtx_REG (mode, cum->sse_regno);
a4f31c00 1280 break;
b08de47e
MM
1281 }
1282
1283 if (TARGET_DEBUG_ARG)
1284 {
1285 fprintf (stderr,
e9a25f70 1286 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
b08de47e
MM
1287 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1288
1289 if (ret)
1290 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
1291 else
1292 fprintf (stderr, ", stack");
1293
1294 fprintf (stderr, " )\n");
1295 }
1296
1297 return ret;
1298}
e075ae69 1299\f
8bad7136 1300
7dd4b4a3
JH
1301/* Return nonzero if OP is general operand representable on x86_64. */
1302
1303int
1304x86_64_general_operand (op, mode)
1305 rtx op;
1306 enum machine_mode mode;
1307{
1308 if (!TARGET_64BIT)
1309 return general_operand (op, mode);
1310 if (nonimmediate_operand (op, mode))
1311 return 1;
1312 return x86_64_sign_extended_value (op);
1313}
1314
1315/* Return nonzero if OP is general operand representable on x86_64
1316 as eighter sign extended or zero extended constant. */
1317
1318int
1319x86_64_szext_general_operand (op, mode)
1320 rtx op;
1321 enum machine_mode mode;
1322{
1323 if (!TARGET_64BIT)
1324 return general_operand (op, mode);
1325 if (nonimmediate_operand (op, mode))
1326 return 1;
1327 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
1328}
1329
1330/* Return nonzero if OP is nonmemory operand representable on x86_64. */
1331
1332int
1333x86_64_nonmemory_operand (op, mode)
1334 rtx op;
1335 enum machine_mode mode;
1336{
1337 if (!TARGET_64BIT)
1338 return nonmemory_operand (op, mode);
1339 if (register_operand (op, mode))
1340 return 1;
1341 return x86_64_sign_extended_value (op);
1342}
1343
1344/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
1345
1346int
1347x86_64_movabs_operand (op, mode)
1348 rtx op;
1349 enum machine_mode mode;
1350{
1351 if (!TARGET_64BIT || !flag_pic)
1352 return nonmemory_operand (op, mode);
1353 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
1354 return 1;
1355 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
1356 return 1;
1357 return 0;
1358}
1359
1360/* Return nonzero if OP is nonmemory operand representable on x86_64. */
1361
1362int
1363x86_64_szext_nonmemory_operand (op, mode)
1364 rtx op;
1365 enum machine_mode mode;
1366{
1367 if (!TARGET_64BIT)
1368 return nonmemory_operand (op, mode);
1369 if (register_operand (op, mode))
1370 return 1;
1371 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
1372}
1373
1374/* Return nonzero if OP is immediate operand representable on x86_64. */
1375
1376int
1377x86_64_immediate_operand (op, mode)
1378 rtx op;
1379 enum machine_mode mode;
1380{
1381 if (!TARGET_64BIT)
1382 return immediate_operand (op, mode);
1383 return x86_64_sign_extended_value (op);
1384}
1385
1386/* Return nonzero if OP is immediate operand representable on x86_64. */
1387
1388int
1389x86_64_zext_immediate_operand (op, mode)
1390 rtx op;
1391 enum machine_mode mode ATTRIBUTE_UNUSED;
1392{
1393 return x86_64_zero_extended_value (op);
1394}
1395
8bad7136
JL
1396/* Return nonzero if OP is (const_int 1), else return zero. */
1397
1398int
1399const_int_1_operand (op, mode)
1400 rtx op;
1401 enum machine_mode mode ATTRIBUTE_UNUSED;
1402{
1403 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
1404}
1405
e075ae69
RH
1406/* Returns 1 if OP is either a symbol reference or a sum of a symbol
1407 reference and a constant. */
b08de47e
MM
1408
1409int
e075ae69
RH
1410symbolic_operand (op, mode)
1411 register rtx op;
1412 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1413{
e075ae69 1414 switch (GET_CODE (op))
2a2ab3f9 1415 {
e075ae69
RH
1416 case SYMBOL_REF:
1417 case LABEL_REF:
1418 return 1;
1419
1420 case CONST:
1421 op = XEXP (op, 0);
1422 if (GET_CODE (op) == SYMBOL_REF
1423 || GET_CODE (op) == LABEL_REF
1424 || (GET_CODE (op) == UNSPEC
1425 && XINT (op, 1) >= 6
1426 && XINT (op, 1) <= 7))
1427 return 1;
1428 if (GET_CODE (op) != PLUS
1429 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1430 return 0;
1431
1432 op = XEXP (op, 0);
1433 if (GET_CODE (op) == SYMBOL_REF
1434 || GET_CODE (op) == LABEL_REF)
1435 return 1;
1436 /* Only @GOTOFF gets offsets. */
1437 if (GET_CODE (op) != UNSPEC
1438 || XINT (op, 1) != 7)
1439 return 0;
1440
1441 op = XVECEXP (op, 0, 0);
1442 if (GET_CODE (op) == SYMBOL_REF
1443 || GET_CODE (op) == LABEL_REF)
1444 return 1;
1445 return 0;
1446
1447 default:
1448 return 0;
2a2ab3f9
JVA
1449 }
1450}
2a2ab3f9 1451
e075ae69 1452/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 1453
e075ae69
RH
1454int
1455pic_symbolic_operand (op, mode)
1456 register rtx op;
1457 enum machine_mode mode ATTRIBUTE_UNUSED;
1458{
1459 if (GET_CODE (op) == CONST)
2a2ab3f9 1460 {
e075ae69
RH
1461 op = XEXP (op, 0);
1462 if (GET_CODE (op) == UNSPEC)
1463 return 1;
1464 if (GET_CODE (op) != PLUS
1465 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1466 return 0;
1467 op = XEXP (op, 0);
1468 if (GET_CODE (op) == UNSPEC)
1469 return 1;
2a2ab3f9 1470 }
e075ae69 1471 return 0;
2a2ab3f9 1472}
2a2ab3f9 1473
28d52ffb
RH
1474/* Test for a valid operand for a call instruction. Don't allow the
1475 arg pointer register or virtual regs since they may decay into
1476 reg + const, which the patterns can't handle. */
2a2ab3f9 1477
e075ae69
RH
1478int
1479call_insn_operand (op, mode)
1480 rtx op;
1481 enum machine_mode mode ATTRIBUTE_UNUSED;
1482{
e075ae69
RH
1483 /* Disallow indirect through a virtual register. This leads to
1484 compiler aborts when trying to eliminate them. */
1485 if (GET_CODE (op) == REG
1486 && (op == arg_pointer_rtx
564d80f4 1487 || op == frame_pointer_rtx
e075ae69
RH
1488 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
1489 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
1490 return 0;
2a2ab3f9 1491
28d52ffb
RH
1492 /* Disallow `call 1234'. Due to varying assembler lameness this
1493 gets either rejected or translated to `call .+1234'. */
1494 if (GET_CODE (op) == CONST_INT)
1495 return 0;
1496
cbbf65e0
RH
1497 /* Explicitly allow SYMBOL_REF even if pic. */
1498 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 1499 return 1;
2a2ab3f9 1500
cbbf65e0
RH
1501 /* Half-pic doesn't allow anything but registers and constants.
1502 We've just taken care of the later. */
1503 if (HALF_PIC_P ())
1504 return register_operand (op, Pmode);
1505
1506 /* Otherwise we can allow any general_operand in the address. */
1507 return general_operand (op, Pmode);
e075ae69 1508}
79325812 1509
e075ae69
RH
1510int
1511constant_call_address_operand (op, mode)
1512 rtx op;
1513 enum machine_mode mode ATTRIBUTE_UNUSED;
1514{
eaf19aba
JJ
1515 if (GET_CODE (op) == CONST
1516 && GET_CODE (XEXP (op, 0)) == PLUS
1517 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1518 op = XEXP (XEXP (op, 0), 0);
e1ff012c 1519 return GET_CODE (op) == SYMBOL_REF;
e075ae69 1520}
2a2ab3f9 1521
e075ae69 1522/* Match exactly zero and one. */
e9a25f70 1523
0f290768 1524int
e075ae69
RH
1525const0_operand (op, mode)
1526 register rtx op;
1527 enum machine_mode mode;
1528{
1529 return op == CONST0_RTX (mode);
1530}
e9a25f70 1531
0f290768 1532int
e075ae69
RH
1533const1_operand (op, mode)
1534 register rtx op;
1535 enum machine_mode mode ATTRIBUTE_UNUSED;
1536{
1537 return op == const1_rtx;
1538}
2a2ab3f9 1539
e075ae69 1540/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 1541
e075ae69
RH
1542int
1543const248_operand (op, mode)
1544 register rtx op;
1545 enum machine_mode mode ATTRIBUTE_UNUSED;
1546{
1547 return (GET_CODE (op) == CONST_INT
1548 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1549}
e9a25f70 1550
e075ae69 1551/* True if this is a constant appropriate for an increment or decremenmt. */
81fd0956 1552
e075ae69
RH
1553int
1554incdec_operand (op, mode)
1555 register rtx op;
0631e0bf 1556 enum machine_mode mode ATTRIBUTE_UNUSED;
e075ae69 1557{
b4e89e2d
JH
1558 /* On Pentium4, the inc and dec operations causes extra dependancy on flag
1559 registers, since carry flag is not set. */
1560 if (TARGET_PENTIUM4 && !optimize_size)
1561 return 0;
2b1c08f5 1562 return op == const1_rtx || op == constm1_rtx;
e075ae69 1563}
2a2ab3f9 1564
371bc54b
JH
1565/* Return nonzero if OP is acceptable as operand of DImode shift
1566 expander. */
1567
1568int
1569shiftdi_operand (op, mode)
1570 rtx op;
1571 enum machine_mode mode ATTRIBUTE_UNUSED;
1572{
1573 if (TARGET_64BIT)
1574 return nonimmediate_operand (op, mode);
1575 else
1576 return register_operand (op, mode);
1577}
1578
0f290768 1579/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
1580 register eliminable to the stack pointer. Otherwise, this is
1581 a register operand.
2a2ab3f9 1582
e075ae69
RH
1583 This is used to prevent esp from being used as an index reg.
1584 Which would only happen in pathological cases. */
5f1ec3e6 1585
e075ae69
RH
1586int
1587reg_no_sp_operand (op, mode)
1588 register rtx op;
1589 enum machine_mode mode;
1590{
1591 rtx t = op;
1592 if (GET_CODE (t) == SUBREG)
1593 t = SUBREG_REG (t);
564d80f4 1594 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 1595 return 0;
2a2ab3f9 1596
e075ae69 1597 return register_operand (op, mode);
2a2ab3f9 1598}
b840bfb0 1599
915119a5
BS
1600int
1601mmx_reg_operand (op, mode)
1602 register rtx op;
bd793c65 1603 enum machine_mode mode ATTRIBUTE_UNUSED;
915119a5
BS
1604{
1605 return MMX_REG_P (op);
1606}
1607
2c5a510c
RH
1608/* Return false if this is any eliminable register. Otherwise
1609 general_operand. */
1610
1611int
1612general_no_elim_operand (op, mode)
1613 register rtx op;
1614 enum machine_mode mode;
1615{
1616 rtx t = op;
1617 if (GET_CODE (t) == SUBREG)
1618 t = SUBREG_REG (t);
1619 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1620 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1621 || t == virtual_stack_dynamic_rtx)
1622 return 0;
1020a5ab
RH
1623 if (REG_P (t)
1624 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
1625 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
1626 return 0;
2c5a510c
RH
1627
1628 return general_operand (op, mode);
1629}
1630
1631/* Return false if this is any eliminable register. Otherwise
1632 register_operand or const_int. */
1633
1634int
1635nonmemory_no_elim_operand (op, mode)
1636 register rtx op;
1637 enum machine_mode mode;
1638{
1639 rtx t = op;
1640 if (GET_CODE (t) == SUBREG)
1641 t = SUBREG_REG (t);
1642 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1643 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1644 || t == virtual_stack_dynamic_rtx)
1645 return 0;
1646
1647 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1648}
1649
e075ae69 1650/* Return true if op is a Q_REGS class register. */
b840bfb0 1651
e075ae69
RH
1652int
1653q_regs_operand (op, mode)
1654 register rtx op;
1655 enum machine_mode mode;
b840bfb0 1656{
e075ae69
RH
1657 if (mode != VOIDmode && GET_MODE (op) != mode)
1658 return 0;
1659 if (GET_CODE (op) == SUBREG)
1660 op = SUBREG_REG (op);
1661 return QI_REG_P (op);
0f290768 1662}
b840bfb0 1663
e075ae69 1664/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 1665
e075ae69
RH
1666int
1667non_q_regs_operand (op, mode)
1668 register rtx op;
1669 enum machine_mode mode;
1670{
1671 if (mode != VOIDmode && GET_MODE (op) != mode)
1672 return 0;
1673 if (GET_CODE (op) == SUBREG)
1674 op = SUBREG_REG (op);
1675 return NON_QI_REG_P (op);
0f290768 1676}
b840bfb0 1677
915119a5
BS
1678/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
1679 insns. */
1680int
1681sse_comparison_operator (op, mode)
1682 rtx op;
1683 enum machine_mode mode ATTRIBUTE_UNUSED;
1684{
1685 enum rtx_code code = GET_CODE (op);
a46d1d38
JH
1686 switch (code)
1687 {
1688 /* Operations supported directly. */
1689 case EQ:
1690 case LT:
1691 case LE:
1692 case UNORDERED:
1693 case NE:
1694 case UNGE:
1695 case UNGT:
1696 case ORDERED:
1697 return 1;
1698 /* These are equivalent to ones above in non-IEEE comparisons. */
1699 case UNEQ:
1700 case UNLT:
1701 case UNLE:
1702 case LTGT:
1703 case GE:
1704 case GT:
1705 return !TARGET_IEEE_FP;
1706 default:
1707 return 0;
1708 }
915119a5 1709}
9076b9c1 1710/* Return 1 if OP is a valid comparison operator in valid mode. */
e075ae69 1711int
9076b9c1
JH
1712ix86_comparison_operator (op, mode)
1713 register rtx op;
1714 enum machine_mode mode;
e075ae69 1715{
9076b9c1 1716 enum machine_mode inmode;
9a915772 1717 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
1718 if (mode != VOIDmode && GET_MODE (op) != mode)
1719 return 0;
9a915772
JH
1720 if (GET_RTX_CLASS (code) != '<')
1721 return 0;
1722 inmode = GET_MODE (XEXP (op, 0));
1723
1724 if (inmode == CCFPmode || inmode == CCFPUmode)
1725 {
1726 enum rtx_code second_code, bypass_code;
1727 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1728 return (bypass_code == NIL && second_code == NIL);
1729 }
1730 switch (code)
3a3677ff
RH
1731 {
1732 case EQ: case NE:
3a3677ff 1733 return 1;
9076b9c1 1734 case LT: case GE:
7e08e190 1735 if (inmode == CCmode || inmode == CCGCmode
9076b9c1
JH
1736 || inmode == CCGOCmode || inmode == CCNOmode)
1737 return 1;
1738 return 0;
7e08e190 1739 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
7e08e190 1740 if (inmode == CCmode)
9076b9c1
JH
1741 return 1;
1742 return 0;
1743 case GT: case LE:
7e08e190 1744 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
9076b9c1
JH
1745 return 1;
1746 return 0;
3a3677ff
RH
1747 default:
1748 return 0;
1749 }
1750}
1751
9076b9c1 1752/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3a3677ff 1753
9076b9c1
JH
1754int
1755fcmov_comparison_operator (op, mode)
3a3677ff
RH
1756 register rtx op;
1757 enum machine_mode mode;
1758{
b62d22a2 1759 enum machine_mode inmode;
9a915772 1760 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
1761 if (mode != VOIDmode && GET_MODE (op) != mode)
1762 return 0;
9a915772
JH
1763 if (GET_RTX_CLASS (code) != '<')
1764 return 0;
1765 inmode = GET_MODE (XEXP (op, 0));
1766 if (inmode == CCFPmode || inmode == CCFPUmode)
3a3677ff 1767 {
9a915772
JH
1768 enum rtx_code second_code, bypass_code;
1769 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1770 if (bypass_code != NIL || second_code != NIL)
1771 return 0;
1772 code = ix86_fp_compare_code_to_integer (code);
1773 }
1774 /* i387 supports just limited amount of conditional codes. */
1775 switch (code)
1776 {
1777 case LTU: case GTU: case LEU: case GEU:
1778 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
9076b9c1
JH
1779 return 1;
1780 return 0;
9a915772
JH
1781 case ORDERED: case UNORDERED:
1782 case EQ: case NE:
1783 return 1;
3a3677ff
RH
1784 default:
1785 return 0;
1786 }
e075ae69 1787}
b840bfb0 1788
e9e80858
JH
1789/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1790
1791int
1792promotable_binary_operator (op, mode)
1793 register rtx op;
1794 enum machine_mode mode ATTRIBUTE_UNUSED;
1795{
1796 switch (GET_CODE (op))
1797 {
1798 case MULT:
1799 /* Modern CPUs have same latency for HImode and SImode multiply,
1800 but 386 and 486 do HImode multiply faster. */
1801 return ix86_cpu > PROCESSOR_I486;
1802 case PLUS:
1803 case AND:
1804 case IOR:
1805 case XOR:
1806 case ASHIFT:
1807 return 1;
1808 default:
1809 return 0;
1810 }
1811}
1812
e075ae69
RH
1813/* Nearly general operand, but accept any const_double, since we wish
1814 to be able to drop them into memory rather than have them get pulled
1815 into registers. */
b840bfb0 1816
2a2ab3f9 1817int
e075ae69
RH
1818cmp_fp_expander_operand (op, mode)
1819 register rtx op;
1820 enum machine_mode mode;
2a2ab3f9 1821{
e075ae69 1822 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 1823 return 0;
e075ae69 1824 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 1825 return 1;
e075ae69 1826 return general_operand (op, mode);
2a2ab3f9
JVA
1827}
1828
e075ae69 1829/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
1830
1831int
e075ae69 1832ext_register_operand (op, mode)
2a2ab3f9 1833 register rtx op;
bb5177ac 1834 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 1835{
3522082b 1836 int regno;
0d7d98ee
JH
1837 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
1838 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
e075ae69 1839 return 0;
3522082b
JH
1840
1841 if (!register_operand (op, VOIDmode))
1842 return 0;
1843
1844 /* Be curefull to accept only registers having upper parts. */
1845 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
1846 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
e075ae69
RH
1847}
1848
1849/* Return 1 if this is a valid binary floating-point operation.
0f290768 1850 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
1851
1852int
1853binary_fp_operator (op, mode)
1854 register rtx op;
1855 enum machine_mode mode;
1856{
1857 if (mode != VOIDmode && mode != GET_MODE (op))
1858 return 0;
1859
2a2ab3f9
JVA
1860 switch (GET_CODE (op))
1861 {
e075ae69
RH
1862 case PLUS:
1863 case MINUS:
1864 case MULT:
1865 case DIV:
1866 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 1867
2a2ab3f9
JVA
1868 default:
1869 return 0;
1870 }
1871}
fee2770d 1872
e075ae69
RH
1873int
1874mult_operator(op, mode)
1875 register rtx op;
1876 enum machine_mode mode ATTRIBUTE_UNUSED;
1877{
1878 return GET_CODE (op) == MULT;
1879}
1880
1881int
1882div_operator(op, mode)
1883 register rtx op;
1884 enum machine_mode mode ATTRIBUTE_UNUSED;
1885{
1886 return GET_CODE (op) == DIV;
1887}
0a726ef1
JL
1888
1889int
e075ae69
RH
1890arith_or_logical_operator (op, mode)
1891 rtx op;
1892 enum machine_mode mode;
0a726ef1 1893{
e075ae69
RH
1894 return ((mode == VOIDmode || GET_MODE (op) == mode)
1895 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1896 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
1897}
1898
e075ae69 1899/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
1900
1901int
e075ae69
RH
1902memory_displacement_operand (op, mode)
1903 register rtx op;
1904 enum machine_mode mode;
4f2c8ebb 1905{
e075ae69 1906 struct ix86_address parts;
e9a25f70 1907
e075ae69
RH
1908 if (! memory_operand (op, mode))
1909 return 0;
1910
1911 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1912 abort ();
1913
1914 return parts.disp != NULL_RTX;
4f2c8ebb
RS
1915}
1916
16189740 1917/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
e075ae69
RH
1918 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1919
1920 ??? It seems likely that this will only work because cmpsi is an
1921 expander, and no actual insns use this. */
4f2c8ebb
RS
1922
1923int
e075ae69
RH
1924cmpsi_operand (op, mode)
1925 rtx op;
1926 enum machine_mode mode;
fee2770d 1927{
b9b2c339 1928 if (nonimmediate_operand (op, mode))
e075ae69
RH
1929 return 1;
1930
1931 if (GET_CODE (op) == AND
1932 && GET_MODE (op) == SImode
1933 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
1934 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
1935 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
1936 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
1937 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
1938 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 1939 return 1;
e9a25f70 1940
fee2770d
RS
1941 return 0;
1942}
d784886d 1943
e075ae69
RH
1944/* Returns 1 if OP is memory operand that can not be represented by the
1945 modRM array. */
d784886d
RK
1946
1947int
e075ae69 1948long_memory_operand (op, mode)
d784886d
RK
1949 register rtx op;
1950 enum machine_mode mode;
1951{
e075ae69 1952 if (! memory_operand (op, mode))
d784886d
RK
1953 return 0;
1954
e075ae69 1955 return memory_address_length (op) != 0;
d784886d 1956}
2247f6ed
JH
1957
1958/* Return nonzero if the rtx is known aligned. */
1959
1960int
1961aligned_operand (op, mode)
1962 rtx op;
1963 enum machine_mode mode;
1964{
1965 struct ix86_address parts;
1966
1967 if (!general_operand (op, mode))
1968 return 0;
1969
0f290768 1970 /* Registers and immediate operands are always "aligned". */
2247f6ed
JH
1971 if (GET_CODE (op) != MEM)
1972 return 1;
1973
0f290768 1974 /* Don't even try to do any aligned optimizations with volatiles. */
2247f6ed
JH
1975 if (MEM_VOLATILE_P (op))
1976 return 0;
1977
1978 op = XEXP (op, 0);
1979
1980 /* Pushes and pops are only valid on the stack pointer. */
1981 if (GET_CODE (op) == PRE_DEC
1982 || GET_CODE (op) == POST_INC)
1983 return 1;
1984
1985 /* Decode the address. */
1986 if (! ix86_decompose_address (op, &parts))
1987 abort ();
1988
1989 /* Look for some component that isn't known to be aligned. */
1990 if (parts.index)
1991 {
1992 if (parts.scale < 4
bdb429a5 1993 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
2247f6ed
JH
1994 return 0;
1995 }
1996 if (parts.base)
1997 {
bdb429a5 1998 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
2247f6ed
JH
1999 return 0;
2000 }
2001 if (parts.disp)
2002 {
2003 if (GET_CODE (parts.disp) != CONST_INT
2004 || (INTVAL (parts.disp) & 3) != 0)
2005 return 0;
2006 }
2007
2008 /* Didn't find one -- this must be an aligned address. */
2009 return 1;
2010}
e075ae69
RH
2011\f
2012/* Return true if the constant is something that can be loaded with
2013 a special instruction. Only handle 0.0 and 1.0; others are less
2014 worthwhile. */
57dbca5e
BS
2015
2016int
e075ae69
RH
2017standard_80387_constant_p (x)
2018 rtx x;
57dbca5e 2019{
2b04e52b 2020 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 2021 return -1;
2b04e52b
JH
2022 /* Note that on the 80387, other constants, such as pi, that we should support
2023 too. On some machines, these are much slower to load as standard constant,
2024 than to load from doubles in memory. */
2025 if (x == CONST0_RTX (GET_MODE (x)))
2026 return 1;
2027 if (x == CONST1_RTX (GET_MODE (x)))
2028 return 2;
e075ae69 2029 return 0;
57dbca5e
BS
2030}
2031
2b04e52b
JH
2032/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
2033 */
2034int
2035standard_sse_constant_p (x)
2036 rtx x;
2037{
2038 if (GET_CODE (x) != CONST_DOUBLE)
2039 return -1;
2040 return (x == CONST0_RTX (GET_MODE (x)));
2041}
2042
2a2ab3f9
JVA
2043/* Returns 1 if OP contains a symbol reference */
2044
2045int
2046symbolic_reference_mentioned_p (op)
2047 rtx op;
2048{
6f7d635c 2049 register const char *fmt;
2a2ab3f9
JVA
2050 register int i;
2051
2052 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
2053 return 1;
2054
2055 fmt = GET_RTX_FORMAT (GET_CODE (op));
2056 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2057 {
2058 if (fmt[i] == 'E')
2059 {
2060 register int j;
2061
2062 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2063 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2064 return 1;
2065 }
e9a25f70 2066
2a2ab3f9
JVA
2067 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
2068 return 1;
2069 }
2070
2071 return 0;
2072}
e075ae69
RH
2073
2074/* Return 1 if it is appropriate to emit `ret' instructions in the
2075 body of a function. Do this only if the epilogue is simple, needing a
2076 couple of insns. Prior to reloading, we can't tell how many registers
2077 must be saved, so return 0 then. Return 0 if there is no frame
2078 marker to de-allocate.
2079
2080 If NON_SAVING_SETJMP is defined and true, then it is not possible
2081 for the epilogue to be simple, so return 0. This is a special case
2082 since NON_SAVING_SETJMP will not cause regs_ever_live to change
2083 until final, but jump_optimize may need to know sooner if a
2084 `return' is OK. */
32b5b1aa
SC
2085
2086int
e075ae69 2087ix86_can_use_return_insn_p ()
32b5b1aa 2088{
4dd2ac2c 2089 struct ix86_frame frame;
9a7372d6 2090
e075ae69
RH
2091#ifdef NON_SAVING_SETJMP
2092 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
2093 return 0;
2094#endif
9a7372d6
RH
2095#ifdef FUNCTION_BLOCK_PROFILER_EXIT
2096 if (profile_block_flag == 2)
2097 return 0;
2098#endif
2099
2100 if (! reload_completed || frame_pointer_needed)
2101 return 0;
32b5b1aa 2102
9a7372d6
RH
2103 /* Don't allow more than 32 pop, since that's all we can do
2104 with one instruction. */
2105 if (current_function_pops_args
2106 && current_function_args_size >= 32768)
e075ae69 2107 return 0;
32b5b1aa 2108
4dd2ac2c
JH
2109 ix86_compute_frame_layout (&frame);
2110 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 2111}
6189a572
JH
2112\f
2113/* Return 1 if VALUE can be stored in the sign extended immediate field. */
2114int
2115x86_64_sign_extended_value (value)
2116 rtx value;
2117{
2118 switch (GET_CODE (value))
2119 {
2120 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
2121 to be at least 32 and this all acceptable constants are
2122 represented as CONST_INT. */
2123 case CONST_INT:
2124 if (HOST_BITS_PER_WIDE_INT == 32)
2125 return 1;
2126 else
2127 {
2128 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
fa9f36a1 2129 return trunc_int_for_mode (val, SImode) == val;
6189a572
JH
2130 }
2131 break;
2132
2133 /* For certain code models, the symbolic references are known to fit. */
2134 case SYMBOL_REF:
2135 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
2136
2137 /* For certain code models, the code is near as well. */
2138 case LABEL_REF:
2139 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
2140
2141 /* We also may accept the offsetted memory references in certain special
2142 cases. */
2143 case CONST:
2144 if (GET_CODE (XEXP (value, 0)) == UNSPEC
2145 && XVECLEN (XEXP (value, 0), 0) == 1
2146 && XINT (XEXP (value, 0), 1) == 15)
2147 return 1;
2148 else if (GET_CODE (XEXP (value, 0)) == PLUS)
2149 {
2150 rtx op1 = XEXP (XEXP (value, 0), 0);
2151 rtx op2 = XEXP (XEXP (value, 0), 1);
2152 HOST_WIDE_INT offset;
2153
2154 if (ix86_cmodel == CM_LARGE)
2155 return 0;
2156 if (GET_CODE (op2) != CONST_INT)
2157 return 0;
2158 offset = trunc_int_for_mode (INTVAL (op2), DImode);
2159 switch (GET_CODE (op1))
2160 {
2161 case SYMBOL_REF:
2162 /* For CM_SMALL assume that latest object is 1MB before
2163 end of 31bits boundary. We may also accept pretty
2164 large negative constants knowing that all objects are
2165 in the positive half of address space. */
2166 if (ix86_cmodel == CM_SMALL
2167 && offset < 1024*1024*1024
2168 && trunc_int_for_mode (offset, SImode) == offset)
2169 return 1;
2170 /* For CM_KERNEL we know that all object resist in the
2171 negative half of 32bits address space. We may not
2172 accept negative offsets, since they may be just off
2173 and we may accept pretty large possitive ones. */
2174 if (ix86_cmodel == CM_KERNEL
2175 && offset > 0
2176 && trunc_int_for_mode (offset, SImode) == offset)
2177 return 1;
2178 break;
2179 case LABEL_REF:
2180 /* These conditions are similar to SYMBOL_REF ones, just the
2181 constraints for code models differ. */
2182 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
2183 && offset < 1024*1024*1024
2184 && trunc_int_for_mode (offset, SImode) == offset)
2185 return 1;
2186 if (ix86_cmodel == CM_KERNEL
2187 && offset > 0
2188 && trunc_int_for_mode (offset, SImode) == offset)
2189 return 1;
2190 break;
2191 default:
2192 return 0;
2193 }
2194 }
2195 return 0;
2196 default:
2197 return 0;
2198 }
2199}
2200
2201/* Return 1 if VALUE can be stored in the zero extended immediate field. */
2202int
2203x86_64_zero_extended_value (value)
2204 rtx value;
2205{
2206 switch (GET_CODE (value))
2207 {
2208 case CONST_DOUBLE:
2209 if (HOST_BITS_PER_WIDE_INT == 32)
2210 return (GET_MODE (value) == VOIDmode
2211 && !CONST_DOUBLE_HIGH (value));
2212 else
2213 return 0;
2214 case CONST_INT:
2215 if (HOST_BITS_PER_WIDE_INT == 32)
2216 return INTVAL (value) >= 0;
2217 else
2218 return !(INTVAL (value) & ~(HOST_WIDE_INT)0xffffffff);
2219 break;
2220
2221 /* For certain code models, the symbolic references are known to fit. */
2222 case SYMBOL_REF:
2223 return ix86_cmodel == CM_SMALL;
2224
2225 /* For certain code models, the code is near as well. */
2226 case LABEL_REF:
2227 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
2228
2229 /* We also may accept the offsetted memory references in certain special
2230 cases. */
2231 case CONST:
2232 if (GET_CODE (XEXP (value, 0)) == PLUS)
2233 {
2234 rtx op1 = XEXP (XEXP (value, 0), 0);
2235 rtx op2 = XEXP (XEXP (value, 0), 1);
2236
2237 if (ix86_cmodel == CM_LARGE)
2238 return 0;
2239 switch (GET_CODE (op1))
2240 {
2241 case SYMBOL_REF:
2242 return 0;
2243 /* For small code model we may accept pretty large possitive
2244 offsets, since one bit is available for free. Negative
2245 offsets are limited by the size of NULL pointer area
2246 specified by the ABI. */
2247 if (ix86_cmodel == CM_SMALL
2248 && GET_CODE (op2) == CONST_INT
2249 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
2250 && (trunc_int_for_mode (INTVAL (op2), SImode)
2251 == INTVAL (op2)))
2252 return 1;
2253 /* ??? For the kernel, we may accept adjustment of
2254 -0x10000000, since we know that it will just convert
2255 negative address space to possitive, but perhaps this
2256 is not worthwhile. */
2257 break;
2258 case LABEL_REF:
2259 /* These conditions are similar to SYMBOL_REF ones, just the
2260 constraints for code models differ. */
2261 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
2262 && GET_CODE (op2) == CONST_INT
2263 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
2264 && (trunc_int_for_mode (INTVAL (op2), SImode)
2265 == INTVAL (op2)))
2266 return 1;
2267 break;
2268 default:
2269 return 0;
2270 }
2271 }
2272 return 0;
2273 default:
2274 return 0;
2275 }
2276}
6fca22eb
RH
2277
2278/* Value should be nonzero if functions must have frame pointers.
2279 Zero means the frame pointer need not be set up (and parms may
2280 be accessed via the stack pointer) in functions that seem suitable. */
2281
2282int
2283ix86_frame_pointer_required ()
2284{
2285 /* If we accessed previous frames, then the generated code expects
2286 to be able to access the saved ebp value in our frame. */
2287 if (cfun->machine->accesses_prev_frame)
2288 return 1;
a4f31c00 2289
6fca22eb
RH
2290 /* Several x86 os'es need a frame pointer for other reasons,
2291 usually pertaining to setjmp. */
2292 if (SUBTARGET_FRAME_POINTER_REQUIRED)
2293 return 1;
2294
2295 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
2296 the frame pointer by default. Turn it back on now if we've not
2297 got a leaf function. */
2298 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
2299 return 1;
2300
2301 return 0;
2302}
2303
2304/* Record that the current function accesses previous call frames. */
2305
2306void
2307ix86_setup_frame_addresses ()
2308{
2309 cfun->machine->accesses_prev_frame = 1;
2310}
e075ae69 2311\f
/* Buffer for the internal label used by the -fpic get-pc thunk; empty
   until load_pic_register generates it, tested by ix86_asm_file_end.  */
static char pic_label_name[32];
e9a25f70 2313
e075ae69
RH
2314/* This function generates code for -fpic that loads %ebx with
2315 the return address of the caller and then returns. */
2316
2317void
4cf12e7e 2318ix86_asm_file_end (file)
e075ae69 2319 FILE *file;
e075ae69
RH
2320{
2321 rtx xops[2];
32b5b1aa 2322
4cf12e7e
RH
2323 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
2324 return;
32b5b1aa 2325
c7f0da1d
RH
2326 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
2327 to updating relocations to a section being discarded such that this
2328 doesn't work. Ought to detect this at configure time. */
7c262518 2329#if 0
4cf12e7e
RH
2330 /* The trick here is to create a linkonce section containing the
2331 pic label thunk, but to refer to it with an internal label.
2332 Because the label is internal, we don't have inter-dso name
2333 binding issues on hosts that don't support ".hidden".
e9a25f70 2334
4cf12e7e
RH
2335 In order to use these macros, however, we must create a fake
2336 function decl. */
7c262518
RH
2337 if (targetm.have_named_sections)
2338 {
2339 tree decl = build_decl (FUNCTION_DECL,
2340 get_identifier ("i686.get_pc_thunk"),
2341 error_mark_node);
2342 DECL_ONE_ONLY (decl) = 1;
2343 UNIQUE_SECTION (decl, 0);
715bdd29 2344 named_section (decl, NULL);
7c262518
RH
2345 }
2346 else
4cf12e7e 2347#else
7c262518 2348 text_section ();
4cf12e7e 2349#endif
0afeb08a 2350
4cf12e7e
RH
2351 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
2352 internal (non-global) label that's being emitted, it didn't make
2353 sense to have .type information for local labels. This caused
2354 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
2355 me debug info for a label that you're declaring non-global?) this
2356 was changed to call ASM_OUTPUT_LABEL() instead. */
2357
2358 ASM_OUTPUT_LABEL (file, pic_label_name);
2359
2360 xops[0] = pic_offset_table_rtx;
2361 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
2362 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
2363 output_asm_insn ("ret", xops);
32b5b1aa 2364}
32b5b1aa 2365
e075ae69
RH
2366void
2367load_pic_register ()
32b5b1aa 2368{
e075ae69 2369 rtx gotsym, pclab;
32b5b1aa 2370
0d7d98ee
JH
2371 if (TARGET_64BIT)
2372 abort();
2373
a8a05998 2374 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
32b5b1aa 2375
e075ae69 2376 if (TARGET_DEEP_BRANCH_PREDICTION)
32b5b1aa 2377 {
4cf12e7e
RH
2378 if (! pic_label_name[0])
2379 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
e075ae69 2380 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
32b5b1aa 2381 }
e075ae69 2382 else
e5cb57e8 2383 {
e075ae69 2384 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
e5cb57e8 2385 }
e5cb57e8 2386
e075ae69 2387 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
2a2ab3f9 2388
e075ae69
RH
2389 if (! TARGET_DEEP_BRANCH_PREDICTION)
2390 emit_insn (gen_popsi1 (pic_offset_table_rtx));
79325812 2391
e075ae69 2392 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
e9a25f70 2393}
8dfe5673 2394
0d7d98ee 2395/* Generate an "push" pattern for input ARG. */
e9a25f70 2396
e075ae69
RH
2397static rtx
2398gen_push (arg)
2399 rtx arg;
e9a25f70 2400{
c5c76735 2401 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
2402 gen_rtx_MEM (Pmode,
2403 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
2404 stack_pointer_rtx)),
2405 arg);
e9a25f70
JL
2406}
2407
4dd2ac2c
JH
2408/* Return 1 if we need to save REGNO. */
2409static int
1020a5ab
RH
2410ix86_save_reg (regno, maybe_eh_return)
2411 int regno;
37a58036 2412 int maybe_eh_return;
1020a5ab
RH
2413{
2414 if (flag_pic
2415 && ! TARGET_64BIT
2416 && regno == PIC_OFFSET_TABLE_REGNUM
2417 && (current_function_uses_pic_offset_table
2418 || current_function_uses_const_pool
2419 || current_function_calls_eh_return))
2420 return 1;
2421
2422 if (current_function_calls_eh_return && maybe_eh_return)
2423 {
2424 unsigned i;
2425 for (i = 0; ; i++)
2426 {
2427 unsigned test = EH_RETURN_DATA_REGNO(i);
2428 if (test == INVALID_REGNUM)
2429 break;
2430 if (test == (unsigned) regno)
2431 return 1;
2432 }
2433 }
4dd2ac2c 2434
1020a5ab
RH
2435 return (regs_ever_live[regno]
2436 && !call_used_regs[regno]
2437 && !fixed_regs[regno]
2438 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4dd2ac2c
JH
2439}
2440
0903fcab
JH
2441/* Return number of registers to be saved on the stack. */
2442
2443static int
2444ix86_nsaved_regs ()
2445{
2446 int nregs = 0;
0903fcab
JH
2447 int regno;
2448
4dd2ac2c 2449 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 2450 if (ix86_save_reg (regno, true))
4dd2ac2c 2451 nregs++;
0903fcab
JH
2452 return nregs;
2453}
2454
2455/* Return the offset between two registers, one to be eliminated, and the other
2456 its replacement, at the start of a routine. */
2457
2458HOST_WIDE_INT
2459ix86_initial_elimination_offset (from, to)
2460 int from;
2461 int to;
2462{
4dd2ac2c
JH
2463 struct ix86_frame frame;
2464 ix86_compute_frame_layout (&frame);
564d80f4
JH
2465
2466 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 2467 return frame.hard_frame_pointer_offset;
564d80f4
JH
2468 else if (from == FRAME_POINTER_REGNUM
2469 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 2470 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
2471 else
2472 {
564d80f4
JH
2473 if (to != STACK_POINTER_REGNUM)
2474 abort ();
2475 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 2476 return frame.stack_pointer_offset;
564d80f4
JH
2477 else if (from != FRAME_POINTER_REGNUM)
2478 abort ();
0903fcab 2479 else
4dd2ac2c 2480 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
2481 }
2482}
2483
4dd2ac2c 2484/* Fill structure ix86_frame about frame of currently computed function. */
0f290768 2485
4dd2ac2c
JH
2486static void
2487ix86_compute_frame_layout (frame)
2488 struct ix86_frame *frame;
65954bd8 2489{
65954bd8 2490 HOST_WIDE_INT total_size;
564d80f4 2491 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
44affdae
JH
2492 int offset;
2493 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4dd2ac2c 2494 HOST_WIDE_INT size = get_frame_size ();
65954bd8 2495
4dd2ac2c 2496 frame->nregs = ix86_nsaved_regs ();
564d80f4 2497 total_size = size;
65954bd8 2498
4dd2ac2c
JH
2499 /* Skip return value and save base pointer. */
2500 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
2501
2502 frame->hard_frame_pointer_offset = offset;
564d80f4 2503
fcbfaa65
RK
2504 /* Do some sanity checking of stack_alignment_needed and
2505 preferred_alignment, since i386 port is the only using those features
2506 that may break easilly. */
564d80f4 2507
44affdae
JH
2508 if (size && !stack_alignment_needed)
2509 abort ();
44affdae
JH
2510 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
2511 abort ();
2512 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
2513 abort ();
2514 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
2515 abort ();
564d80f4 2516
4dd2ac2c
JH
2517 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
2518 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 2519
4dd2ac2c
JH
2520 /* Register save area */
2521 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 2522
8362f420
JH
2523 /* Va-arg area */
2524 if (ix86_save_varrargs_registers)
2525 {
2526 offset += X86_64_VARARGS_SIZE;
2527 frame->va_arg_size = X86_64_VARARGS_SIZE;
2528 }
2529 else
2530 frame->va_arg_size = 0;
2531
4dd2ac2c
JH
2532 /* Align start of frame for local function. */
2533 frame->padding1 = ((offset + stack_alignment_needed - 1)
2534 & -stack_alignment_needed) - offset;
f73ad30e 2535
4dd2ac2c 2536 offset += frame->padding1;
65954bd8 2537
4dd2ac2c
JH
2538 /* Frame pointer points here. */
2539 frame->frame_pointer_offset = offset;
54ff41b7 2540
4dd2ac2c 2541 offset += size;
65954bd8 2542
4dd2ac2c 2543 /* Add outgoing arguments area. */
f73ad30e 2544 if (ACCUMULATE_OUTGOING_ARGS)
4dd2ac2c
JH
2545 {
2546 offset += current_function_outgoing_args_size;
2547 frame->outgoing_arguments_size = current_function_outgoing_args_size;
2548 }
2549 else
2550 frame->outgoing_arguments_size = 0;
564d80f4 2551
4dd2ac2c
JH
2552 /* Align stack boundary. */
2553 frame->padding2 = ((offset + preferred_alignment - 1)
2554 & -preferred_alignment) - offset;
2555
2556 offset += frame->padding2;
2557
2558 /* We've reached end of stack frame. */
2559 frame->stack_pointer_offset = offset;
2560
2561 /* Size prologue needs to allocate. */
2562 frame->to_allocate =
2563 (size + frame->padding1 + frame->padding2
8362f420 2564 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 2565
8362f420
JH
2566 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
2567 && current_function_is_leaf)
2568 {
2569 frame->red_zone_size = frame->to_allocate;
2570 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
2571 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
2572 }
2573 else
2574 frame->red_zone_size = 0;
2575 frame->to_allocate -= frame->red_zone_size;
2576 frame->stack_pointer_offset -= frame->red_zone_size;
4dd2ac2c
JH
2577#if 0
2578 fprintf (stderr, "nregs: %i\n", frame->nregs);
2579 fprintf (stderr, "size: %i\n", size);
2580 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
2581 fprintf (stderr, "padding1: %i\n", frame->padding1);
8362f420 2582 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4dd2ac2c
JH
2583 fprintf (stderr, "padding2: %i\n", frame->padding2);
2584 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
8362f420 2585 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4dd2ac2c
JH
2586 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
2587 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
2588 frame->hard_frame_pointer_offset);
2589 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
2590#endif
65954bd8
JL
2591}
2592
0903fcab
JH
2593/* Emit code to save registers in the prologue. */
2594
2595static void
2596ix86_emit_save_regs ()
2597{
2598 register int regno;
0903fcab 2599 rtx insn;
0903fcab 2600
4dd2ac2c 2601 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 2602 if (ix86_save_reg (regno, true))
0903fcab 2603 {
0d7d98ee 2604 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
2605 RTX_FRAME_RELATED_P (insn) = 1;
2606 }
2607}
2608
c6036a37
JH
2609/* Emit code to save registers using MOV insns. First register
2610 is restored from POINTER + OFFSET. */
2611static void
2612ix86_emit_save_regs_using_mov (pointer, offset)
b72f00af
RK
2613 rtx pointer;
2614 HOST_WIDE_INT offset;
c6036a37
JH
2615{
2616 int regno;
2617 rtx insn;
2618
2619 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2620 if (ix86_save_reg (regno, true))
2621 {
b72f00af
RK
2622 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
2623 Pmode, offset),
c6036a37
JH
2624 gen_rtx_REG (Pmode, regno));
2625 RTX_FRAME_RELATED_P (insn) = 1;
2626 offset += UNITS_PER_WORD;
2627 }
2628}
2629
0f290768 2630/* Expand the prologue into a bunch of separate insns. */
e075ae69
RH
2631
2632void
2633ix86_expand_prologue ()
2a2ab3f9 2634{
564d80f4 2635 rtx insn;
0d7d98ee
JH
2636 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
2637 || current_function_uses_const_pool)
2638 && !TARGET_64BIT);
4dd2ac2c 2639 struct ix86_frame frame;
c6036a37
JH
2640 int use_mov = (TARGET_PROLOGUE_USING_MOVE && !optimize_size);
2641 HOST_WIDE_INT allocate;
4dd2ac2c
JH
2642
2643 ix86_compute_frame_layout (&frame);
79325812 2644
e075ae69
RH
2645 /* Note: AT&T enter does NOT have reversed args. Enter is probably
2646 slower on all targets. Also sdb doesn't like it. */
e9a25f70 2647
2a2ab3f9
JVA
2648 if (frame_pointer_needed)
2649 {
564d80f4 2650 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
e075ae69 2651 RTX_FRAME_RELATED_P (insn) = 1;
e9a25f70 2652
564d80f4 2653 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
e075ae69 2654 RTX_FRAME_RELATED_P (insn) = 1;
2a2ab3f9
JVA
2655 }
2656
c6036a37
JH
2657 allocate = frame.to_allocate;
2658 /* In case we are dealing only with single register and empty frame,
2659 push is equivalent of the mov+add sequence. */
2660 if (allocate == 0 && frame.nregs <= 1)
2661 use_mov = 0;
2662
2663 if (!use_mov)
2664 ix86_emit_save_regs ();
2665 else
2666 allocate += frame.nregs * UNITS_PER_WORD;
564d80f4 2667
c6036a37 2668 if (allocate == 0)
8dfe5673 2669 ;
e323735c 2670 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
469ac993 2671 {
f2042df3
RH
2672 insn = emit_insn (gen_pro_epilogue_adjust_stack
2673 (stack_pointer_rtx, stack_pointer_rtx,
e323735c 2674 GEN_INT (-allocate)));
e075ae69 2675 RTX_FRAME_RELATED_P (insn) = 1;
469ac993 2676 }
79325812 2677 else
8dfe5673 2678 {
e075ae69 2679 /* ??? Is this only valid for Win32? */
e9a25f70 2680
e075ae69 2681 rtx arg0, sym;
e9a25f70 2682
8362f420
JH
2683 if (TARGET_64BIT)
2684 abort();
2685
e075ae69 2686 arg0 = gen_rtx_REG (SImode, 0);
c6036a37 2687 emit_move_insn (arg0, GEN_INT (allocate));
77a989d1 2688
e075ae69
RH
2689 sym = gen_rtx_MEM (FUNCTION_MODE,
2690 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
32ee7d1d 2691 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
e075ae69
RH
2692
2693 CALL_INSN_FUNCTION_USAGE (insn)
276ab4a4
RH
2694 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
2695 CALL_INSN_FUNCTION_USAGE (insn));
e075ae69 2696 }
c6036a37
JH
2697 if (use_mov)
2698 {
2699 if (!frame_pointer_needed || !frame.to_allocate)
2700 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
2701 else
2702 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
2703 -frame.nregs * UNITS_PER_WORD);
2704 }
e9a25f70 2705
84530511
SC
2706#ifdef SUBTARGET_PROLOGUE
2707 SUBTARGET_PROLOGUE;
0f290768 2708#endif
84530511 2709
e9a25f70 2710 if (pic_reg_used)
e075ae69 2711 load_pic_register ();
77a989d1 2712
e9a25f70
JL
2713 /* If we are profiling, make sure no instructions are scheduled before
2714 the call to mcount. However, if -fpic, the above call will have
2715 done that. */
e075ae69 2716 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
e9a25f70 2717 emit_insn (gen_blockage ());
77a989d1
SC
2718}
2719
da2d1d3a
JH
2720/* Emit code to restore saved registers using MOV insns. First register
2721 is restored from POINTER + OFFSET. */
2722static void
1020a5ab
RH
2723ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
2724 rtx pointer;
2725 int offset;
37a58036 2726 int maybe_eh_return;
da2d1d3a
JH
2727{
2728 int regno;
da2d1d3a 2729
4dd2ac2c 2730 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 2731 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 2732 {
4dd2ac2c 2733 emit_move_insn (gen_rtx_REG (Pmode, regno),
b72f00af
RK
2734 adjust_address (gen_rtx_MEM (Pmode, pointer),
2735 Pmode, offset));
4dd2ac2c 2736 offset += UNITS_PER_WORD;
da2d1d3a
JH
2737 }
2738}
2739
/* Restore function stack, frame, and registers.

   STYLE selects among the epilogue variants:
     0 - emit no return instruction (used for sibling-call epilogues);
     2 - the eh_return path: %ecx (EH_RETURN_STACKADJ_RTX) is folded into
	 the stack pointer and the saved eax/edx slots are skipped;
     anything else - a normal function return.  */

void
ix86_expand_epilogue (style)
     int style;
{
  int regno;
  /* Nonzero when %esp still points where the prologue left it, so it can
     be used to address the saved-register block.  */
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    {
      FUNCTION_BLOCK_PROFILER_EXIT;
    }
#endif

  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well.  Especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE && !optimize_size
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
	  && frame.nregs == 1)
      || style == 2)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      /* Compute the final stack address into SA, reload the saved
		 frame pointer, then adjust %esp by SA.  */
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      emit_insn (gen_pro_epilogue_adjust_stack
			 (stack_pointer_rtx, sa, const0_rtx));
	    }
	  else
	    {
	      /* No frame pointer: pop frame and saved-register block in
		 one adjustment that also adds the EH stack adjustment.  */
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate
			     + frame.nregs * UNITS_PER_WORD)));
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  /* Discrete equivalent of LEAVE: mov %ebp -> %esp, pop %ebp.  */
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx));
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset)));
	}
      else if (frame.to_allocate)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate)));

      /* Pop the call-saved registers in register-number order.  */
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  /* %ecx (hard register 2) is call-clobbered, so it is free to
	     hold the return address here.  */
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in the 64bit ABI.  */
	  if (TARGET_64BIT)
	    abort();

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
2908\f
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  On success the components (base, index, disp,
   scale) are stored in *OUT and TRUE is returned.  Return FALSE if the
   structure of the address is grossly off.

   Recognized canonical shapes include: reg, base+disp, index+base,
   index*scale [+base] [+disp], index<<log [+...], and a bare
   displacement.  After matching, several i386-encoding fixups are
   applied in order; their sequence matters.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;			/* index*scale + base */
	  else
	    disp = op1;			/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return FALSE;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return FALSE;
      scale = INTVAL (tmp);
      /* Only shift counts 0..3 map onto the 1/2/4/8 scale encodings.  */
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return FALSE;
      scale = 1 << scale;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return FALSE;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is not scaling.
     (They cannot be encoded as an index, so swap them into the base.)  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
	  || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return TRUE;
}
01329426
JH
3034\f
3035/* Return cost of the memory address x.
3036 For i386, it is better to use a complex address than let gcc copy
3037 the address into a reg and make a new pseudo. But not if the address
3038 requires to two regs - that would mean more pseudos with longer
3039 lifetimes. */
3040int
3041ix86_address_cost (x)
3042 rtx x;
3043{
3044 struct ix86_address parts;
3045 int cost = 1;
3b3c6a3f 3046
01329426
JH
3047 if (!ix86_decompose_address (x, &parts))
3048 abort ();
3049
3050 /* More complex memory references are better. */
3051 if (parts.disp && parts.disp != const0_rtx)
3052 cost--;
3053
3054 /* Attempt to minimize number of registers in the address. */
3055 if ((parts.base
3056 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
3057 || (parts.index
3058 && (!REG_P (parts.index)
3059 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
3060 cost++;
3061
3062 if (parts.base
3063 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
3064 && parts.index
3065 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
3066 && parts.base != parts.index)
3067 cost++;
3068
3069 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
3070 since it's predecode logic can't detect the length of instructions
3071 and it degenerates to vector decoded. Increase cost of such
3072 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 3073 to split such addresses or even refuse such addresses at all.
01329426
JH
3074
3075 Following addressing modes are affected:
3076 [base+scale*index]
3077 [scale*index+disp]
3078 [base+index]
0f290768 3079
01329426
JH
3080 The first and last case may be avoidable by explicitly coding the zero in
3081 memory address, but I don't have AMD-K6 machine handy to check this
3082 theory. */
3083
3084 if (TARGET_K6
3085 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
3086 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
3087 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
3088 cost += 10;
0f290768 3089
01329426
JH
3090 return cost;
3091}
3092\f
b949ea8b
JW
3093/* If X is a machine specific address (i.e. a symbol or label being
3094 referenced as a displacement from the GOT implemented using an
3095 UNSPEC), then return the base term. Otherwise return X. */
3096
3097rtx
3098ix86_find_base_term (x)
3099 rtx x;
3100{
3101 rtx term;
3102
3103 if (GET_CODE (x) != PLUS
3104 || XEXP (x, 0) != pic_offset_table_rtx
3105 || GET_CODE (XEXP (x, 1)) != CONST)
3106 return x;
3107
3108 term = XEXP (XEXP (x, 1), 0);
3109
3110 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
3111 term = XEXP (term, 0);
3112
3113 if (GET_CODE (term) != UNSPEC
3114 || XVECLEN (term, 0) != 1
3115 || XINT (term, 1) != 7)
3116 return x;
3117
3118 term = XVECEXP (term, 0, 0);
3119
3120 if (GET_CODE (term) != SYMBOL_REF
3121 && GET_CODE (term) != LABEL_REF)
3122 return x;
3123
3124 return term;
3125}
3126\f
e075ae69
RH
3127/* Determine if a given CONST RTX is a valid memory displacement
3128 in PIC mode. */
0f290768 3129
59be65f6 3130int
91bb873f
RH
3131legitimate_pic_address_disp_p (disp)
3132 register rtx disp;
3133{
3134 if (GET_CODE (disp) != CONST)
3135 return 0;
3136 disp = XEXP (disp, 0);
3137
3138 if (GET_CODE (disp) == PLUS)
3139 {
3140 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
3141 return 0;
3142 disp = XEXP (disp, 0);
3143 }
3144
3145 if (GET_CODE (disp) != UNSPEC
3146 || XVECLEN (disp, 0) != 1)
3147 return 0;
3148
3149 /* Must be @GOT or @GOTOFF. */
3150 if (XINT (disp, 1) != 6
3151 && XINT (disp, 1) != 7)
3152 return 0;
3153
3154 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3155 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
3156 return 0;
3157
3158 return 1;
3159}
3160
e075ae69
RH
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.  STRICT nonzero
   selects the strict register predicates (hard registers only).

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.

   Returns TRUE when valid.  Rejections funnel through report_error so a
   textual reason can be printed under TARGET_DEBUG_ADDR.  */

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (! ix86_decompose_address (addr, &parts))
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (!CONSTANT_ADDRESS_P (disp))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}

      if (TARGET_64BIT)
	{
	  /* 64bit displacements must fit the sign-extended 32bit
	     immediate field.  */
	  if (!x86_64_sign_extended_value (disp))
	    {
	      reason = "displacement is out of range";
	      goto report_error;
	    }
	}
      else
	{
	  if (GET_CODE (disp) == CONST_DOUBLE)
	    {
	      reason = "displacement is a const_double";
	      goto report_error;
	    }
	}

      if (flag_pic && SYMBOLIC_CONST (disp))
	{
	  if (TARGET_64BIT && (index || base))
	    {
	      reason = "non-constant pic memory reference";
	      goto report_error;
	    }
	  if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (HALF_PIC_P ())
	{
	  if (! HALF_PIC_ADDRESS_P (disp)
	      || (base != NULL_RTX || index != NULL_RTX))
	    {
	      reason = "displacement is an invalid half-pic reference";
	      goto report_error;
	    }
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
3b3c6a3f 3364\f
55efb413
JW
3365/* Return an unique alias set for the GOT. */
3366
0f290768 3367static HOST_WIDE_INT
55efb413
JW
3368ix86_GOT_alias_set ()
3369{
3370 static HOST_WIDE_INT set = -1;
3371 if (set == -1)
3372 set = new_alias_set ();
3373 return set;
0f290768 3374}
55efb413 3375
3b3c6a3f
MM
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.

   UNSPEC number 6 encodes an @GOT reference and 7 an @GOTOFF reference
   (see output_pic_addr_const).  */

rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

  if (GET_CODE (addr) == LABEL_REF
      || (GET_CODE (addr) == SYMBOL_REF
	  && (CONSTANT_POOL_ADDRESS_P (addr)
	      || SYMBOL_REF_FLAG (addr))))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      /* This symbol must be referenced via a load from the
	 Global Offset Table (@GOT).  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
      new = gen_rtx_MEM (Pmode, new);
      /* The GOT entry never changes, and gets its own alias set.  */
      RTX_UNCHANGING_P (new) = 1;
      set_mem_alias_set (new, ix86_GOT_alias_set ());

      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      emit_move_insn (reg, new);
      new = reg;
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == UNSPEC)
	    {
	      /* Check that the unspec is one of the ones we generate?  */
	    }
	  else if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if ((GET_CODE (op0) == LABEL_REF
	       || (GET_CODE (op0) == SYMBOL_REF
		   && (CONSTANT_POOL_ADDRESS_P (op0)
		       || SYMBOL_REF_FLAG (op0))))
	      && GET_CODE (op1) == CONST_INT)
	    {
	      current_function_uses_pic_offset_table = 1;
	      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
	      new = gen_rtx_PLUS (Pmode, new, op1);
	      new = gen_rtx_CONST (Pmode, new);
	      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

	      if (reg != 0)
		{
		  emit_move_insn (reg, new);
		  new = reg;
		}
	    }
	  else
	    {
	      /* Legitimize both operands recursively; don't hand REG to
		 the second recursion if the first already claimed it.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new = legitimize_pic_address (XEXP (addr, 1),
					    base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  /* Keep any trailing constant outermost in the sum.  */
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}
3498\f
3b3c6a3f
MM
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (x, oldx, mode)
     register rtx x;
     register rtx oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  int changed = 0;		/* Nonzero once X has been rewritten.  */
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  /* The CONST_INT may sit in either of two positions; OTHER is
	     the remaining constant the integer gets folded into.  */
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Force multiplications out into registers.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Last resort: force the non-register operand into a register.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val  = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val  = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
2a2ab3f9
JVA
3683\f
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      /* "."  is the current location only in PIC output.  */
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      if (code == 'P' && ! SYMBOL_REF_FLAG (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      /* Bracketing differs between the ATT and Intel dialects.  */
      putc (ASSEMBLER_DIALECT ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      /* Numbers match those generated by legitimize_pic_address.  */
      switch (XINT (x, 1))
	{
	case 6:
	  fputs ("@GOT", file);
	  break;
	case 7:
	  fputs ("@GOTOFF", file);
	  break;
	case 8:
	  fputs ("@PLT", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
1865dbb5 3798
0f290768 3799/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
3800 We need to handle our special PIC relocations. */
3801
0f290768 3802void
1865dbb5
JM
3803i386_dwarf_output_addr_const (file, x)
3804 FILE *file;
3805 rtx x;
3806{
f0ca81d2 3807 fprintf (file, "%s", INT_ASM_OP);
1865dbb5
JM
3808 if (flag_pic)
3809 output_pic_addr_const (file, x, '\0');
3810 else
3811 output_addr_const (file, x);
3812 fputc ('\n', file);
3813}
3814
3815/* In the name of slightly smaller debug output, and to cater to
3816 general assembler losage, recognize PIC+GOTOFF and turn it back
3817 into a direct symbol reference. */
3818
3819rtx
3820i386_simplify_dwarf_addr (orig_x)
3821 rtx orig_x;
3822{
3823 rtx x = orig_x;
3824
3825 if (GET_CODE (x) != PLUS
3826 || GET_CODE (XEXP (x, 0)) != REG
3827 || GET_CODE (XEXP (x, 1)) != CONST)
3828 return orig_x;
3829
3830 x = XEXP (XEXP (x, 1), 0);
3831 if (GET_CODE (x) == UNSPEC
3adbce3d
RH
3832 && (XINT (x, 1) == 6
3833 || XINT (x, 1) == 7))
1865dbb5
JM
3834 return XVECEXP (x, 0, 0);
3835
3836 if (GET_CODE (x) == PLUS
3837 && GET_CODE (XEXP (x, 0)) == UNSPEC
3838 && GET_CODE (XEXP (x, 1)) == CONST_INT
3adbce3d
RH
3839 && (XINT (XEXP (x, 0), 1) == 6
3840 || XINT (XEXP (x, 0), 1) == 7))
1865dbb5
JM
3841 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
3842
3843 return orig_x;
3844}
2a2ab3f9 3845\f
a269a03c 3846static void
e075ae69 3847put_condition_code (code, mode, reverse, fp, file)
a269a03c 3848 enum rtx_code code;
e075ae69
RH
3849 enum machine_mode mode;
3850 int reverse, fp;
a269a03c
JC
3851 FILE *file;
3852{
a269a03c
JC
3853 const char *suffix;
3854
9a915772
JH
3855 if (mode == CCFPmode || mode == CCFPUmode)
3856 {
3857 enum rtx_code second_code, bypass_code;
3858 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3859 if (bypass_code != NIL || second_code != NIL)
3860 abort();
3861 code = ix86_fp_compare_code_to_integer (code);
3862 mode = CCmode;
3863 }
a269a03c
JC
3864 if (reverse)
3865 code = reverse_condition (code);
e075ae69 3866
a269a03c
JC
3867 switch (code)
3868 {
3869 case EQ:
3870 suffix = "e";
3871 break;
a269a03c
JC
3872 case NE:
3873 suffix = "ne";
3874 break;
a269a03c 3875 case GT:
7e08e190 3876 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
e075ae69
RH
3877 abort ();
3878 suffix = "g";
a269a03c 3879 break;
a269a03c 3880 case GTU:
e075ae69
RH
3881 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
3882 Those same assemblers have the same but opposite losage on cmov. */
7e08e190 3883 if (mode != CCmode)
0f290768 3884 abort ();
e075ae69 3885 suffix = fp ? "nbe" : "a";
a269a03c 3886 break;
a269a03c 3887 case LT:
9076b9c1 3888 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 3889 suffix = "s";
7e08e190 3890 else if (mode == CCmode || mode == CCGCmode)
e075ae69 3891 suffix = "l";
9076b9c1 3892 else
0f290768 3893 abort ();
a269a03c 3894 break;
a269a03c 3895 case LTU:
9076b9c1 3896 if (mode != CCmode)
0f290768 3897 abort ();
a269a03c
JC
3898 suffix = "b";
3899 break;
a269a03c 3900 case GE:
9076b9c1 3901 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 3902 suffix = "ns";
7e08e190 3903 else if (mode == CCmode || mode == CCGCmode)
e075ae69 3904 suffix = "ge";
9076b9c1 3905 else
0f290768 3906 abort ();
a269a03c 3907 break;
a269a03c 3908 case GEU:
e075ae69 3909 /* ??? As above. */
7e08e190 3910 if (mode != CCmode)
0f290768 3911 abort ();
7e08e190 3912 suffix = fp ? "nb" : "ae";
a269a03c 3913 break;
a269a03c 3914 case LE:
7e08e190 3915 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
e075ae69
RH
3916 abort ();
3917 suffix = "le";
a269a03c 3918 break;
a269a03c 3919 case LEU:
9076b9c1
JH
3920 if (mode != CCmode)
3921 abort ();
7e08e190 3922 suffix = "be";
a269a03c 3923 break;
3a3677ff 3924 case UNORDERED:
9e7adcb3 3925 suffix = fp ? "u" : "p";
3a3677ff
RH
3926 break;
3927 case ORDERED:
9e7adcb3 3928 suffix = fp ? "nu" : "np";
3a3677ff 3929 break;
a269a03c
JC
3930 default:
3931 abort ();
3932 }
3933 fputs (suffix, file);
3934}
3935
e075ae69
RH
3936void
3937print_reg (x, code, file)
3938 rtx x;
3939 int code;
3940 FILE *file;
e5cb57e8 3941{
e075ae69 3942 if (REGNO (x) == ARG_POINTER_REGNUM
564d80f4 3943 || REGNO (x) == FRAME_POINTER_REGNUM
e075ae69
RH
3944 || REGNO (x) == FLAGS_REG
3945 || REGNO (x) == FPSR_REG)
3946 abort ();
e9a25f70 3947
e075ae69
RH
3948 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3949 putc ('%', file);
3950
ef6257cd 3951 if (code == 'w' || MMX_REG_P (x))
e075ae69
RH
3952 code = 2;
3953 else if (code == 'b')
3954 code = 1;
3955 else if (code == 'k')
3956 code = 4;
3f3f2124
JH
3957 else if (code == 'q')
3958 code = 8;
e075ae69
RH
3959 else if (code == 'y')
3960 code = 3;
3961 else if (code == 'h')
3962 code = 0;
3963 else
3964 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 3965
3f3f2124
JH
3966 /* Irritatingly, AMD extended registers use different naming convention
3967 from the normal registers. */
3968 if (REX_INT_REG_P (x))
3969 {
885a70fd
JH
3970 if (!TARGET_64BIT)
3971 abort ();
3f3f2124
JH
3972 switch (code)
3973 {
ef6257cd 3974 case 0:
3f3f2124
JH
3975 error ("Extended registers have no high halves\n");
3976 break;
3977 case 1:
3978 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
3979 break;
3980 case 2:
3981 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
3982 break;
3983 case 4:
3984 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
3985 break;
3986 case 8:
3987 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
3988 break;
3989 default:
3990 error ("Unsupported operand size for extended register.\n");
3991 break;
3992 }
3993 return;
3994 }
e075ae69
RH
3995 switch (code)
3996 {
3997 case 3:
3998 if (STACK_TOP_P (x))
3999 {
4000 fputs ("st(0)", file);
4001 break;
4002 }
4003 /* FALLTHRU */
e075ae69 4004 case 8:
3f3f2124 4005 case 4:
e075ae69 4006 case 12:
446988df 4007 if (! ANY_FP_REG_P (x))
885a70fd 4008 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
e075ae69 4009 /* FALLTHRU */
a7180f70 4010 case 16:
e075ae69
RH
4011 case 2:
4012 fputs (hi_reg_name[REGNO (x)], file);
4013 break;
4014 case 1:
4015 fputs (qi_reg_name[REGNO (x)], file);
4016 break;
4017 case 0:
4018 fputs (qi_high_reg_name[REGNO (x)], file);
4019 break;
4020 default:
4021 abort ();
fe25fea3 4022 }
e5cb57e8
SC
4023}
4024
2a2ab3f9 4025/* Meaning of CODE:
fe25fea3 4026 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 4027 C -- print opcode suffix for set/cmov insn.
fe25fea3 4028 c -- like C, but print reversed condition
ef6257cd 4029 F,f -- likewise, but for floating-point.
2a2ab3f9
JVA
4030 R -- print the prefix for register names.
4031 z -- print the opcode suffix for the size of the current operand.
4032 * -- print a star (in certain assembler syntax)
fb204271 4033 A -- print an absolute memory reference.
2a2ab3f9 4034 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
4035 s -- print a shift double count, followed by the assemblers argument
4036 delimiter.
fe25fea3
SC
4037 b -- print the QImode name of the register for the indicated operand.
4038 %b0 would print %al if operands[0] is reg 0.
4039 w -- likewise, print the HImode name of the register.
4040 k -- likewise, print the SImode name of the register.
3f3f2124 4041 q -- likewise, print the DImode name of the register.
ef6257cd
JH
4042 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
4043 y -- print "st(0)" instead of "st" as a register.
a46d1d38 4044 D -- print condition for SSE cmp instruction.
ef6257cd
JH
4045 P -- if PIC, print an @PLT suffix.
4046 X -- don't print any sort of PIC '@' suffix for a symbol.
a46d1d38 4047 */
2a2ab3f9
JVA
4048
4049void
4050print_operand (file, x, code)
4051 FILE *file;
4052 rtx x;
4053 int code;
4054{
4055 if (code)
4056 {
4057 switch (code)
4058 {
4059 case '*':
e075ae69 4060 if (ASSEMBLER_DIALECT == 0)
2a2ab3f9
JVA
4061 putc ('*', file);
4062 return;
4063
fb204271
DN
4064 case 'A':
4065 if (ASSEMBLER_DIALECT == 0)
4066 putc ('*', file);
4067 else if (ASSEMBLER_DIALECT == 1)
4068 {
4069 /* Intel syntax. For absolute addresses, registers should not
4070 be surrounded by braces. */
4071 if (GET_CODE (x) != REG)
4072 {
4073 putc ('[', file);
4074 PRINT_OPERAND (file, x, 0);
4075 putc (']', file);
4076 return;
4077 }
4078 }
4079
4080 PRINT_OPERAND (file, x, 0);
4081 return;
4082
4083
2a2ab3f9 4084 case 'L':
e075ae69
RH
4085 if (ASSEMBLER_DIALECT == 0)
4086 putc ('l', file);
2a2ab3f9
JVA
4087 return;
4088
4089 case 'W':
e075ae69
RH
4090 if (ASSEMBLER_DIALECT == 0)
4091 putc ('w', file);
2a2ab3f9
JVA
4092 return;
4093
4094 case 'B':
e075ae69
RH
4095 if (ASSEMBLER_DIALECT == 0)
4096 putc ('b', file);
2a2ab3f9
JVA
4097 return;
4098
4099 case 'Q':
e075ae69
RH
4100 if (ASSEMBLER_DIALECT == 0)
4101 putc ('l', file);
2a2ab3f9
JVA
4102 return;
4103
4104 case 'S':
e075ae69
RH
4105 if (ASSEMBLER_DIALECT == 0)
4106 putc ('s', file);
2a2ab3f9
JVA
4107 return;
4108
5f1ec3e6 4109 case 'T':
e075ae69
RH
4110 if (ASSEMBLER_DIALECT == 0)
4111 putc ('t', file);
5f1ec3e6
JVA
4112 return;
4113
2a2ab3f9
JVA
4114 case 'z':
4115 /* 387 opcodes don't get size suffixes if the operands are
0f290768 4116 registers. */
2a2ab3f9
JVA
4117
4118 if (STACK_REG_P (x))
4119 return;
4120
4121 /* this is the size of op from size of operand */
4122 switch (GET_MODE_SIZE (GET_MODE (x)))
4123 {
2a2ab3f9 4124 case 2:
155d8a47
JW
4125#ifdef HAVE_GAS_FILDS_FISTS
4126 putc ('s', file);
4127#endif
2a2ab3f9
JVA
4128 return;
4129
4130 case 4:
4131 if (GET_MODE (x) == SFmode)
4132 {
e075ae69 4133 putc ('s', file);
2a2ab3f9
JVA
4134 return;
4135 }
4136 else
e075ae69 4137 putc ('l', file);
2a2ab3f9
JVA
4138 return;
4139
5f1ec3e6 4140 case 12:
2b589241 4141 case 16:
e075ae69
RH
4142 putc ('t', file);
4143 return;
5f1ec3e6 4144
2a2ab3f9
JVA
4145 case 8:
4146 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
4147 {
4148#ifdef GAS_MNEMONICS
e075ae69 4149 putc ('q', file);
56c0e8fa 4150#else
e075ae69
RH
4151 putc ('l', file);
4152 putc ('l', file);
56c0e8fa
JVA
4153#endif
4154 }
e075ae69
RH
4155 else
4156 putc ('l', file);
2a2ab3f9 4157 return;
155d8a47
JW
4158
4159 default:
4160 abort ();
2a2ab3f9 4161 }
4af3895e
JVA
4162
4163 case 'b':
4164 case 'w':
4165 case 'k':
3f3f2124 4166 case 'q':
4af3895e
JVA
4167 case 'h':
4168 case 'y':
5cb6195d 4169 case 'X':
e075ae69 4170 case 'P':
4af3895e
JVA
4171 break;
4172
2d49677f
SC
4173 case 's':
4174 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
4175 {
4176 PRINT_OPERAND (file, x, 0);
e075ae69 4177 putc (',', file);
2d49677f 4178 }
a269a03c
JC
4179 return;
4180
a46d1d38
JH
4181 case 'D':
4182 /* Little bit of braindamage here. The SSE compare instructions
4183 does use completely different names for the comparisons that the
4184 fp conditional moves. */
4185 switch (GET_CODE (x))
4186 {
4187 case EQ:
4188 case UNEQ:
4189 fputs ("eq", file);
4190 break;
4191 case LT:
4192 case UNLT:
4193 fputs ("lt", file);
4194 break;
4195 case LE:
4196 case UNLE:
4197 fputs ("le", file);
4198 break;
4199 case UNORDERED:
4200 fputs ("unord", file);
4201 break;
4202 case NE:
4203 case LTGT:
4204 fputs ("neq", file);
4205 break;
4206 case UNGE:
4207 case GE:
4208 fputs ("nlt", file);
4209 break;
4210 case UNGT:
4211 case GT:
4212 fputs ("nle", file);
4213 break;
4214 case ORDERED:
4215 fputs ("ord", file);
4216 break;
4217 default:
4218 abort ();
4219 break;
4220 }
4221 return;
1853aadd 4222 case 'C':
e075ae69 4223 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 4224 return;
fe25fea3 4225 case 'F':
e075ae69 4226 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
4227 return;
4228
e9a25f70 4229 /* Like above, but reverse condition */
e075ae69
RH
4230 case 'c':
4231 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
4232 return;
fe25fea3 4233 case 'f':
e075ae69 4234 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 4235 return;
ef6257cd
JH
4236 case '+':
4237 {
4238 rtx x;
e5cb57e8 4239
ef6257cd
JH
4240 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
4241 return;
a4f31c00 4242
ef6257cd
JH
4243 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4244 if (x)
4245 {
4246 int pred_val = INTVAL (XEXP (x, 0));
4247
4248 if (pred_val < REG_BR_PROB_BASE * 45 / 100
4249 || pred_val > REG_BR_PROB_BASE * 55 / 100)
4250 {
4251 int taken = pred_val > REG_BR_PROB_BASE / 2;
4252 int cputaken = final_forward_branch_p (current_output_insn) == 0;
4253
4254 /* Emit hints only in the case default branch prediction
4255 heruistics would fail. */
4256 if (taken != cputaken)
4257 {
4258 /* We use 3e (DS) prefix for taken branches and
4259 2e (CS) prefix for not taken branches. */
4260 if (taken)
4261 fputs ("ds ; ", file);
4262 else
4263 fputs ("cs ; ", file);
4264 }
4265 }
4266 }
4267 return;
4268 }
4af3895e 4269 default:
68daafd4
JVA
4270 {
4271 char str[50];
68daafd4
JVA
4272 sprintf (str, "invalid operand code `%c'", code);
4273 output_operand_lossage (str);
4274 }
2a2ab3f9
JVA
4275 }
4276 }
e9a25f70 4277
2a2ab3f9
JVA
4278 if (GET_CODE (x) == REG)
4279 {
4280 PRINT_REG (x, code, file);
4281 }
e9a25f70 4282
2a2ab3f9
JVA
4283 else if (GET_CODE (x) == MEM)
4284 {
e075ae69
RH
4285 /* No `byte ptr' prefix for call instructions. */
4286 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
2a2ab3f9 4287 {
69ddee61 4288 const char * size;
e075ae69
RH
4289 switch (GET_MODE_SIZE (GET_MODE (x)))
4290 {
4291 case 1: size = "BYTE"; break;
4292 case 2: size = "WORD"; break;
4293 case 4: size = "DWORD"; break;
4294 case 8: size = "QWORD"; break;
4295 case 12: size = "XWORD"; break;
a7180f70 4296 case 16: size = "XMMWORD"; break;
e075ae69 4297 default:
564d80f4 4298 abort ();
e075ae69 4299 }
fb204271
DN
4300
4301 /* Check for explicit size override (codes 'b', 'w' and 'k') */
4302 if (code == 'b')
4303 size = "BYTE";
4304 else if (code == 'w')
4305 size = "WORD";
4306 else if (code == 'k')
4307 size = "DWORD";
4308
e075ae69
RH
4309 fputs (size, file);
4310 fputs (" PTR ", file);
2a2ab3f9 4311 }
e075ae69
RH
4312
4313 x = XEXP (x, 0);
4314 if (flag_pic && CONSTANT_ADDRESS_P (x))
4315 output_pic_addr_const (file, x, code);
0d7d98ee
JH
4316 /* Avoid (%rip) for call operands. */
4317 else if (CONSTANT_ADDRESS_P (x) && code =='P'
4318 && GET_CODE (x) != CONST_INT)
4319 output_addr_const (file, x);
2a2ab3f9 4320 else
e075ae69 4321 output_address (x);
2a2ab3f9 4322 }
e9a25f70 4323
2a2ab3f9
JVA
4324 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
4325 {
e9a25f70
JL
4326 REAL_VALUE_TYPE r;
4327 long l;
4328
5f1ec3e6
JVA
4329 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4330 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69
RH
4331
4332 if (ASSEMBLER_DIALECT == 0)
4333 putc ('$', file);
52267fcb 4334 fprintf (file, "0x%lx", l);
5f1ec3e6 4335 }
e9a25f70 4336
0f290768 4337 /* These float cases don't actually occur as immediate operands. */
5f1ec3e6
JVA
4338 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
4339 {
e9a25f70
JL
4340 REAL_VALUE_TYPE r;
4341 char dstr[30];
4342
5f1ec3e6
JVA
4343 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4344 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
4345 fprintf (file, "%s", dstr);
2a2ab3f9 4346 }
e9a25f70 4347
2b589241
JH
4348 else if (GET_CODE (x) == CONST_DOUBLE
4349 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
2a2ab3f9 4350 {
e9a25f70
JL
4351 REAL_VALUE_TYPE r;
4352 char dstr[30];
4353
5f1ec3e6
JVA
4354 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4355 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
4356 fprintf (file, "%s", dstr);
2a2ab3f9 4357 }
79325812 4358 else
2a2ab3f9 4359 {
4af3895e 4360 if (code != 'P')
2a2ab3f9 4361 {
695dac07 4362 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69
RH
4363 {
4364 if (ASSEMBLER_DIALECT == 0)
4365 putc ('$', file);
4366 }
2a2ab3f9
JVA
4367 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
4368 || GET_CODE (x) == LABEL_REF)
e075ae69
RH
4369 {
4370 if (ASSEMBLER_DIALECT == 0)
4371 putc ('$', file);
4372 else
4373 fputs ("OFFSET FLAT:", file);
4374 }
2a2ab3f9 4375 }
e075ae69
RH
4376 if (GET_CODE (x) == CONST_INT)
4377 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4378 else if (flag_pic)
2a2ab3f9
JVA
4379 output_pic_addr_const (file, x, code);
4380 else
4381 output_addr_const (file, x);
4382 }
4383}
4384\f
4385/* Print a memory operand whose address is ADDR. */
4386
4387void
4388print_operand_address (file, addr)
4389 FILE *file;
4390 register rtx addr;
4391{
e075ae69
RH
4392 struct ix86_address parts;
4393 rtx base, index, disp;
4394 int scale;
e9a25f70 4395
e075ae69
RH
4396 if (! ix86_decompose_address (addr, &parts))
4397 abort ();
e9a25f70 4398
e075ae69
RH
4399 base = parts.base;
4400 index = parts.index;
4401 disp = parts.disp;
4402 scale = parts.scale;
e9a25f70 4403
e075ae69
RH
4404 if (!base && !index)
4405 {
4406 /* Displacement only requires special attention. */
e9a25f70 4407
e075ae69 4408 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 4409 {
e075ae69 4410 if (ASSEMBLER_DIALECT != 0)
fb204271
DN
4411 {
4412 if (USER_LABEL_PREFIX[0] == 0)
4413 putc ('%', file);
4414 fputs ("ds:", file);
4415 }
e075ae69 4416 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
2a2ab3f9 4417 }
e075ae69
RH
4418 else if (flag_pic)
4419 output_pic_addr_const (file, addr, 0);
4420 else
4421 output_addr_const (file, addr);
0d7d98ee
JH
4422
4423 /* Use one byte shorter RIP relative addressing for 64bit mode. */
4424 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
4425 fputs ("(%rip)", file);
e075ae69
RH
4426 }
4427 else
4428 {
4429 if (ASSEMBLER_DIALECT == 0)
2a2ab3f9 4430 {
e075ae69 4431 if (disp)
2a2ab3f9 4432 {
c399861d 4433 if (flag_pic)
e075ae69
RH
4434 output_pic_addr_const (file, disp, 0);
4435 else if (GET_CODE (disp) == LABEL_REF)
4436 output_asm_label (disp);
2a2ab3f9 4437 else
e075ae69 4438 output_addr_const (file, disp);
2a2ab3f9
JVA
4439 }
4440
e075ae69
RH
4441 putc ('(', file);
4442 if (base)
4443 PRINT_REG (base, 0, file);
4444 if (index)
2a2ab3f9 4445 {
e075ae69
RH
4446 putc (',', file);
4447 PRINT_REG (index, 0, file);
4448 if (scale != 1)
4449 fprintf (file, ",%d", scale);
2a2ab3f9 4450 }
e075ae69 4451 putc (')', file);
2a2ab3f9 4452 }
2a2ab3f9
JVA
4453 else
4454 {
e075ae69 4455 rtx offset = NULL_RTX;
e9a25f70 4456
e075ae69
RH
4457 if (disp)
4458 {
4459 /* Pull out the offset of a symbol; print any symbol itself. */
4460 if (GET_CODE (disp) == CONST
4461 && GET_CODE (XEXP (disp, 0)) == PLUS
4462 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
4463 {
4464 offset = XEXP (XEXP (disp, 0), 1);
4465 disp = gen_rtx_CONST (VOIDmode,
4466 XEXP (XEXP (disp, 0), 0));
4467 }
ce193852 4468
e075ae69
RH
4469 if (flag_pic)
4470 output_pic_addr_const (file, disp, 0);
4471 else if (GET_CODE (disp) == LABEL_REF)
4472 output_asm_label (disp);
4473 else if (GET_CODE (disp) == CONST_INT)
4474 offset = disp;
4475 else
4476 output_addr_const (file, disp);
4477 }
e9a25f70 4478
e075ae69
RH
4479 putc ('[', file);
4480 if (base)
a8620236 4481 {
e075ae69
RH
4482 PRINT_REG (base, 0, file);
4483 if (offset)
4484 {
4485 if (INTVAL (offset) >= 0)
4486 putc ('+', file);
4487 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
4488 }
a8620236 4489 }
e075ae69
RH
4490 else if (offset)
4491 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 4492 else
e075ae69 4493 putc ('0', file);
e9a25f70 4494
e075ae69
RH
4495 if (index)
4496 {
4497 putc ('+', file);
4498 PRINT_REG (index, 0, file);
4499 if (scale != 1)
4500 fprintf (file, "*%d", scale);
4501 }
4502 putc (']', file);
4503 }
2a2ab3f9
JVA
4504 }
4505}
4506\f
4507/* Split one or more DImode RTL references into pairs of SImode
4508 references. The RTL can be REG, offsettable MEM, integer constant, or
4509 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
4510 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 4511 that parallel "operands". */
2a2ab3f9
JVA
4512
4513void
4514split_di (operands, num, lo_half, hi_half)
4515 rtx operands[];
4516 int num;
4517 rtx lo_half[], hi_half[];
4518{
4519 while (num--)
4520 {
57dbca5e 4521 rtx op = operands[num];
e075ae69
RH
4522 if (CONSTANT_P (op))
4523 split_double (op, &lo_half[num], &hi_half[num]);
4524 else if (! reload_completed)
a269a03c
JC
4525 {
4526 lo_half[num] = gen_lowpart (SImode, op);
4527 hi_half[num] = gen_highpart (SImode, op);
4528 }
4529 else if (GET_CODE (op) == REG)
2a2ab3f9 4530 {
0d7d98ee
JH
4531 if (TARGET_64BIT)
4532 abort();
57dbca5e
BS
4533 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
4534 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
2a2ab3f9 4535 }
57dbca5e 4536 else if (offsettable_memref_p (op))
2a2ab3f9 4537 {
f4ef873c 4538 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 4539 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
4540 }
4541 else
564d80f4 4542 abort ();
2a2ab3f9
JVA
4543 }
4544}
4545\f
2a2ab3f9
JVA
4546/* Output code to perform a 387 binary operation in INSN, one of PLUS,
4547 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
4548 is the expression of the binary operation. The output may either be
4549 emitted here, or returned to the caller, like all output_* functions.
4550
4551 There is no guarantee that the operands are the same mode, as they
0f290768 4552 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 4553
e3c2afab
AM
4554#ifndef SYSV386_COMPAT
4555/* Set to 1 for compatibility with brain-damaged assemblers. No-one
4556 wants to fix the assemblers because that causes incompatibility
4557 with gcc. No-one wants to fix gcc because that causes
4558 incompatibility with assemblers... You can use the option of
4559 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
4560#define SYSV386_COMPAT 1
4561#endif
4562
69ddee61 4563const char *
2a2ab3f9
JVA
4564output_387_binary_op (insn, operands)
4565 rtx insn;
4566 rtx *operands;
4567{
e3c2afab 4568 static char buf[30];
69ddee61 4569 const char *p;
1deaa899
JH
4570 const char *ssep;
4571 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
2a2ab3f9 4572
e3c2afab
AM
4573#ifdef ENABLE_CHECKING
4574 /* Even if we do not want to check the inputs, this documents input
4575 constraints. Which helps in understanding the following code. */
4576 if (STACK_REG_P (operands[0])
4577 && ((REG_P (operands[1])
4578 && REGNO (operands[0]) == REGNO (operands[1])
4579 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
4580 || (REG_P (operands[2])
4581 && REGNO (operands[0]) == REGNO (operands[2])
4582 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
4583 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
4584 ; /* ok */
1deaa899 4585 else if (!is_sse)
e3c2afab
AM
4586 abort ();
4587#endif
4588
2a2ab3f9
JVA
4589 switch (GET_CODE (operands[3]))
4590 {
4591 case PLUS:
e075ae69
RH
4592 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4593 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4594 p = "fiadd";
4595 else
4596 p = "fadd";
1deaa899 4597 ssep = "add";
2a2ab3f9
JVA
4598 break;
4599
4600 case MINUS:
e075ae69
RH
4601 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4602 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4603 p = "fisub";
4604 else
4605 p = "fsub";
1deaa899 4606 ssep = "sub";
2a2ab3f9
JVA
4607 break;
4608
4609 case MULT:
e075ae69
RH
4610 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4611 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4612 p = "fimul";
4613 else
4614 p = "fmul";
1deaa899 4615 ssep = "mul";
2a2ab3f9
JVA
4616 break;
4617
4618 case DIV:
e075ae69
RH
4619 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4620 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4621 p = "fidiv";
4622 else
4623 p = "fdiv";
1deaa899 4624 ssep = "div";
2a2ab3f9
JVA
4625 break;
4626
4627 default:
4628 abort ();
4629 }
4630
1deaa899
JH
4631 if (is_sse)
4632 {
4633 strcpy (buf, ssep);
4634 if (GET_MODE (operands[0]) == SFmode)
4635 strcat (buf, "ss\t{%2, %0|%0, %2}");
4636 else
4637 strcat (buf, "sd\t{%2, %0|%0, %2}");
4638 return buf;
4639 }
e075ae69 4640 strcpy (buf, p);
2a2ab3f9
JVA
4641
4642 switch (GET_CODE (operands[3]))
4643 {
4644 case MULT:
4645 case PLUS:
4646 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
4647 {
e3c2afab 4648 rtx temp = operands[2];
2a2ab3f9
JVA
4649 operands[2] = operands[1];
4650 operands[1] = temp;
4651 }
4652
e3c2afab
AM
4653 /* know operands[0] == operands[1]. */
4654
2a2ab3f9 4655 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
4656 {
4657 p = "%z2\t%2";
4658 break;
4659 }
2a2ab3f9
JVA
4660
4661 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
4662 {
4663 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
4664 /* How is it that we are storing to a dead operand[2]?
4665 Well, presumably operands[1] is dead too. We can't
4666 store the result to st(0) as st(0) gets popped on this
4667 instruction. Instead store to operands[2] (which I
4668 think has to be st(1)). st(1) will be popped later.
4669 gcc <= 2.8.1 didn't have this check and generated
4670 assembly code that the Unixware assembler rejected. */
4671 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 4672 else
e3c2afab 4673 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 4674 break;
6b28fd63 4675 }
2a2ab3f9
JVA
4676
4677 if (STACK_TOP_P (operands[0]))
e3c2afab 4678 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 4679 else
e3c2afab 4680 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 4681 break;
2a2ab3f9
JVA
4682
4683 case MINUS:
4684 case DIV:
4685 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
4686 {
4687 p = "r%z1\t%1";
4688 break;
4689 }
2a2ab3f9
JVA
4690
4691 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
4692 {
4693 p = "%z2\t%2";
4694 break;
4695 }
2a2ab3f9 4696
2a2ab3f9 4697 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 4698 {
e3c2afab
AM
4699#if SYSV386_COMPAT
4700 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
4701 derived assemblers, confusingly reverse the direction of
4702 the operation for fsub{r} and fdiv{r} when the
4703 destination register is not st(0). The Intel assembler
4704 doesn't have this brain damage. Read !SYSV386_COMPAT to
4705 figure out what the hardware really does. */
4706 if (STACK_TOP_P (operands[0]))
4707 p = "{p\t%0, %2|rp\t%2, %0}";
4708 else
4709 p = "{rp\t%2, %0|p\t%0, %2}";
4710#else
6b28fd63 4711 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
4712 /* As above for fmul/fadd, we can't store to st(0). */
4713 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 4714 else
e3c2afab
AM
4715 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
4716#endif
e075ae69 4717 break;
6b28fd63 4718 }
2a2ab3f9
JVA
4719
4720 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 4721 {
e3c2afab 4722#if SYSV386_COMPAT
6b28fd63 4723 if (STACK_TOP_P (operands[0]))
e3c2afab 4724 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 4725 else
e3c2afab
AM
4726 p = "{p\t%1, %0|rp\t%0, %1}";
4727#else
4728 if (STACK_TOP_P (operands[0]))
4729 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
4730 else
4731 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
4732#endif
e075ae69 4733 break;
6b28fd63 4734 }
2a2ab3f9
JVA
4735
4736 if (STACK_TOP_P (operands[0]))
4737 {
4738 if (STACK_TOP_P (operands[1]))
e3c2afab 4739 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 4740 else
e3c2afab 4741 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 4742 break;
2a2ab3f9
JVA
4743 }
4744 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
4745 {
4746#if SYSV386_COMPAT
4747 p = "{\t%1, %0|r\t%0, %1}";
4748#else
4749 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
4750#endif
4751 }
2a2ab3f9 4752 else
e3c2afab
AM
4753 {
4754#if SYSV386_COMPAT
4755 p = "{r\t%2, %0|\t%0, %2}";
4756#else
4757 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
4758#endif
4759 }
e075ae69 4760 break;
2a2ab3f9
JVA
4761
4762 default:
4763 abort ();
4764 }
e075ae69
RH
4765
4766 strcat (buf, p);
4767 return buf;
2a2ab3f9 4768}
e075ae69 4769
a4f31c00 4770/* Output code to initialize control word copies used by
7a2e09f4
JH
4771 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
4772 is set to control word rounding downwards. */
4773void
4774emit_i387_cw_initialization (normal, round_down)
4775 rtx normal, round_down;
4776{
4777 rtx reg = gen_reg_rtx (HImode);
4778
4779 emit_insn (gen_x86_fnstcw_1 (normal));
4780 emit_move_insn (reg, normal);
4781 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
4782 && !TARGET_64BIT)
4783 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
4784 else
4785 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
4786 emit_move_insn (round_down, reg);
4787}
4788
2a2ab3f9 4789/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 4790 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 4791 operand may be [SDX]Fmode. */
2a2ab3f9 4792
69ddee61 4793const char *
2a2ab3f9
JVA
4794output_fix_trunc (insn, operands)
4795 rtx insn;
4796 rtx *operands;
4797{
4798 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 4799 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 4800
e075ae69
RH
4801 /* Jump through a hoop or two for DImode, since the hardware has no
4802 non-popping instruction. We used to do this a different way, but
4803 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
4804 if (dimode_p && !stack_top_dies)
4805 output_asm_insn ("fld\t%y1", operands);
e075ae69 4806
7a2e09f4 4807 if (!STACK_TOP_P (operands[1]))
10195bd8
JW
4808 abort ();
4809
e075ae69 4810 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 4811 abort ();
e9a25f70 4812
7a2e09f4 4813 output_asm_insn ("fldcw\t%3", operands);
e075ae69 4814 if (stack_top_dies || dimode_p)
7a2e09f4 4815 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 4816 else
7a2e09f4 4817 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 4818 output_asm_insn ("fldcw\t%2", operands);
10195bd8 4819
e075ae69 4820 return "";
2a2ab3f9 4821}
cda749b1 4822
e075ae69
RH
4823/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
4824 should be used and 2 when fnstsw should be used. UNORDERED_P is true
4825 when fucom should be used. */
4826
69ddee61 4827const char *
e075ae69 4828output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
4829 rtx insn;
4830 rtx *operands;
e075ae69 4831 int eflags_p, unordered_p;
cda749b1 4832{
e075ae69
RH
4833 int stack_top_dies;
4834 rtx cmp_op0 = operands[0];
4835 rtx cmp_op1 = operands[1];
0644b628 4836 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
e075ae69
RH
4837
4838 if (eflags_p == 2)
4839 {
4840 cmp_op0 = cmp_op1;
4841 cmp_op1 = operands[2];
4842 }
0644b628
JH
4843 if (is_sse)
4844 {
4845 if (GET_MODE (operands[0]) == SFmode)
4846 if (unordered_p)
4847 return "ucomiss\t{%1, %0|%0, %1}";
4848 else
4849 return "comiss\t{%1, %0|%0, %y}";
4850 else
4851 if (unordered_p)
4852 return "ucomisd\t{%1, %0|%0, %1}";
4853 else
4854 return "comisd\t{%1, %0|%0, %y}";
4855 }
cda749b1 4856
e075ae69 4857 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
4858 abort ();
4859
e075ae69 4860 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 4861
e075ae69
RH
4862 if (STACK_REG_P (cmp_op1)
4863 && stack_top_dies
4864 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
4865 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 4866 {
e075ae69
RH
4867 /* If both the top of the 387 stack dies, and the other operand
4868 is also a stack register that dies, then this must be a
4869 `fcompp' float compare */
4870
4871 if (eflags_p == 1)
4872 {
4873 /* There is no double popping fcomi variant. Fortunately,
4874 eflags is immune from the fstp's cc clobbering. */
4875 if (unordered_p)
4876 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
4877 else
4878 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
4879 return "fstp\t%y0";
4880 }
4881 else
cda749b1 4882 {
e075ae69
RH
4883 if (eflags_p == 2)
4884 {
4885 if (unordered_p)
4886 return "fucompp\n\tfnstsw\t%0";
4887 else
4888 return "fcompp\n\tfnstsw\t%0";
4889 }
cda749b1
JW
4890 else
4891 {
e075ae69
RH
4892 if (unordered_p)
4893 return "fucompp";
4894 else
4895 return "fcompp";
cda749b1
JW
4896 }
4897 }
cda749b1
JW
4898 }
4899 else
4900 {
e075ae69 4901 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 4902
0f290768 4903 static const char * const alt[24] =
e075ae69
RH
4904 {
4905 "fcom%z1\t%y1",
4906 "fcomp%z1\t%y1",
4907 "fucom%z1\t%y1",
4908 "fucomp%z1\t%y1",
0f290768 4909
e075ae69
RH
4910 "ficom%z1\t%y1",
4911 "ficomp%z1\t%y1",
4912 NULL,
4913 NULL,
4914
4915 "fcomi\t{%y1, %0|%0, %y1}",
4916 "fcomip\t{%y1, %0|%0, %y1}",
4917 "fucomi\t{%y1, %0|%0, %y1}",
4918 "fucomip\t{%y1, %0|%0, %y1}",
4919
4920 NULL,
4921 NULL,
4922 NULL,
4923 NULL,
4924
4925 "fcom%z2\t%y2\n\tfnstsw\t%0",
4926 "fcomp%z2\t%y2\n\tfnstsw\t%0",
4927 "fucom%z2\t%y2\n\tfnstsw\t%0",
4928 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 4929
e075ae69
RH
4930 "ficom%z2\t%y2\n\tfnstsw\t%0",
4931 "ficomp%z2\t%y2\n\tfnstsw\t%0",
4932 NULL,
4933 NULL
4934 };
4935
4936 int mask;
69ddee61 4937 const char *ret;
e075ae69
RH
4938
4939 mask = eflags_p << 3;
4940 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4941 mask |= unordered_p << 1;
4942 mask |= stack_top_dies;
4943
4944 if (mask >= 24)
4945 abort ();
4946 ret = alt[mask];
4947 if (ret == NULL)
4948 abort ();
cda749b1 4949
e075ae69 4950 return ret;
cda749b1
JW
4951 }
4952}
2a2ab3f9 4953
e075ae69 4954/* Output assembler code to FILE to initialize basic-block profiling.
2a2ab3f9 4955
e075ae69 4956 If profile_block_flag == 2
2a2ab3f9 4957
e075ae69
RH
4958 Output code to call the subroutine `__bb_init_trace_func'
4959 and pass two parameters to it. The first parameter is
4960 the address of a block allocated in the object module.
4961 The second parameter is the number of the first basic block
4962 of the function.
2a2ab3f9 4963
e075ae69 4964 The name of the block is a local symbol made with this statement:
0f290768 4965
e075ae69 4966 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
2a2ab3f9 4967
e075ae69
RH
4968 Of course, since you are writing the definition of
4969 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4970 can take a short cut in the definition of this macro and use the
4971 name that you know will result.
2a2ab3f9 4972
e075ae69
RH
4973 The number of the first basic block of the function is
4974 passed to the macro in BLOCK_OR_LABEL.
2a2ab3f9 4975
e075ae69
RH
4976 If described in a virtual assembler language the code to be
4977 output looks like:
2a2ab3f9 4978
e075ae69
RH
4979 parameter1 <- LPBX0
4980 parameter2 <- BLOCK_OR_LABEL
4981 call __bb_init_trace_func
2a2ab3f9 4982
e075ae69 4983 else if profile_block_flag != 0
e74389ff 4984
e075ae69
RH
4985 Output code to call the subroutine `__bb_init_func'
4986 and pass one single parameter to it, which is the same
4987 as the first parameter to `__bb_init_trace_func'.
e74389ff 4988
e075ae69
RH
4989 The first word of this parameter is a flag which will be nonzero if
4990 the object module has already been initialized. So test this word
4991 first, and do not call `__bb_init_func' if the flag is nonzero.
4992 Note: When profile_block_flag == 2 the test need not be done
4993 but `__bb_init_trace_func' *must* be called.
e74389ff 4994
e075ae69
RH
4995 BLOCK_OR_LABEL may be used to generate a label number as a
4996 branch destination in case `__bb_init_func' will not be called.
e74389ff 4997
e075ae69
RH
4998 If described in a virtual assembler language the code to be
4999 output looks like:
2a2ab3f9 5000
e075ae69
RH
5001 cmp (LPBX0),0
5002 jne local_label
5003 parameter1 <- LPBX0
5004 call __bb_init_func
5005 local_label:
5006*/
c572e5ba 5007
e075ae69
RH
5008void
5009ix86_output_function_block_profiler (file, block_or_label)
5010 FILE *file;
5011 int block_or_label;
c572e5ba 5012{
e075ae69
RH
5013 static int num_func = 0;
5014 rtx xops[8];
5015 char block_table[80], false_label[80];
c572e5ba 5016
e075ae69 5017 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
e9a25f70 5018
e075ae69
RH
5019 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
5020 xops[5] = stack_pointer_rtx;
5021 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
2a2ab3f9 5022
e075ae69 5023 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
c572e5ba 5024
e075ae69 5025 switch (profile_block_flag)
c572e5ba 5026 {
e075ae69
RH
5027 case 2:
5028 xops[2] = GEN_INT (block_or_label);
5029 xops[3] = gen_rtx_MEM (Pmode,
5030 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
5031 xops[6] = GEN_INT (8);
e9a25f70 5032
e075ae69
RH
5033 output_asm_insn ("push{l}\t%2", xops);
5034 if (!flag_pic)
5035 output_asm_insn ("push{l}\t%1", xops);
e9a25f70 5036 else
870a0c2c 5037 {
e075ae69
RH
5038 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
5039 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 5040 }
e075ae69
RH
5041 output_asm_insn ("call\t%P3", xops);
5042 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
5043 break;
c572e5ba 5044
e075ae69
RH
5045 default:
5046 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
c572e5ba 5047
e075ae69
RH
5048 xops[0] = const0_rtx;
5049 xops[2] = gen_rtx_MEM (Pmode,
5050 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
5051 xops[3] = gen_rtx_MEM (Pmode,
5052 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
5053 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
5054 xops[6] = GEN_INT (4);
a14003ee 5055
e075ae69 5056 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
446ba526 5057
e075ae69
RH
5058 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
5059 output_asm_insn ("jne\t%2", xops);
870a0c2c 5060
e075ae69
RH
5061 if (!flag_pic)
5062 output_asm_insn ("push{l}\t%1", xops);
5063 else
5064 {
5065 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
5066 output_asm_insn ("push{l}\t%7", xops);
870a0c2c 5067 }
e075ae69
RH
5068 output_asm_insn ("call\t%P3", xops);
5069 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
5070 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
5071 num_func++;
5072 break;
c572e5ba 5073 }
2a2ab3f9 5074}
305f097e 5075
e075ae69
RH
5076/* Output assembler code to FILE to increment a counter associated
5077 with basic block number BLOCKNO.
305f097e 5078
e075ae69 5079 If profile_block_flag == 2
ecbc4695 5080
e075ae69
RH
5081 Output code to initialize the global structure `__bb' and
5082 call the function `__bb_trace_func' which will increment the
5083 counter.
ecbc4695 5084
e075ae69
RH
5085 `__bb' consists of two words. In the first word the number
5086 of the basic block has to be stored. In the second word
0f290768 5087 the address of a block allocated in the object module
e075ae69 5088 has to be stored.
ecbc4695 5089
e075ae69 5090 The basic block number is given by BLOCKNO.
ecbc4695 5091
0f290768 5092 The address of the block is given by the label created with
305f097e 5093
e075ae69 5094 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
305f097e 5095
e075ae69 5096 by FUNCTION_BLOCK_PROFILER.
ecbc4695 5097
e075ae69
RH
5098 Of course, since you are writing the definition of
5099 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
5100 can take a short cut in the definition of this macro and use the
5101 name that you know will result.
305f097e 5102
e075ae69
RH
5103 If described in a virtual assembler language the code to be
5104 output looks like:
305f097e 5105
e075ae69
RH
5106 move BLOCKNO -> (__bb)
5107 move LPBX0 -> (__bb+4)
5108 call __bb_trace_func
305f097e 5109
e075ae69
RH
5110 Note that function `__bb_trace_func' must not change the
5111 machine state, especially the flag register. To grant
5112 this, you must output code to save and restore registers
5113 either in this macro or in the macros MACHINE_STATE_SAVE
5114 and MACHINE_STATE_RESTORE. The last two macros will be
5115 used in the function `__bb_trace_func', so you must make
0f290768 5116 sure that the function prologue does not change any
e075ae69 5117 register prior to saving it with MACHINE_STATE_SAVE.
305f097e 5118
e075ae69 5119 else if profile_block_flag != 0
305f097e 5120
e075ae69
RH
5121 Output code to increment the counter directly.
5122 Basic blocks are numbered separately from zero within each
5123 compiled object module. The count associated with block number
0f290768 5124 BLOCKNO is at index BLOCKNO in an array of words; the name of
e075ae69 5125 this array is a local symbol made with this statement:
32b5b1aa 5126
e075ae69 5127 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
32b5b1aa 5128
e075ae69
RH
5129 Of course, since you are writing the definition of
5130 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
5131 can take a short cut in the definition of this macro and use the
0f290768 5132 name that you know will result.
32b5b1aa 5133
e075ae69
RH
5134 If described in a virtual assembler language the code to be
5135 output looks like:
32b5b1aa 5136
e075ae69
RH
5137 inc (LPBX2+4*BLOCKNO)
5138*/
32b5b1aa 5139
e075ae69
RH
5140void
5141ix86_output_block_profiler (file, blockno)
5142 FILE *file ATTRIBUTE_UNUSED;
5143 int blockno;
5144{
5145 rtx xops[8], cnt_rtx;
5146 char counts[80];
5147 char *block_table = counts;
5148
5149 switch (profile_block_flag)
5150 {
5151 case 2:
5152 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
32b5b1aa 5153
e075ae69
RH
5154 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
5155 xops[2] = GEN_INT (blockno);
5156 xops[3] = gen_rtx_MEM (Pmode,
5157 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
5158 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
5159 xops[5] = plus_constant (xops[4], 4);
5160 xops[0] = gen_rtx_MEM (SImode, xops[4]);
5161 xops[6] = gen_rtx_MEM (SImode, xops[5]);
79325812 5162
e075ae69 5163 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
32b5b1aa 5164
e075ae69
RH
5165 output_asm_insn ("pushf", xops);
5166 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5167 if (flag_pic)
32b5b1aa 5168 {
e075ae69
RH
5169 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
5170 output_asm_insn ("push{l}\t%7", xops);
5171 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
5172 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
5173 output_asm_insn ("pop{l}\t%7", xops);
5174 }
5175 else
5176 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
5177 output_asm_insn ("call\t%P3", xops);
5178 output_asm_insn ("popf", xops);
32b5b1aa 5179
e075ae69 5180 break;
32b5b1aa 5181
e075ae69
RH
5182 default:
5183 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
5184 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
5185 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
32b5b1aa 5186
e075ae69
RH
5187 if (blockno)
5188 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
32b5b1aa 5189
e075ae69
RH
5190 if (flag_pic)
5191 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
32b5b1aa 5192
e075ae69
RH
5193 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
5194 output_asm_insn ("inc{l}\t%0", xops);
32b5b1aa 5195
e075ae69 5196 break;
32b5b1aa 5197 }
32b5b1aa 5198}
32b5b1aa 5199\f
79325812 5200void
e075ae69
RH
5201ix86_expand_move (mode, operands)
5202 enum machine_mode mode;
5203 rtx operands[];
32b5b1aa 5204{
e075ae69 5205 int strict = (reload_in_progress || reload_completed);
e075ae69 5206 rtx insn;
e9a25f70 5207
e075ae69 5208 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
32b5b1aa 5209 {
e075ae69 5210 /* Emit insns to move operands[1] into operands[0]. */
e9a25f70 5211
e075ae69
RH
5212 if (GET_CODE (operands[0]) == MEM)
5213 operands[1] = force_reg (Pmode, operands[1]);
5214 else
32b5b1aa 5215 {
e075ae69
RH
5216 rtx temp = operands[0];
5217 if (GET_CODE (temp) != REG)
5218 temp = gen_reg_rtx (Pmode);
5219 temp = legitimize_pic_address (operands[1], temp);
5220 if (temp == operands[0])
5221 return;
5222 operands[1] = temp;
32b5b1aa 5223 }
e075ae69
RH
5224 }
5225 else
5226 {
d7a29404
JH
5227 if (GET_CODE (operands[0]) == MEM
5228 && (GET_MODE (operands[0]) == QImode
5229 || !push_operand (operands[0], mode))
5230 && GET_CODE (operands[1]) == MEM)
e075ae69 5231 operands[1] = force_reg (mode, operands[1]);
e9a25f70 5232
2c5a510c
RH
5233 if (push_operand (operands[0], mode)
5234 && ! general_no_elim_operand (operands[1], mode))
5235 operands[1] = copy_to_mode_reg (mode, operands[1]);
5236
e075ae69 5237 if (FLOAT_MODE_P (mode))
32b5b1aa 5238 {
d7a29404
JH
5239 /* If we are loading a floating point constant to a register,
5240 force the value to memory now, since we'll get better code
5241 out the back end. */
e075ae69
RH
5242
5243 if (strict)
5244 ;
e075ae69 5245 else if (GET_CODE (operands[1]) == CONST_DOUBLE
d7a29404 5246 && register_operand (operands[0], mode))
e075ae69 5247 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
32b5b1aa 5248 }
32b5b1aa 5249 }
e9a25f70 5250
e075ae69 5251 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
e9a25f70 5252
e075ae69
RH
5253 emit_insn (insn);
5254}
e9a25f70 5255
e075ae69
RH
5256/* Attempt to expand a binary operator. Make the expansion closer to the
5257 actual machine, then just general_operand, which will allow 3 separate
9d81fc27 5258 memory references (one output, two input) in a single insn. */
e9a25f70 5259
e075ae69
RH
5260void
5261ix86_expand_binary_operator (code, mode, operands)
5262 enum rtx_code code;
5263 enum machine_mode mode;
5264 rtx operands[];
5265{
5266 int matching_memory;
5267 rtx src1, src2, dst, op, clob;
5268
5269 dst = operands[0];
5270 src1 = operands[1];
5271 src2 = operands[2];
5272
5273 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
5274 if (GET_RTX_CLASS (code) == 'c'
5275 && (rtx_equal_p (dst, src2)
5276 || immediate_operand (src1, mode)))
5277 {
5278 rtx temp = src1;
5279 src1 = src2;
5280 src2 = temp;
32b5b1aa 5281 }
e9a25f70 5282
e075ae69
RH
5283 /* If the destination is memory, and we do not have matching source
5284 operands, do things in registers. */
5285 matching_memory = 0;
5286 if (GET_CODE (dst) == MEM)
32b5b1aa 5287 {
e075ae69
RH
5288 if (rtx_equal_p (dst, src1))
5289 matching_memory = 1;
5290 else if (GET_RTX_CLASS (code) == 'c'
5291 && rtx_equal_p (dst, src2))
5292 matching_memory = 2;
5293 else
5294 dst = gen_reg_rtx (mode);
5295 }
0f290768 5296
e075ae69
RH
5297 /* Both source operands cannot be in memory. */
5298 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
5299 {
5300 if (matching_memory != 2)
5301 src2 = force_reg (mode, src2);
5302 else
5303 src1 = force_reg (mode, src1);
32b5b1aa 5304 }
e9a25f70 5305
06a964de
JH
5306 /* If the operation is not commutable, source 1 cannot be a constant
5307 or non-matching memory. */
0f290768 5308 if ((CONSTANT_P (src1)
06a964de
JH
5309 || (!matching_memory && GET_CODE (src1) == MEM))
5310 && GET_RTX_CLASS (code) != 'c')
e075ae69 5311 src1 = force_reg (mode, src1);
0f290768 5312
e075ae69 5313 /* If optimizing, copy to regs to improve CSE */
fe577e58 5314 if (optimize && ! no_new_pseudos)
32b5b1aa 5315 {
e075ae69
RH
5316 if (GET_CODE (dst) == MEM)
5317 dst = gen_reg_rtx (mode);
5318 if (GET_CODE (src1) == MEM)
5319 src1 = force_reg (mode, src1);
5320 if (GET_CODE (src2) == MEM)
5321 src2 = force_reg (mode, src2);
32b5b1aa 5322 }
e9a25f70 5323
e075ae69
RH
5324 /* Emit the instruction. */
5325
5326 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
5327 if (reload_in_progress)
5328 {
5329 /* Reload doesn't know about the flags register, and doesn't know that
5330 it doesn't want to clobber it. We can only do this with PLUS. */
5331 if (code != PLUS)
5332 abort ();
5333 emit_insn (op);
5334 }
5335 else
32b5b1aa 5336 {
e075ae69
RH
5337 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5338 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 5339 }
e9a25f70 5340
e075ae69
RH
5341 /* Fix up the destination if needed. */
5342 if (dst != operands[0])
5343 emit_move_insn (operands[0], dst);
5344}
5345
5346/* Return TRUE or FALSE depending on whether the binary operator meets the
5347 appropriate constraints. */
5348
5349int
5350ix86_binary_operator_ok (code, mode, operands)
5351 enum rtx_code code;
5352 enum machine_mode mode ATTRIBUTE_UNUSED;
5353 rtx operands[3];
5354{
5355 /* Both source operands cannot be in memory. */
5356 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
5357 return 0;
5358 /* If the operation is not commutable, source 1 cannot be a constant. */
5359 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
5360 return 0;
5361 /* If the destination is memory, we must have a matching source operand. */
5362 if (GET_CODE (operands[0]) == MEM
5363 && ! (rtx_equal_p (operands[0], operands[1])
5364 || (GET_RTX_CLASS (code) == 'c'
5365 && rtx_equal_p (operands[0], operands[2]))))
5366 return 0;
06a964de
JH
5367 /* If the operation is not commutable and the source 1 is memory, we must
5368 have a matching destionation. */
5369 if (GET_CODE (operands[1]) == MEM
5370 && GET_RTX_CLASS (code) != 'c'
5371 && ! rtx_equal_p (operands[0], operands[1]))
5372 return 0;
e075ae69
RH
5373 return 1;
5374}
5375
5376/* Attempt to expand a unary operator. Make the expansion closer to the
5377 actual machine, then just general_operand, which will allow 2 separate
9d81fc27 5378 memory references (one output, one input) in a single insn. */
e075ae69 5379
9d81fc27 5380void
e075ae69
RH
5381ix86_expand_unary_operator (code, mode, operands)
5382 enum rtx_code code;
5383 enum machine_mode mode;
5384 rtx operands[];
5385{
06a964de
JH
5386 int matching_memory;
5387 rtx src, dst, op, clob;
5388
5389 dst = operands[0];
5390 src = operands[1];
e075ae69 5391
06a964de
JH
5392 /* If the destination is memory, and we do not have matching source
5393 operands, do things in registers. */
5394 matching_memory = 0;
5395 if (GET_CODE (dst) == MEM)
32b5b1aa 5396 {
06a964de
JH
5397 if (rtx_equal_p (dst, src))
5398 matching_memory = 1;
e075ae69 5399 else
06a964de 5400 dst = gen_reg_rtx (mode);
32b5b1aa 5401 }
e9a25f70 5402
06a964de
JH
5403 /* When source operand is memory, destination must match. */
5404 if (!matching_memory && GET_CODE (src) == MEM)
5405 src = force_reg (mode, src);
0f290768 5406
06a964de 5407 /* If optimizing, copy to regs to improve CSE */
fe577e58 5408 if (optimize && ! no_new_pseudos)
06a964de
JH
5409 {
5410 if (GET_CODE (dst) == MEM)
5411 dst = gen_reg_rtx (mode);
5412 if (GET_CODE (src) == MEM)
5413 src = force_reg (mode, src);
5414 }
5415
5416 /* Emit the instruction. */
5417
5418 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
5419 if (reload_in_progress || code == NOT)
5420 {
5421 /* Reload doesn't know about the flags register, and doesn't know that
5422 it doesn't want to clobber it. */
5423 if (code != NOT)
5424 abort ();
5425 emit_insn (op);
5426 }
5427 else
5428 {
5429 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5430 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
5431 }
5432
5433 /* Fix up the destination if needed. */
5434 if (dst != operands[0])
5435 emit_move_insn (operands[0], dst);
e075ae69
RH
5436}
5437
5438/* Return TRUE or FALSE depending on whether the unary operator meets the
5439 appropriate constraints. */
5440
5441int
5442ix86_unary_operator_ok (code, mode, operands)
5443 enum rtx_code code ATTRIBUTE_UNUSED;
5444 enum machine_mode mode ATTRIBUTE_UNUSED;
5445 rtx operands[2] ATTRIBUTE_UNUSED;
5446{
06a964de
JH
5447 /* If one of operands is memory, source and destination must match. */
5448 if ((GET_CODE (operands[0]) == MEM
5449 || GET_CODE (operands[1]) == MEM)
5450 && ! rtx_equal_p (operands[0], operands[1]))
5451 return FALSE;
e075ae69
RH
5452 return TRUE;
5453}
5454
16189740
RH
5455/* Return TRUE or FALSE depending on whether the first SET in INSN
5456 has source and destination with matching CC modes, and that the
5457 CC mode is at least as constrained as REQ_MODE. */
5458
5459int
5460ix86_match_ccmode (insn, req_mode)
5461 rtx insn;
5462 enum machine_mode req_mode;
5463{
5464 rtx set;
5465 enum machine_mode set_mode;
5466
5467 set = PATTERN (insn);
5468 if (GET_CODE (set) == PARALLEL)
5469 set = XVECEXP (set, 0, 0);
5470 if (GET_CODE (set) != SET)
5471 abort ();
9076b9c1
JH
5472 if (GET_CODE (SET_SRC (set)) != COMPARE)
5473 abort ();
16189740
RH
5474
5475 set_mode = GET_MODE (SET_DEST (set));
5476 switch (set_mode)
5477 {
9076b9c1
JH
5478 case CCNOmode:
5479 if (req_mode != CCNOmode
5480 && (req_mode != CCmode
5481 || XEXP (SET_SRC (set), 1) != const0_rtx))
5482 return 0;
5483 break;
16189740 5484 case CCmode:
9076b9c1 5485 if (req_mode == CCGCmode)
16189740
RH
5486 return 0;
5487 /* FALLTHRU */
9076b9c1
JH
5488 case CCGCmode:
5489 if (req_mode == CCGOCmode || req_mode == CCNOmode)
5490 return 0;
5491 /* FALLTHRU */
5492 case CCGOCmode:
16189740
RH
5493 if (req_mode == CCZmode)
5494 return 0;
5495 /* FALLTHRU */
5496 case CCZmode:
5497 break;
5498
5499 default:
5500 abort ();
5501 }
5502
5503 return (GET_MODE (SET_SRC (set)) == set_mode);
5504}
5505
e075ae69
RH
5506/* Generate insn patterns to do an integer compare of OPERANDS. */
5507
5508static rtx
5509ix86_expand_int_compare (code, op0, op1)
5510 enum rtx_code code;
5511 rtx op0, op1;
5512{
5513 enum machine_mode cmpmode;
5514 rtx tmp, flags;
5515
5516 cmpmode = SELECT_CC_MODE (code, op0, op1);
5517 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
5518
5519 /* This is very simple, but making the interface the same as in the
5520 FP case makes the rest of the code easier. */
5521 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
5522 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
5523
5524 /* Return the test that should be put into the flags user, i.e.
5525 the bcc, scc, or cmov instruction. */
5526 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
5527}
5528
3a3677ff
RH
5529/* Figure out whether to use ordered or unordered fp comparisons.
5530 Return the appropriate mode to use. */
e075ae69 5531
b1cdafbb 5532enum machine_mode
3a3677ff 5533ix86_fp_compare_mode (code)
8752c357 5534 enum rtx_code code ATTRIBUTE_UNUSED;
e075ae69 5535{
9e7adcb3
JH
5536 /* ??? In order to make all comparisons reversible, we do all comparisons
5537 non-trapping when compiling for IEEE. Once gcc is able to distinguish
5538 all forms trapping and nontrapping comparisons, we can make inequality
5539 comparisons trapping again, since it results in better code when using
5540 FCOM based compares. */
5541 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
5542}
5543
9076b9c1
JH
5544enum machine_mode
5545ix86_cc_mode (code, op0, op1)
5546 enum rtx_code code;
5547 rtx op0, op1;
5548{
5549 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5550 return ix86_fp_compare_mode (code);
5551 switch (code)
5552 {
5553 /* Only zero flag is needed. */
5554 case EQ: /* ZF=0 */
5555 case NE: /* ZF!=0 */
5556 return CCZmode;
5557 /* Codes needing carry flag. */
265dab10
JH
5558 case GEU: /* CF=0 */
5559 case GTU: /* CF=0 & ZF=0 */
7e08e190
JH
5560 case LTU: /* CF=1 */
5561 case LEU: /* CF=1 | ZF=1 */
265dab10 5562 return CCmode;
9076b9c1
JH
5563 /* Codes possibly doable only with sign flag when
5564 comparing against zero. */
5565 case GE: /* SF=OF or SF=0 */
7e08e190 5566 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
5567 if (op1 == const0_rtx)
5568 return CCGOCmode;
5569 else
5570 /* For other cases Carry flag is not required. */
5571 return CCGCmode;
5572 /* Codes doable only with sign flag when comparing
5573 against zero, but we miss jump instruction for it
5574 so we need to use relational tests agains overflow
5575 that thus needs to be zero. */
5576 case GT: /* ZF=0 & SF=OF */
5577 case LE: /* ZF=1 | SF<>OF */
5578 if (op1 == const0_rtx)
5579 return CCNOmode;
5580 else
5581 return CCGCmode;
5582 default:
0f290768 5583 abort ();
9076b9c1
JH
5584 }
5585}
5586
3a3677ff
RH
5587/* Return true if we should use an FCOMI instruction for this fp comparison. */
5588
a940d8bd 5589int
3a3677ff 5590ix86_use_fcomi_compare (code)
9e7adcb3 5591 enum rtx_code code ATTRIBUTE_UNUSED;
3a3677ff 5592{
9e7adcb3
JH
5593 enum rtx_code swapped_code = swap_condition (code);
5594 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
5595 || (ix86_fp_comparison_cost (swapped_code)
5596 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
5597}
5598
0f290768 5599/* Swap, force into registers, or otherwise massage the two operands
3a3677ff
RH
5600 to a fp comparison. The operands are updated in place; the new
5601 comparsion code is returned. */
5602
5603static enum rtx_code
5604ix86_prepare_fp_compare_args (code, pop0, pop1)
5605 enum rtx_code code;
5606 rtx *pop0, *pop1;
5607{
5608 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
5609 rtx op0 = *pop0, op1 = *pop1;
5610 enum machine_mode op_mode = GET_MODE (op0);
0644b628 5611 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
3a3677ff 5612
e075ae69 5613 /* All of the unordered compare instructions only work on registers.
3a3677ff
RH
5614 The same is true of the XFmode compare instructions. The same is
5615 true of the fcomi compare instructions. */
5616
0644b628
JH
5617 if (!is_sse
5618 && (fpcmp_mode == CCFPUmode
5619 || op_mode == XFmode
5620 || op_mode == TFmode
5621 || ix86_use_fcomi_compare (code)))
e075ae69 5622 {
3a3677ff
RH
5623 op0 = force_reg (op_mode, op0);
5624 op1 = force_reg (op_mode, op1);
e075ae69
RH
5625 }
5626 else
5627 {
5628 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
5629 things around if they appear profitable, otherwise force op0
5630 into a register. */
5631
5632 if (standard_80387_constant_p (op0) == 0
5633 || (GET_CODE (op0) == MEM
5634 && ! (standard_80387_constant_p (op1) == 0
5635 || GET_CODE (op1) == MEM)))
32b5b1aa 5636 {
e075ae69
RH
5637 rtx tmp;
5638 tmp = op0, op0 = op1, op1 = tmp;
5639 code = swap_condition (code);
5640 }
5641
5642 if (GET_CODE (op0) != REG)
3a3677ff 5643 op0 = force_reg (op_mode, op0);
e075ae69
RH
5644
5645 if (CONSTANT_P (op1))
5646 {
5647 if (standard_80387_constant_p (op1))
3a3677ff 5648 op1 = force_reg (op_mode, op1);
e075ae69 5649 else
3a3677ff 5650 op1 = validize_mem (force_const_mem (op_mode, op1));
32b5b1aa
SC
5651 }
5652 }
e9a25f70 5653
9e7adcb3
JH
5654 /* Try to rearrange the comparison to make it cheaper. */
5655 if (ix86_fp_comparison_cost (code)
5656 > ix86_fp_comparison_cost (swap_condition (code))
5657 && (GET_CODE (op0) == REG || !reload_completed))
5658 {
5659 rtx tmp;
5660 tmp = op0, op0 = op1, op1 = tmp;
5661 code = swap_condition (code);
5662 if (GET_CODE (op0) != REG)
5663 op0 = force_reg (op_mode, op0);
5664 }
5665
3a3677ff
RH
5666 *pop0 = op0;
5667 *pop1 = op1;
5668 return code;
5669}
5670
c0c102a9
JH
5671/* Convert comparison codes we use to represent FP comparison to integer
5672 code that will result in proper branch. Return UNKNOWN if no such code
5673 is available. */
5674static enum rtx_code
5675ix86_fp_compare_code_to_integer (code)
5676 enum rtx_code code;
5677{
5678 switch (code)
5679 {
5680 case GT:
5681 return GTU;
5682 case GE:
5683 return GEU;
5684 case ORDERED:
5685 case UNORDERED:
5686 return code;
5687 break;
5688 case UNEQ:
5689 return EQ;
5690 break;
5691 case UNLT:
5692 return LTU;
5693 break;
5694 case UNLE:
5695 return LEU;
5696 break;
5697 case LTGT:
5698 return NE;
5699 break;
5700 default:
5701 return UNKNOWN;
5702 }
5703}
5704
5705/* Split comparison code CODE into comparisons we can do using branch
5706 instructions. BYPASS_CODE is comparison code for branch that will
5707 branch around FIRST_CODE and SECOND_CODE. If some of branches
5708 is not required, set value to NIL.
5709 We never require more than two branches. */
5710static void
5711ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
5712 enum rtx_code code, *bypass_code, *first_code, *second_code;
5713{
5714 *first_code = code;
5715 *bypass_code = NIL;
5716 *second_code = NIL;
5717
5718 /* The fcomi comparison sets flags as follows:
5719
5720 cmp ZF PF CF
5721 > 0 0 0
5722 < 0 0 1
5723 = 1 0 0
5724 un 1 1 1 */
5725
5726 switch (code)
5727 {
5728 case GT: /* GTU - CF=0 & ZF=0 */
5729 case GE: /* GEU - CF=0 */
5730 case ORDERED: /* PF=0 */
5731 case UNORDERED: /* PF=1 */
5732 case UNEQ: /* EQ - ZF=1 */
5733 case UNLT: /* LTU - CF=1 */
5734 case UNLE: /* LEU - CF=1 | ZF=1 */
5735 case LTGT: /* EQ - ZF=0 */
5736 break;
5737 case LT: /* LTU - CF=1 - fails on unordered */
5738 *first_code = UNLT;
5739 *bypass_code = UNORDERED;
5740 break;
5741 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
5742 *first_code = UNLE;
5743 *bypass_code = UNORDERED;
5744 break;
5745 case EQ: /* EQ - ZF=1 - fails on unordered */
5746 *first_code = UNEQ;
5747 *bypass_code = UNORDERED;
5748 break;
5749 case NE: /* NE - ZF=0 - fails on unordered */
5750 *first_code = LTGT;
5751 *second_code = UNORDERED;
5752 break;
5753 case UNGE: /* GEU - CF=0 - fails on unordered */
5754 *first_code = GE;
5755 *second_code = UNORDERED;
5756 break;
5757 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
5758 *first_code = GT;
5759 *second_code = UNORDERED;
5760 break;
5761 default:
5762 abort ();
5763 }
5764 if (!TARGET_IEEE_FP)
5765 {
5766 *second_code = NIL;
5767 *bypass_code = NIL;
5768 }
5769}
5770
9e7adcb3
JH
5771/* Return cost of comparison done fcom + arithmetics operations on AX.
5772 All following functions do use number of instructions as an cost metrics.
5773 In future this should be tweaked to compute bytes for optimize_size and
5774 take into account performance of various instructions on various CPUs. */
5775static int
5776ix86_fp_comparison_arithmetics_cost (code)
5777 enum rtx_code code;
5778{
5779 if (!TARGET_IEEE_FP)
5780 return 4;
5781 /* The cost of code output by ix86_expand_fp_compare. */
5782 switch (code)
5783 {
5784 case UNLE:
5785 case UNLT:
5786 case LTGT:
5787 case GT:
5788 case GE:
5789 case UNORDERED:
5790 case ORDERED:
5791 case UNEQ:
5792 return 4;
5793 break;
5794 case LT:
5795 case NE:
5796 case EQ:
5797 case UNGE:
5798 return 5;
5799 break;
5800 case LE:
5801 case UNGT:
5802 return 6;
5803 break;
5804 default:
5805 abort ();
5806 }
5807}
5808
5809/* Return cost of comparison done using fcomi operation.
5810 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5811static int
5812ix86_fp_comparison_fcomi_cost (code)
5813 enum rtx_code code;
5814{
5815 enum rtx_code bypass_code, first_code, second_code;
5816 /* Return arbitarily high cost when instruction is not supported - this
5817 prevents gcc from using it. */
5818 if (!TARGET_CMOVE)
5819 return 1024;
5820 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5821 return (bypass_code != NIL || second_code != NIL) + 2;
5822}
5823
5824/* Return cost of comparison done using sahf operation.
5825 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5826static int
5827ix86_fp_comparison_sahf_cost (code)
5828 enum rtx_code code;
5829{
5830 enum rtx_code bypass_code, first_code, second_code;
5831 /* Return arbitarily high cost when instruction is not preferred - this
5832 avoids gcc from using it. */
5833 if (!TARGET_USE_SAHF && !optimize_size)
5834 return 1024;
5835 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5836 return (bypass_code != NIL || second_code != NIL) + 3;
5837}
5838
5839/* Compute cost of the comparison done using any method.
5840 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5841static int
5842ix86_fp_comparison_cost (code)
5843 enum rtx_code code;
5844{
5845 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
5846 int min;
5847
5848 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
5849 sahf_cost = ix86_fp_comparison_sahf_cost (code);
5850
5851 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
5852 if (min > sahf_cost)
5853 min = sahf_cost;
5854 if (min > fcomi_cost)
5855 min = fcomi_cost;
5856 return min;
5857}
c0c102a9 5858
3a3677ff
RH
/* Generate insn patterns to do a floating point compare of OPERANDS.

   CODE is the comparison to perform; OP0/OP1 are the operands.  SCRATCH,
   if non-null, is an HImode register used to hold the fnstsw result
   (one is allocated when needed and SCRATCH is NULL).  SECOND_TEST and
   BYPASS_TEST, when non-null, receive an extra comparison RTX that the
   caller must also test (NULL_RTX when unneeded).  Returns the flags-user
   comparison RTX to put into the bcc/scc/cmov instruction.  */
static rtx
ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
     enum rtx_code code;
     rtx op0, op1, scratch;
     rtx *second_test;
     rtx *bypass_test;
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  Only usable when the
     caller supplied slots for whatever bypass/second test the codes
     require.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
        {
          /* fcomi: compare straight into the flags register.  */
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                             tmp);
          emit_insn (tmp);
        }
      else
        {
          /* fnstsw into SCRATCH, then sahf to load AH into the flags.  */
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
          if (!scratch)
            scratch = gen_reg_rtx (HImode);
          emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
          emit_insn (gen_x86_sahf_1 (scratch));
        }

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
        *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
      if (second_code != NIL)
        *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.

         The masks below select the FPU status bits in AH: C0 = 0x01,
         C2 = 0x04, C3 = 0x40; 0x45 = C0|C2|C3 (per the x86 FPU status
         word layout).  */

      intcmp_mode = CCNOmode;
      switch (code)
        {
        case GT:
        case UNGT:
          if (code == GT || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;
              code = GEU;
            }
          break;
        case LT:
        case UNLT:
          if (code == LT && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
              code = NE;
            }
          break;
        case GE:
        case UNGE:
          if (code == GE || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x01)));
              code = NE;
            }
          break;
        case LE:
        case UNLE:
          if (code == LE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = LTU;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = NE;
            }
          break;
        case EQ:
        case UNEQ:
          if (code == EQ && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = NE;
              /* NOTE(review): this break is redundant with the one
                 below; kept byte-identical to the original.  */
              break;
            }
          break;
        case NE:
        case LTGT:
          if (code == NE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x40)));
              code = NE;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = EQ;
            }
          break;

        case UNORDERED:
          /* Test the C2 (NaN) bit.  */
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = NE;
          break;
        case ORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = EQ;
          break;

        default:
          abort ();
        }
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}
6048
9e3e266c 6049rtx
a1b8572c 6050ix86_expand_compare (code, second_test, bypass_test)
e075ae69 6051 enum rtx_code code;
a1b8572c 6052 rtx *second_test, *bypass_test;
e075ae69
RH
6053{
6054 rtx op0, op1, ret;
6055 op0 = ix86_compare_op0;
6056 op1 = ix86_compare_op1;
6057
a1b8572c
JH
6058 if (second_test)
6059 *second_test = NULL_RTX;
6060 if (bypass_test)
6061 *bypass_test = NULL_RTX;
6062
e075ae69 6063 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 6064 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 6065 second_test, bypass_test);
32b5b1aa 6066 else
e075ae69
RH
6067 ret = ix86_expand_int_compare (code, op0, op1);
6068
6069 return ret;
6070}
6071
03598dea
JH
6072/* Return true if the CODE will result in nontrivial jump sequence. */
6073bool
6074ix86_fp_jump_nontrivial_p (code)
6075 enum rtx_code code;
6076{
6077 enum rtx_code bypass_code, first_code, second_code;
6078 if (!TARGET_CMOVE)
6079 return true;
6080 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
6081 return bypass_code != NIL || second_code != NIL;
6082}
6083
e075ae69 6084void
3a3677ff 6085ix86_expand_branch (code, label)
e075ae69 6086 enum rtx_code code;
e075ae69
RH
6087 rtx label;
6088{
3a3677ff 6089 rtx tmp;
e075ae69 6090
3a3677ff 6091 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 6092 {
3a3677ff
RH
6093 case QImode:
6094 case HImode:
6095 case SImode:
0d7d98ee 6096 simple:
a1b8572c 6097 tmp = ix86_expand_compare (code, NULL, NULL);
e075ae69
RH
6098 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6099 gen_rtx_LABEL_REF (VOIDmode, label),
6100 pc_rtx);
6101 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 6102 return;
e075ae69 6103
3a3677ff
RH
6104 case SFmode:
6105 case DFmode:
0f290768 6106 case XFmode:
2b589241 6107 case TFmode:
3a3677ff
RH
6108 {
6109 rtvec vec;
6110 int use_fcomi;
03598dea 6111 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
6112
6113 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
6114 &ix86_compare_op1);
03598dea
JH
6115
6116 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
6117
6118 /* Check whether we will use the natural sequence with one jump. If
6119 so, we can expand jump early. Otherwise delay expansion by
6120 creating compound insn to not confuse optimizers. */
6121 if (bypass_code == NIL && second_code == NIL
6122 && TARGET_CMOVE)
6123 {
6124 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
6125 gen_rtx_LABEL_REF (VOIDmode, label),
6126 pc_rtx, NULL_RTX);
6127 }
6128 else
6129 {
6130 tmp = gen_rtx_fmt_ee (code, VOIDmode,
6131 ix86_compare_op0, ix86_compare_op1);
6132 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6133 gen_rtx_LABEL_REF (VOIDmode, label),
6134 pc_rtx);
6135 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
6136
6137 use_fcomi = ix86_use_fcomi_compare (code);
6138 vec = rtvec_alloc (3 + !use_fcomi);
6139 RTVEC_ELT (vec, 0) = tmp;
6140 RTVEC_ELT (vec, 1)
6141 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
6142 RTVEC_ELT (vec, 2)
6143 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
6144 if (! use_fcomi)
6145 RTVEC_ELT (vec, 3)
6146 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
6147
6148 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
6149 }
3a3677ff
RH
6150 return;
6151 }
32b5b1aa 6152
3a3677ff 6153 case DImode:
0d7d98ee
JH
6154 if (TARGET_64BIT)
6155 goto simple;
3a3677ff
RH
6156 /* Expand DImode branch into multiple compare+branch. */
6157 {
6158 rtx lo[2], hi[2], label2;
6159 enum rtx_code code1, code2, code3;
32b5b1aa 6160
3a3677ff
RH
6161 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
6162 {
6163 tmp = ix86_compare_op0;
6164 ix86_compare_op0 = ix86_compare_op1;
6165 ix86_compare_op1 = tmp;
6166 code = swap_condition (code);
6167 }
6168 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
6169 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 6170
3a3677ff
RH
6171 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
6172 avoid two branches. This costs one extra insn, so disable when
6173 optimizing for size. */
32b5b1aa 6174
3a3677ff
RH
6175 if ((code == EQ || code == NE)
6176 && (!optimize_size
6177 || hi[1] == const0_rtx || lo[1] == const0_rtx))
6178 {
6179 rtx xor0, xor1;
32b5b1aa 6180
3a3677ff
RH
6181 xor1 = hi[0];
6182 if (hi[1] != const0_rtx)
6183 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
6184 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 6185
3a3677ff
RH
6186 xor0 = lo[0];
6187 if (lo[1] != const0_rtx)
6188 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
6189 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 6190
3a3677ff
RH
6191 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
6192 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 6193
3a3677ff
RH
6194 ix86_compare_op0 = tmp;
6195 ix86_compare_op1 = const0_rtx;
6196 ix86_expand_branch (code, label);
6197 return;
6198 }
e075ae69 6199
1f9124e4
JJ
6200 /* Otherwise, if we are doing less-than or greater-or-equal-than,
6201 op1 is a constant and the low word is zero, then we can just
6202 examine the high word. */
32b5b1aa 6203
1f9124e4
JJ
6204 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
6205 switch (code)
6206 {
6207 case LT: case LTU: case GE: case GEU:
6208 ix86_compare_op0 = hi[0];
6209 ix86_compare_op1 = hi[1];
6210 ix86_expand_branch (code, label);
6211 return;
6212 default:
6213 break;
6214 }
e075ae69 6215
3a3677ff 6216 /* Otherwise, we need two or three jumps. */
e075ae69 6217
3a3677ff 6218 label2 = gen_label_rtx ();
e075ae69 6219
3a3677ff
RH
6220 code1 = code;
6221 code2 = swap_condition (code);
6222 code3 = unsigned_condition (code);
e075ae69 6223
3a3677ff
RH
6224 switch (code)
6225 {
6226 case LT: case GT: case LTU: case GTU:
6227 break;
e075ae69 6228
3a3677ff
RH
6229 case LE: code1 = LT; code2 = GT; break;
6230 case GE: code1 = GT; code2 = LT; break;
6231 case LEU: code1 = LTU; code2 = GTU; break;
6232 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 6233
3a3677ff
RH
6234 case EQ: code1 = NIL; code2 = NE; break;
6235 case NE: code2 = NIL; break;
e075ae69 6236
3a3677ff
RH
6237 default:
6238 abort ();
6239 }
e075ae69 6240
3a3677ff
RH
6241 /*
6242 * a < b =>
6243 * if (hi(a) < hi(b)) goto true;
6244 * if (hi(a) > hi(b)) goto false;
6245 * if (lo(a) < lo(b)) goto true;
6246 * false:
6247 */
6248
6249 ix86_compare_op0 = hi[0];
6250 ix86_compare_op1 = hi[1];
6251
6252 if (code1 != NIL)
6253 ix86_expand_branch (code1, label);
6254 if (code2 != NIL)
6255 ix86_expand_branch (code2, label2);
6256
6257 ix86_compare_op0 = lo[0];
6258 ix86_compare_op1 = lo[1];
6259 ix86_expand_branch (code3, label);
6260
6261 if (code2 != NIL)
6262 emit_label (label2);
6263 return;
6264 }
e075ae69 6265
3a3677ff
RH
6266 default:
6267 abort ();
6268 }
32b5b1aa 6269}
e075ae69 6270
9e7adcb3
JH
/* Split branch based on floating point condition.

   Emits the one-to-three conditional jumps implementing FP comparison
   CODE on OP1/OP2, jumping to TARGET1 when true and TARGET2 when false
   (one of them is pc_rtx for fall-through).  TMP is an optional HImode
   scratch forwarded to ix86_expand_fp_compare.  Branch probabilities
   from split_branch_probability are attached as REG_BR_PROB notes.  */
void
ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
     enum rtx_code code;
     rtx op1, op2, target1, target2, tmp;
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  /* Canonicalize so that TARGET2 is the fall-through (pc_rtx) by
     reversing the condition if needed.  */
  if (target2 != pc_rtx)
    {
      /* NOTE(review): this local `tmp` shadows the scratch parameter
         `tmp`; intentional here (only used for the swap), but easy to
         misread.  */
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
                                      tmp, &second, &bypass);

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
         Assume the BYPASS and SECOND to be always test
         for UNORDERED.  */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
         to be updated.  Later we may run some experiments and see
         if unordered values are more frequent in practice.  */
      if (bypass)
        bypass_probability = 1;
      if (second)
        second_probability = 1;
    }
  if (bypass != NULL_RTX)
    {
      /* The bypass test skips over the main jump (e.g. for unordered
         operands).  */
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode,
                                                 bypass,
                                                 gen_rtx_LABEL_REF (VOIDmode,
                                                                    label),
                                                 pc_rtx)));
      if (bypass_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (bypass_probability),
                               REG_NOTES (i));
    }
  /* The main conditional jump.  */
  i = emit_jump_insn (gen_rtx_SET
                      (VOIDmode, pc_rtx,
                       gen_rtx_IF_THEN_ELSE (VOIDmode,
                                             condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
                           GEN_INT (probability),
                           REG_NOTES (i));
  if (second != NULL_RTX)
    {
      /* Additional jump required by the comparison decomposition.  */
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
                                                 target2)));
      if (second_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (second_probability),
                               REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}
6349
32b5b1aa 6350int
3a3677ff 6351ix86_expand_setcc (code, dest)
e075ae69 6352 enum rtx_code code;
e075ae69 6353 rtx dest;
32b5b1aa 6354{
a1b8572c
JH
6355 rtx ret, tmp, tmpreg;
6356 rtx second_test, bypass_test;
e075ae69
RH
6357 int type;
6358
885a70fd
JH
6359 if (GET_MODE (ix86_compare_op0) == DImode
6360 && !TARGET_64BIT)
e075ae69
RH
6361 return 0; /* FAIL */
6362
6363 /* Three modes of generation:
6364 0 -- destination does not overlap compare sources:
6365 clear dest first, emit strict_low_part setcc.
6366 1 -- destination does overlap compare sources:
6367 emit subreg setcc, zero extend.
6368 2 -- destination is in QImode:
6369 emit setcc only.
e075ae69 6370
c50e5bc0
RH
6371 We don't use mode 0 early in compilation because it confuses CSE.
6372 There are peepholes to turn mode 1 into mode 0 if things work out
6373 nicely after reload. */
6374
6375 type = cse_not_expected ? 0 : 1;
e075ae69
RH
6376
6377 if (GET_MODE (dest) == QImode)
6378 type = 2;
6379 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
a500c31b 6380 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
e075ae69
RH
6381 type = 1;
6382
6383 if (type == 0)
6384 emit_move_insn (dest, const0_rtx);
6385
a1b8572c 6386 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
6387 PUT_MODE (ret, QImode);
6388
6389 tmp = dest;
a1b8572c 6390 tmpreg = dest;
e075ae69 6391 if (type == 0)
32b5b1aa 6392 {
e075ae69 6393 tmp = gen_lowpart (QImode, dest);
a1b8572c 6394 tmpreg = tmp;
e075ae69
RH
6395 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
6396 }
6397 else if (type == 1)
6398 {
6399 if (!cse_not_expected)
6400 tmp = gen_reg_rtx (QImode);
6401 else
6402 tmp = gen_lowpart (QImode, dest);
a1b8572c 6403 tmpreg = tmp;
e075ae69 6404 }
32b5b1aa 6405
e075ae69 6406 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
6407 if (bypass_test || second_test)
6408 {
6409 rtx test = second_test;
6410 int bypass = 0;
6411 rtx tmp2 = gen_reg_rtx (QImode);
6412 if (bypass_test)
6413 {
6414 if (second_test)
6415 abort();
6416 test = bypass_test;
6417 bypass = 1;
6418 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
6419 }
6420 PUT_MODE (test, QImode);
6421 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
6422
6423 if (bypass)
6424 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
6425 else
6426 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
6427 }
e075ae69
RH
6428
6429 if (type == 1)
6430 {
6431 rtx clob;
6432
6433 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
6434 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
6435 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6436 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6437 emit_insn (tmp);
32b5b1aa 6438 }
e075ae69
RH
6439
6440 return 1; /* DONE */
32b5b1aa 6441}
e075ae69 6442
32b5b1aa 6443int
e075ae69
RH
6444ix86_expand_int_movcc (operands)
6445 rtx operands[];
32b5b1aa 6446{
e075ae69
RH
6447 enum rtx_code code = GET_CODE (operands[1]), compare_code;
6448 rtx compare_seq, compare_op;
a1b8572c 6449 rtx second_test, bypass_test;
32b5b1aa 6450
36583fea
JH
6451 /* When the compare code is not LTU or GEU, we can not use sbbl case.
6452 In case comparsion is done with immediate, we can convert it to LTU or
6453 GEU by altering the integer. */
6454
6455 if ((code == LEU || code == GTU)
6456 && GET_CODE (ix86_compare_op1) == CONST_INT
6457 && GET_MODE (operands[0]) != HImode
6458 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
0f290768 6459 && GET_CODE (operands[2]) == CONST_INT
36583fea
JH
6460 && GET_CODE (operands[3]) == CONST_INT)
6461 {
6462 if (code == LEU)
6463 code = LTU;
6464 else
6465 code = GEU;
6466 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
6467 }
3a3677ff 6468
e075ae69 6469 start_sequence ();
a1b8572c 6470 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
6471 compare_seq = gen_sequence ();
6472 end_sequence ();
6473
6474 compare_code = GET_CODE (compare_op);
6475
6476 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
6477 HImode insns, we'd be swallowed in word prefix ops. */
6478
6479 if (GET_MODE (operands[0]) != HImode
885a70fd 6480 && GET_MODE (operands[0]) != DImode
0f290768 6481 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
6482 && GET_CODE (operands[3]) == CONST_INT)
6483 {
6484 rtx out = operands[0];
6485 HOST_WIDE_INT ct = INTVAL (operands[2]);
6486 HOST_WIDE_INT cf = INTVAL (operands[3]);
6487 HOST_WIDE_INT diff;
6488
a1b8572c
JH
6489 if ((compare_code == LTU || compare_code == GEU)
6490 && !second_test && !bypass_test)
e075ae69 6491 {
e075ae69
RH
6492
6493 /* Detect overlap between destination and compare sources. */
6494 rtx tmp = out;
6495
0f290768 6496 /* To simplify rest of code, restrict to the GEU case. */
36583fea
JH
6497 if (compare_code == LTU)
6498 {
6499 int tmp = ct;
6500 ct = cf;
6501 cf = tmp;
6502 compare_code = reverse_condition (compare_code);
6503 code = reverse_condition (code);
6504 }
6505 diff = ct - cf;
6506
e075ae69 6507 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
a500c31b 6508 || reg_overlap_mentioned_p (out, ix86_compare_op1))
e075ae69
RH
6509 tmp = gen_reg_rtx (SImode);
6510
6511 emit_insn (compare_seq);
6512 emit_insn (gen_x86_movsicc_0_m1 (tmp));
6513
36583fea
JH
6514 if (diff == 1)
6515 {
6516 /*
6517 * cmpl op0,op1
6518 * sbbl dest,dest
6519 * [addl dest, ct]
6520 *
6521 * Size 5 - 8.
6522 */
6523 if (ct)
e99af66b 6524 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
36583fea
JH
6525 }
6526 else if (cf == -1)
6527 {
6528 /*
6529 * cmpl op0,op1
6530 * sbbl dest,dest
6531 * orl $ct, dest
6532 *
6533 * Size 8.
6534 */
e99af66b 6535 emit_insn (gen_iorsi3 (tmp, tmp, GEN_INT (ct)));
36583fea
JH
6536 }
6537 else if (diff == -1 && ct)
6538 {
6539 /*
6540 * cmpl op0,op1
6541 * sbbl dest,dest
6542 * xorl $-1, dest
6543 * [addl dest, cf]
6544 *
6545 * Size 8 - 11.
6546 */
6547 emit_insn (gen_one_cmplsi2 (tmp, tmp));
6548 if (cf)
e99af66b 6549 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (cf)));
36583fea
JH
6550 }
6551 else
6552 {
6553 /*
6554 * cmpl op0,op1
6555 * sbbl dest,dest
6556 * andl cf - ct, dest
6557 * [addl dest, ct]
6558 *
6559 * Size 8 - 11.
6560 */
e99af66b 6561 emit_insn (gen_andsi3 (tmp, tmp, GEN_INT (trunc_int_for_mode
7471a1f0 6562 (cf - ct, SImode))));
36583fea 6563 if (ct)
e99af66b 6564 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
36583fea 6565 }
e075ae69
RH
6566
6567 if (tmp != out)
6568 emit_move_insn (out, tmp);
6569
6570 return 1; /* DONE */
6571 }
6572
6573 diff = ct - cf;
6574 if (diff < 0)
6575 {
6576 HOST_WIDE_INT tmp;
6577 tmp = ct, ct = cf, cf = tmp;
6578 diff = -diff;
734dba19
JH
6579 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
6580 {
6581 /* We may be reversing unordered compare to normal compare, that
6582 is not valid in general (we may convert non-trapping condition
6583 to trapping one), however on i386 we currently emit all
6584 comparisons unordered. */
6585 compare_code = reverse_condition_maybe_unordered (compare_code);
6586 code = reverse_condition_maybe_unordered (code);
6587 }
6588 else
6589 {
6590 compare_code = reverse_condition (compare_code);
6591 code = reverse_condition (code);
6592 }
e075ae69
RH
6593 }
6594 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
6595 || diff == 3 || diff == 5 || diff == 9)
6596 {
6597 /*
6598 * xorl dest,dest
6599 * cmpl op1,op2
6600 * setcc dest
6601 * lea cf(dest*(ct-cf)),dest
6602 *
6603 * Size 14.
6604 *
6605 * This also catches the degenerate setcc-only case.
6606 */
6607
6608 rtx tmp;
6609 int nops;
6610
6611 out = emit_store_flag (out, code, ix86_compare_op0,
6612 ix86_compare_op1, VOIDmode, 0, 1);
6613
6614 nops = 0;
885a70fd
JH
6615 /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics
6616 done in proper mode to match. */
e075ae69 6617 if (diff == 1)
885a70fd
JH
6618 {
6619 if (Pmode != SImode)
6620 tmp = gen_lowpart (Pmode, out);
6621 else
6622 tmp = out;
6623 }
e075ae69
RH
6624 else
6625 {
885a70fd
JH
6626 rtx out1;
6627 if (Pmode != SImode)
6628 out1 = gen_lowpart (Pmode, out);
6629 else
6630 out1 = out;
6631 tmp = gen_rtx_MULT (Pmode, out1, GEN_INT (diff & ~1));
e075ae69
RH
6632 nops++;
6633 if (diff & 1)
6634 {
885a70fd 6635 tmp = gen_rtx_PLUS (Pmode, tmp, out1);
e075ae69
RH
6636 nops++;
6637 }
6638 }
6639 if (cf != 0)
6640 {
885a70fd 6641 tmp = gen_rtx_PLUS (Pmode, tmp, GEN_INT (cf));
e075ae69
RH
6642 nops++;
6643 }
885a70fd
JH
6644 if (tmp != out
6645 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
e075ae69 6646 {
885a70fd
JH
6647 if (Pmode != SImode)
6648 tmp = gen_rtx_SUBREG (SImode, tmp, 0);
6649
6650 /* ??? We should to take care for outputing non-lea arithmetics
6651 for Pmode != SImode case too, but it is quite tricky and not
6652 too important, since all TARGET_64BIT machines support real
6653 conditional moves. */
6654 if (nops == 1 && Pmode == SImode)
e075ae69
RH
6655 {
6656 rtx clob;
6657
6658 clob = gen_rtx_REG (CCmode, FLAGS_REG);
6659 clob = gen_rtx_CLOBBER (VOIDmode, clob);
6660
6661 tmp = gen_rtx_SET (VOIDmode, out, tmp);
6662 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6663 emit_insn (tmp);
6664 }
6665 else
6666 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
6667 }
6668 if (out != operands[0])
6669 emit_move_insn (operands[0], out);
6670
6671 return 1; /* DONE */
6672 }
6673
6674 /*
6675 * General case: Jumpful:
6676 * xorl dest,dest cmpl op1, op2
6677 * cmpl op1, op2 movl ct, dest
6678 * setcc dest jcc 1f
6679 * decl dest movl cf, dest
6680 * andl (cf-ct),dest 1:
6681 * addl ct,dest
0f290768 6682 *
e075ae69
RH
6683 * Size 20. Size 14.
6684 *
6685 * This is reasonably steep, but branch mispredict costs are
6686 * high on modern cpus, so consider failing only if optimizing
6687 * for space.
6688 *
6689 * %%% Parameterize branch_cost on the tuning architecture, then
6690 * use that. The 80386 couldn't care less about mispredicts.
6691 */
6692
6693 if (!optimize_size && !TARGET_CMOVE)
6694 {
6695 if (ct == 0)
6696 {
6697 ct = cf;
6698 cf = 0;
734dba19
JH
6699 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
6700 {
6701 /* We may be reversing unordered compare to normal compare,
6702 that is not valid in general (we may convert non-trapping
6703 condition to trapping one), however on i386 we currently
6704 emit all comparisons unordered. */
6705 compare_code = reverse_condition_maybe_unordered (compare_code);
6706 code = reverse_condition_maybe_unordered (code);
6707 }
6708 else
6709 {
6710 compare_code = reverse_condition (compare_code);
6711 code = reverse_condition (code);
6712 }
e075ae69
RH
6713 }
6714
6715 out = emit_store_flag (out, code, ix86_compare_op0,
6716 ix86_compare_op1, VOIDmode, 0, 1);
6717
6718 emit_insn (gen_addsi3 (out, out, constm1_rtx));
7471a1f0
AO
6719 emit_insn (gen_andsi3 (out, out, GEN_INT (trunc_int_for_mode
6720 (cf - ct, SImode))));
e075ae69
RH
6721 if (ct != 0)
6722 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
6723 if (out != operands[0])
6724 emit_move_insn (operands[0], out);
6725
6726 return 1; /* DONE */
6727 }
6728 }
6729
6730 if (!TARGET_CMOVE)
6731 {
6732 /* Try a few things more with specific constants and a variable. */
6733
78a0d70c 6734 optab op;
e075ae69
RH
6735 rtx var, orig_out, out, tmp;
6736
6737 if (optimize_size)
6738 return 0; /* FAIL */
6739
0f290768 6740 /* If one of the two operands is an interesting constant, load a
e075ae69 6741 constant with the above and mask it in with a logical operation. */
0f290768 6742
e075ae69
RH
6743 if (GET_CODE (operands[2]) == CONST_INT)
6744 {
6745 var = operands[3];
6746 if (INTVAL (operands[2]) == 0)
6747 operands[3] = constm1_rtx, op = and_optab;
6748 else if (INTVAL (operands[2]) == -1)
6749 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
6750 else
6751 return 0; /* FAIL */
e075ae69
RH
6752 }
6753 else if (GET_CODE (operands[3]) == CONST_INT)
6754 {
6755 var = operands[2];
6756 if (INTVAL (operands[3]) == 0)
6757 operands[2] = constm1_rtx, op = and_optab;
6758 else if (INTVAL (operands[3]) == -1)
6759 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
6760 else
6761 return 0; /* FAIL */
e075ae69 6762 }
78a0d70c 6763 else
e075ae69
RH
6764 return 0; /* FAIL */
6765
6766 orig_out = operands[0];
6767 tmp = gen_reg_rtx (GET_MODE (orig_out));
6768 operands[0] = tmp;
6769
6770 /* Recurse to get the constant loaded. */
6771 if (ix86_expand_int_movcc (operands) == 0)
6772 return 0; /* FAIL */
6773
6774 /* Mask in the interesting variable. */
6775 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
6776 OPTAB_WIDEN);
6777 if (out != orig_out)
6778 emit_move_insn (orig_out, out);
6779
6780 return 1; /* DONE */
6781 }
6782
6783 /*
6784 * For comparison with above,
6785 *
6786 * movl cf,dest
6787 * movl ct,tmp
6788 * cmpl op1,op2
6789 * cmovcc tmp,dest
6790 *
6791 * Size 15.
6792 */
6793
6794 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
6795 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
6796 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
6797 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
6798
a1b8572c
JH
6799 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6800 {
6801 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6802 emit_move_insn (tmp, operands[3]);
6803 operands[3] = tmp;
6804 }
6805 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6806 {
6807 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6808 emit_move_insn (tmp, operands[2]);
6809 operands[2] = tmp;
6810 }
c9682caf
JH
6811 if (! register_operand (operands[2], VOIDmode)
6812 && ! register_operand (operands[3], VOIDmode))
6813 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
a1b8572c 6814
e075ae69
RH
6815 emit_insn (compare_seq);
6816 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6817 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6818 compare_op, operands[2],
6819 operands[3])));
a1b8572c
JH
6820 if (bypass_test)
6821 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6822 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6823 bypass_test,
6824 operands[3],
6825 operands[0])));
6826 if (second_test)
6827 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6828 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6829 second_test,
6830 operands[2],
6831 operands[0])));
e075ae69
RH
6832
6833 return 1; /* DONE */
e9a25f70 6834}
e075ae69 6835
/* Expand a floating point conditional move.  OPERANDS[0] is the
   destination, OPERANDS[1] the comparison rtx (its operands live in the
   globals ix86_compare_op0/ix86_compare_op1), OPERANDS[2]/OPERANDS[3]
   the values selected when the comparison is true/false.  Always
   returns 1 ("DONE").  May rewrite OPERANDS[1..3] and the compare
   globals while canonicalizing.  */
int
ix86_expand_fp_movcc (operands)
     rtx operands[];
{
  enum rtx_code code;
  rtx tmp;
  rtx compare_op, second_test, bypass_test;

  /* For SF/DFmode conditional moves based on comparisons
     in same mode, we may want to use SSE min/max instructions.  */
  if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
       || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
      && (!TARGET_IEEE_FP
	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
      /* We may be called from the post-reload splitter.  */
      && (!REG_P (operands[0])
	  || SSE_REG_P (operands[0])
	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
    {
      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
      code = GET_CODE (operands[1]);

      /* See if we have (cross) match between comparison operands and
         conditional move operands.  */
      if (rtx_equal_p (operands[2], op1))
	{
	  /* Swap the comparison operands so operands[2] matches op0,
	     reversing the condition (unordered-aware) to compensate.  */
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = reverse_condition_maybe_unordered (code);
	}
      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
	{
	  /* Check for min operation.  */
	  if (code == LT)
	    {
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_minsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_mindf3 (operands[0], op0, op1));
	      return 1;
	    }
	  /* Check for max operation.  */
	  if (code == GT)
	    {
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_maxsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_maxdf3 (operands[0], op0, op1));
	      return 1;
	    }
	}
      /* Manage condition to be sse_comparison_operator.  In case we are
	 in non-ieee mode, try to canonicalize the destination operand
	 to be first in the comparison - this helps reload to avoid extra
	 moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
	{
	  /* Swap the global comparison operands and the condition code
	     together, leaving the comparison's meaning unchanged.  */
	  rtx tmp = ix86_compare_op0;
	  ix86_compare_op0 = ix86_compare_op1;
	  ix86_compare_op1 = tmp;
	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      /* Similarly try to manage result to be first operand of conditional
	 move.  We also don't support the NE comparison on SSE, so try to
	 avoid it.  */
      if ((rtx_equal_p (operands[0], operands[3])
	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
	{
	  /* Exchange the two move arms and reverse the condition
	     (unordered-aware) to keep the overall selection the same.  */
	  rtx tmp = operands[2];
	  operands[2] = operands[3];
	  operands[3] = tmp;
	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
					  (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      if (GET_MODE (operands[0]) == SFmode)
	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      else
	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* The floating point conditional move instructions don't directly
     support signed integer comparisons.  */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      /* Materialize the condition into a QImode 0/1 value with setcc and
	 retry with an NE-against-zero test, which fcmov can handle.  */
      if (second_test != NULL || bypass_test != NULL)
	abort();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  /* When a follow-up conditional move reads operands[2]/[3] after
     operands[0] has been written, copy the value to a fresh register
     first to avoid reading the clobbered destination.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  /* Primary conditional move, optionally followed by fixups for the
     bypass/second tests produced by ix86_expand_compare.  */
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
				compare_op,
				operands[2],
				operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
				  bypass_test,
				  operands[3],
				  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
				  second_test,
				  operands[2],
				  operands[0])));

  return 1;
}
6989
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating pointer parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.
   Returns the number of parts (2 or 3); aborts on MMX registers,
   unsupported sizes, pre-reload multi-word registers, and operand
   forms it cannot decompose.  */

static int
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  int size;

  /* Number of word-sized pieces: 32-bit words on !TARGET_64BIT
     (TFmode counted as 3), 64-bit words otherwise.  */
  if (!TARGET_64BIT)
    size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp moves,
     that force all constants to memory to allow combining.  */

  if (GET_CODE (operand) == MEM
      && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
    operand = get_pool_constant (XEXP (operand, 0));

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      /* For a push, every part is the same Pmode-retyped push rtx; the
	 caller emits the pushes in the right order.  */
      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Hard registers only: consecutive SImode registers.  */
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      /* Memory: SImode slices at offsets 0/4/8.  */
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      /* FP constant: convert to the target's word images.  */
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		case TFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = GEN_INT (l[2]);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = GEN_INT (l[1]);
	      parts[0] = GEN_INT (l[0]);
	    }
	  else
	    abort ();
	}
    }
  else
    {
      /* 64-bit target: only XFmode/TFmode need splitting (DImode part 0
	 plus an SImode tail).  */
      if (mode == XFmode || mode == TFmode)
	{
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[3];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0] = GEN_INT (l[0] + ((l[1] << 31) << 1));
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);
	      parts[1] = GEN_INT (l[2]);
	    }
	  else
	    abort ();
	}
    }

  return size;
}
7118
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.
   OPERANDS[0] is the destination, OPERANDS[1] the source; higher
   operand slots are used as scratch for the part-wise copy.  */

void
ix86_split_long_move (operands)
     rtx operands[];
{
  rtx part[2][3];		/* part[0] = dest parts, part[1] = src parts.  */
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by fp moves,
	 that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      /* Each push moves the stack pointer; re-address later source
	 parts via the address of the following part to compensate.  */
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      /* Count destination parts that overlap the source address.  */
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  collisions = 1;
	  emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
				  XEXP (part[1][0], 0)));
	  part[1][0] = change_address (part[1][0],
				       TARGET_64BIT ? DImode : SImode,
				       part[0][nparts - 1]);
	  part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
	  if (nparts == 3)
	    part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
	}
    }

  if (push)
    {
      /* Pushes go from the highest part down; the common tail below
	 pushes parts 1 and 0.  */
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      /* We use only first 12 bytes of TFmode value, but for pushing we
		 are required to adjust stack as if we were pushing real 16byte
		 value.  */
	      if (mode == TFmode && !TARGET_64BIT)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				       GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these comes from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      if (GET_CODE (part[1][1]) == MEM)
		part[1][1] = adjust_address (part[1][1], DImode, 0);
	      else if (REG_P (part[1][1]))
		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
	      else
		abort();
	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* Copy highest part first.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      /* Copy lowest part first.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}
32b5b1aa 7306
e075ae69
RH
7307void
7308ix86_split_ashldi (operands, scratch)
7309 rtx *operands, scratch;
32b5b1aa 7310{
e075ae69
RH
7311 rtx low[2], high[2];
7312 int count;
b985a30f 7313
e075ae69
RH
7314 if (GET_CODE (operands[2]) == CONST_INT)
7315 {
7316 split_di (operands, 2, low, high);
7317 count = INTVAL (operands[2]) & 63;
32b5b1aa 7318
e075ae69
RH
7319 if (count >= 32)
7320 {
7321 emit_move_insn (high[0], low[1]);
7322 emit_move_insn (low[0], const0_rtx);
b985a30f 7323
e075ae69
RH
7324 if (count > 32)
7325 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
7326 }
7327 else
7328 {
7329 if (!rtx_equal_p (operands[0], operands[1]))
7330 emit_move_insn (operands[0], operands[1]);
7331 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
7332 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
7333 }
7334 }
7335 else
7336 {
7337 if (!rtx_equal_p (operands[0], operands[1]))
7338 emit_move_insn (operands[0], operands[1]);
b985a30f 7339
e075ae69 7340 split_di (operands, 1, low, high);
b985a30f 7341
e075ae69
RH
7342 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
7343 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 7344
fe577e58 7345 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 7346 {
fe577e58 7347 if (! no_new_pseudos)
e075ae69
RH
7348 scratch = force_reg (SImode, const0_rtx);
7349 else
7350 emit_move_insn (scratch, const0_rtx);
7351
7352 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
7353 scratch));
7354 }
7355 else
7356 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
7357 }
e9a25f70 7358}
32b5b1aa 7359
e075ae69
RH
7360void
7361ix86_split_ashrdi (operands, scratch)
7362 rtx *operands, scratch;
32b5b1aa 7363{
e075ae69
RH
7364 rtx low[2], high[2];
7365 int count;
32b5b1aa 7366
e075ae69
RH
7367 if (GET_CODE (operands[2]) == CONST_INT)
7368 {
7369 split_di (operands, 2, low, high);
7370 count = INTVAL (operands[2]) & 63;
32b5b1aa 7371
e075ae69
RH
7372 if (count >= 32)
7373 {
7374 emit_move_insn (low[0], high[1]);
32b5b1aa 7375
e075ae69
RH
7376 if (! reload_completed)
7377 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
7378 else
7379 {
7380 emit_move_insn (high[0], low[0]);
7381 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
7382 }
7383
7384 if (count > 32)
7385 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
7386 }
7387 else
7388 {
7389 if (!rtx_equal_p (operands[0], operands[1]))
7390 emit_move_insn (operands[0], operands[1]);
7391 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
7392 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
7393 }
7394 }
7395 else
32b5b1aa 7396 {
e075ae69
RH
7397 if (!rtx_equal_p (operands[0], operands[1]))
7398 emit_move_insn (operands[0], operands[1]);
7399
7400 split_di (operands, 1, low, high);
7401
7402 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
7403 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
7404
fe577e58 7405 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 7406 {
fe577e58 7407 if (! no_new_pseudos)
e075ae69
RH
7408 scratch = gen_reg_rtx (SImode);
7409 emit_move_insn (scratch, high[0]);
7410 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
7411 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
7412 scratch));
7413 }
7414 else
7415 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 7416 }
e075ae69 7417}
32b5b1aa 7418
e075ae69
RH
7419void
7420ix86_split_lshrdi (operands, scratch)
7421 rtx *operands, scratch;
7422{
7423 rtx low[2], high[2];
7424 int count;
32b5b1aa 7425
e075ae69 7426 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 7427 {
e075ae69
RH
7428 split_di (operands, 2, low, high);
7429 count = INTVAL (operands[2]) & 63;
7430
7431 if (count >= 32)
c7271385 7432 {
e075ae69
RH
7433 emit_move_insn (low[0], high[1]);
7434 emit_move_insn (high[0], const0_rtx);
32b5b1aa 7435
e075ae69
RH
7436 if (count > 32)
7437 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
7438 }
7439 else
7440 {
7441 if (!rtx_equal_p (operands[0], operands[1]))
7442 emit_move_insn (operands[0], operands[1]);
7443 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
7444 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
7445 }
32b5b1aa 7446 }
e075ae69
RH
7447 else
7448 {
7449 if (!rtx_equal_p (operands[0], operands[1]))
7450 emit_move_insn (operands[0], operands[1]);
32b5b1aa 7451
e075ae69
RH
7452 split_di (operands, 1, low, high);
7453
7454 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
7455 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
7456
7457 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 7458 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 7459 {
fe577e58 7460 if (! no_new_pseudos)
e075ae69
RH
7461 scratch = force_reg (SImode, const0_rtx);
7462 else
7463 emit_move_insn (scratch, const0_rtx);
7464
7465 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
7466 scratch));
7467 }
7468 else
7469 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
7470 }
32b5b1aa 7471}
3f803cd9 7472
0407c02b 7473/* Helper function for the string operations below. Dest VARIABLE whether
0945b39d
JH
7474 it is aligned to VALUE bytes. If true, jump to the label. */
7475static rtx
7476ix86_expand_aligntest (variable, value)
7477 rtx variable;
7478 int value;
7479{
7480 rtx label = gen_label_rtx ();
7481 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
7482 if (GET_MODE (variable) == DImode)
7483 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
7484 else
7485 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
7486 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
7487 1, 0, label);
7488 return label;
7489}
7490
7491/* Adjust COUNTER by the VALUE. */
7492static void
7493ix86_adjust_counter (countreg, value)
7494 rtx countreg;
7495 HOST_WIDE_INT value;
7496{
7497 if (GET_MODE (countreg) == DImode)
7498 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
7499 else
7500 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
7501}
7502
7503/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 7504rtx
0945b39d
JH
7505ix86_zero_extend_to_Pmode (exp)
7506 rtx exp;
7507{
7508 rtx r;
7509 if (GET_MODE (exp) == VOIDmode)
7510 return force_reg (Pmode, exp);
7511 if (GET_MODE (exp) == Pmode)
7512 return copy_to_mode_reg (Pmode, exp);
7513 r = gen_reg_rtx (Pmode);
7514 emit_insn (gen_zero_extendsidi2 (r, exp));
7515 return r;
7516}
7517
/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrstr contains similar code.
   DST/SRC are the MEM operands, COUNT_EXP the byte count and ALIGN_EXP
   the known alignment.  Returns 1 when an inline sequence was emitted,
   0 to fall back to the library call.  */
int
ix86_expand_movstr (dst, src, count_exp, align_exp)
     rtx dst, src, count_exp, align_exp;
{
  rtx srcreg, destreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;	/* Known alignment; 0 = unknown.  */
  unsigned HOST_WIDE_INT count = 0;  /* Compile-time byte count; 0 = unknown.  */
  rtx insns;

  /* Collect the whole expansion in a sequence so memory attributes can
     be fixed up before the insns are emitted for real.  */
  start_sequence ();

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    count = INTVAL (count_exp);

  /* Figure out proper mode for counter. For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      if (TARGET_64BIT)
	emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
					destreg, srcreg, countreg));
      else
	emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
				  destreg, srcreg, countreg));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int)64))
    {
      /* Word size of the rep move: 8 bytes on 64-bit unless -Os.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      if (count & ~(size - 1))
	{
	  /* rep move for the word-sized portion ...  */
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  if (size == 4)
	    {
	      if (TARGET_64BIT)
		emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
						destreg, srcreg, countreg));
	      else
		emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
					  destreg, srcreg, countreg));
	    }
	  else
	    emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
					    destreg, srcreg, countreg));
	}
      /* ... then single moves for the remaining 4/2/1-byte tail.  */
      if (size == 8 && (count & 0x04))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if (count & 0x02)
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (count & 0x01)
	emit_insn (gen_strmovqi (destreg, srcreg));
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
     allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;

      /* In case we don't know anything about the alignment, default to
         library version, since it is usually equally fast and result in
         shorter code.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
	{
	  end_sequence ();
	  return 0;
	}

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
         than 4 bytes, because gcc is able to optimize such code better (in
         the case the destination or the count really is aligned, gcc is often
         able to predict the branches) and also it is friendlier to the
         hardware branch prediction.

         Using loops is beneficial for generic case, because we can
         handle small counts using the loops.  Many CPUs (such as Athlon)
         have large REP prefix setup costs.

         This is quite costly.  Maybe we can revisit this decision later or
         add some customizability to this code.  */

      /* For unknown counts, skip the alignment prologue entirely when
	 fewer than a word of bytes remains.  */
      if (count == 0
	  && align < (TARGET_PENTIUMPRO && (count == 0
					    || count >= (unsigned int)260)
		      ? 8 : UNITS_PER_WORD))
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
				   LEU, 0, counter_mode, 1, 0, label);
	}
      /* Align the destination: copy 1, 2 and possibly 4 bytes as needed.  */
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4
	  && ((TARGET_PENTIUMPRO && (count == 0
				     || count >= (unsigned int)260))
	      || TARGET_64BIT))
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      /* Bulk copy with rep movs of word-sized chunks.  */
      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
					  destreg, srcreg, countreg2));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
	  emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
				    destreg, srcreg, countreg2));
	}

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      /* Epilogue: copy whatever tail the word-sized rep move left.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if ((align <= 4 || count == 0) && TARGET_64BIT)
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strmovqi (destreg, srcreg));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }

  insns = get_insns ();
  end_sequence ();

  /* Fix memory attributes on the generated insns, then emit them.  */
  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
  emit_insns (insns);
  return 1;
}
7741
7742/* Expand string clear operation (bzero). Use i386 string operations when
7743 profitable. expand_movstr contains similar code. */
7744int
7745ix86_expand_clrstr (src, count_exp, align_exp)
7746 rtx src, count_exp, align_exp;
7747{
7748 rtx destreg, zeroreg, countreg;
7749 enum machine_mode counter_mode;
7750 HOST_WIDE_INT align = 0;
7751 unsigned HOST_WIDE_INT count = 0;
7752
7753 if (GET_CODE (align_exp) == CONST_INT)
7754 align = INTVAL (align_exp);
7755
7756 /* This simple hack avoids all inlining code and simplifies code bellow. */
7757 if (!TARGET_ALIGN_STRINGOPS)
7758 align = 32;
7759
7760 if (GET_CODE (count_exp) == CONST_INT)
7761 count = INTVAL (count_exp);
7762 /* Figure out proper mode for counter. For 32bits it is always SImode,
7763 for 64bits use SImode when possible, otherwise DImode.
7764 Set count to number of bytes copied when known at compile time. */
7765 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
7766 || x86_64_zero_extended_value (count_exp))
7767 counter_mode = SImode;
7768 else
7769 counter_mode = DImode;
7770
7771 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
7772
7773 emit_insn (gen_cld ());
7774
7775 /* When optimizing for size emit simple rep ; movsb instruction for
7776 counts not divisible by 4. */
7777
7778 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
7779 {
7780 countreg = ix86_zero_extend_to_Pmode (count_exp);
7781 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
7782 if (TARGET_64BIT)
7783 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
7784 destreg, countreg));
7785 else
7786 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
7787 destreg, countreg));
7788 }
7789 else if (count != 0
7790 && (align >= 8
7791 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
7792 || optimize_size || count < (unsigned int)64))
7793 {
7794 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
7795 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
7796 if (count & ~(size - 1))
7797 {
7798 countreg = copy_to_mode_reg (counter_mode,
7799 GEN_INT ((count >> (size == 4 ? 2 : 3))
7800 & (TARGET_64BIT ? -1 : 0x3fffffff)));
7801 countreg = ix86_zero_extend_to_Pmode (countreg);
7802 if (size == 4)
7803 {
7804 if (TARGET_64BIT)
7805 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
7806 destreg, countreg));
7807 else
7808 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
7809 destreg, countreg));
7810 }
7811 else
7812 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
7813 destreg, countreg));
7814 }
7815 if (size == 8 && (count & 0x04))
7816 emit_insn (gen_strsetsi (destreg,
7817 gen_rtx_SUBREG (SImode, zeroreg, 0)));
7818 if (count & 0x02)
7819 emit_insn (gen_strsethi (destreg,
7820 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7821 if (count & 0x01)
7822 emit_insn (gen_strsetqi (destreg,
7823 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7824 }
7825 else
7826 {
7827 rtx countreg2;
7828 rtx label = NULL;
7829
7830 /* In case we don't know anything about the alignment, default to
7831 library version, since it is usually equally fast and result in
7832 shorter code. */
7833 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
7834 return 0;
7835
7836 if (TARGET_SINGLE_STRINGOP)
7837 emit_insn (gen_cld ());
7838
7839 countreg2 = gen_reg_rtx (Pmode);
7840 countreg = copy_to_mode_reg (counter_mode, count_exp);
7841 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
7842
7843 if (count == 0
7844 && align < (TARGET_PENTIUMPRO && (count == 0
7845 || count >= (unsigned int)260)
7846 ? 8 : UNITS_PER_WORD))
7847 {
7848 label = gen_label_rtx ();
7849 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
7850 LEU, 0, counter_mode, 1, 0, label);
7851 }
7852 if (align <= 1)
7853 {
7854 rtx label = ix86_expand_aligntest (destreg, 1);
7855 emit_insn (gen_strsetqi (destreg,
7856 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7857 ix86_adjust_counter (countreg, 1);
7858 emit_label (label);
7859 LABEL_NUSES (label) = 1;
7860 }
7861 if (align <= 2)
7862 {
7863 rtx label = ix86_expand_aligntest (destreg, 2);
7864 emit_insn (gen_strsethi (destreg,
7865 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7866 ix86_adjust_counter (countreg, 2);
7867 emit_label (label);
7868 LABEL_NUSES (label) = 1;
7869 }
7870 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
7871 || count >= (unsigned int)260))
7872 {
7873 rtx label = ix86_expand_aligntest (destreg, 4);
7874 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
7875 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
7876 : zeroreg)));
7877 ix86_adjust_counter (countreg, 4);
7878 emit_label (label);
7879 LABEL_NUSES (label) = 1;
7880 }
7881
7882 if (!TARGET_SINGLE_STRINGOP)
7883 emit_insn (gen_cld ());
7884 if (TARGET_64BIT)
7885 {
7886 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
7887 GEN_INT (3)));
7888 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
7889 destreg, countreg2));
7890 }
7891 else
7892 {
7893 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
7894 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
7895 destreg, countreg2));
7896 }
7897
7898 if (label)
7899 {
7900 emit_label (label);
7901 LABEL_NUSES (label) = 1;
7902 }
7903 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
7904 emit_insn (gen_strsetsi (destreg,
7905 gen_rtx_SUBREG (SImode, zeroreg, 0)));
7906 if (TARGET_64BIT && (align <= 4 || count == 0))
7907 {
7908 rtx label = ix86_expand_aligntest (destreg, 2);
7909 emit_insn (gen_strsetsi (destreg,
7910 gen_rtx_SUBREG (SImode, zeroreg, 0)));
7911 emit_label (label);
7912 LABEL_NUSES (label) = 1;
7913 }
7914 if (align > 2 && count != 0 && (count & 2))
7915 emit_insn (gen_strsethi (destreg,
7916 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7917 if (align <= 2 || count == 0)
7918 {
7919 rtx label = ix86_expand_aligntest (destreg, 2);
7920 emit_insn (gen_strsethi (destreg,
7921 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7922 emit_label (label);
7923 LABEL_NUSES (label) = 1;
7924 }
7925 if (align > 1 && count != 0 && (count & 1))
7926 emit_insn (gen_strsetqi (destreg,
7927 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7928 if (align <= 1 || count == 0)
7929 {
7930 rtx label = ix86_expand_aligntest (destreg, 1);
7931 emit_insn (gen_strsetqi (destreg,
7932 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7933 emit_label (label);
7934 LABEL_NUSES (label) = 1;
7935 }
7936 }
7937 return 1;
7938}
7939/* Expand strlen. */
7940int
7941ix86_expand_strlen (out, src, eoschar, align)
7942 rtx out, src, eoschar, align;
7943{
7944 rtx addr, scratch1, scratch2, scratch3, scratch4;
7945
7946 /* The generic case of strlen expander is long. Avoid it's
7947 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
7948
7949 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
7950 && !TARGET_INLINE_ALL_STRINGOPS
7951 && !optimize_size
7952 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
7953 return 0;
7954
7955 addr = force_reg (Pmode, XEXP (src, 0));
7956 scratch1 = gen_reg_rtx (Pmode);
7957
7958 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
7959 && !optimize_size)
7960 {
7961 /* Well it seems that some optimizer does not combine a call like
7962 foo(strlen(bar), strlen(bar));
7963 when the move and the subtraction is done here. It does calculate
7964 the length just once when these instructions are done inside of
7965 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
7966 often used and I use one fewer register for the lifetime of
7967 output_strlen_unroll() this is better. */
7968
7969 emit_move_insn (out, addr);
7970
7971 ix86_expand_strlensi_unroll_1 (out, align);
7972
7973 /* strlensi_unroll_1 returns the address of the zero at the end of
7974 the string, like memchr(), so compute the length by subtracting
7975 the start address. */
7976 if (TARGET_64BIT)
7977 emit_insn (gen_subdi3 (out, out, addr));
7978 else
7979 emit_insn (gen_subsi3 (out, out, addr));
7980 }
7981 else
7982 {
7983 scratch2 = gen_reg_rtx (Pmode);
7984 scratch3 = gen_reg_rtx (Pmode);
7985 scratch4 = force_reg (Pmode, constm1_rtx);
7986
7987 emit_move_insn (scratch3, addr);
7988 eoschar = force_reg (QImode, eoschar);
7989
7990 emit_insn (gen_cld ());
7991 if (TARGET_64BIT)
7992 {
7993 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
7994 align, scratch4, scratch3));
7995 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
7996 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
7997 }
7998 else
7999 {
8000 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
8001 align, scratch4, scratch3));
8002 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
8003 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
8004 }
8005 }
8006 return 1;
8007}
8008
e075ae69
RH
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body. It needs the initialisations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (out, align_rtx)
     rtx out, align_rtx;
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;		/* target when addr % 4 == 2 */
  rtx align_3_label = NULL_RTX;		/* target when addr % 4 == 3 */
  rtx align_4_label = gen_label_rtx ();	/* head of the aligned word loop */
  rtx end_0_label = gen_label_rtx ();	/* reached once the zero byte is found */
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);

  /* Unknown alignment is treated as 0 and gets the full byte-wise
     prologue below.  */
  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  /* Dispatch on addr % 4: 0 -> word loop, 2 -> two-byte check,
	     3 -> one-byte check; 1 falls through to the 3-byte check.  */
	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, 0, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
				   Pmode, 1, 0, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
				   Pmode, 1, 0, align_3_label);
	}
      else
        {
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, 0, align_4_label);
        }

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, 0, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
				   QImode, 1, 0, end_0_label);

	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (out, out, const1_rtx));
	  else
	    emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      /* Final prologue byte; after this the pointer is 4-aligned.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, 0, end_0_label);

      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.
     (word - 0x01010101) & ~word & 0x80808080 -- the classic
     zero-byte test.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 GEN_INT (trunc_int_for_mode
				  (0x80808080, SImode))));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
			   SImode, 1, 0, align_4_label);

  if (TARGET_CMOVE)
    {
       /* Branch-free epilogue: use cmov to step past the first two
	  bytes when the zero is not among them.  */
       rtx reg = gen_reg_rtx (SImode);
       rtx reg2 = gen_reg_rtx (Pmode);
       emit_move_insn (reg, tmpreg);
       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

       /* If zero is not in the first two bytes, move two bytes forward.  */
       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
						     reg,
						     tmpreg)));
       /* Emit lea manually to avoid clobbering of flags.
	  NOTE(review): gen_rtx_SET is normally built with VOIDmode;
	  SImode here looks odd -- confirm it is intentional.  */
       emit_insn (gen_rtx_SET (SImode, reg2,
			       gen_rtx_PLUS (Pmode, out, GEN_INT (2))));

       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
       emit_insn (gen_rtx_SET (VOIDmode, out,
			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						     reg2,
						     out)));

    }
  else
    {
       rtx end_2_label = gen_label_rtx ();
       /* Is zero in the first two bytes? */

       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				   gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				   pc_rtx);
       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
       JUMP_LABEL (tmp) = end_2_label;

       /* Not in the first two.  Move two bytes forward.  */
       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
       if (TARGET_64BIT)
	 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
       else
	 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

       emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  The add-with-itself sets the
     carry from the top bit of the low byte; the subtract-with-borrow
     then adjusts OUT back by 3 or 2 as appropriate.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));

  emit_label (end_0_label);
}
8199\f
e075ae69
RH
8200/* Clear stack slot assignments remembered from previous functions.
8201 This is called from INIT_EXPANDERS once before RTL is emitted for each
8202 function. */
8203
36edd3cc
BS
8204static void
8205ix86_init_machine_status (p)
1526a060 8206 struct function *p;
e075ae69 8207{
37b15744
RH
8208 p->machine = (struct machine_function *)
8209 xcalloc (1, sizeof (struct machine_function));
e075ae69
RH
8210}
8211
1526a060
BS
8212/* Mark machine specific bits of P for GC. */
8213static void
8214ix86_mark_machine_status (p)
8215 struct function *p;
8216{
37b15744 8217 struct machine_function *machine = p->machine;
1526a060
BS
8218 enum machine_mode mode;
8219 int n;
8220
37b15744
RH
8221 if (! machine)
8222 return;
8223
1526a060
BS
8224 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
8225 mode = (enum machine_mode) ((int) mode + 1))
8226 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
37b15744
RH
8227 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
8228}
8229
8230static void
8231ix86_free_machine_status (p)
8232 struct function *p;
8233{
8234 free (p->machine);
8235 p->machine = NULL;
1526a060
BS
8236}
8237
e075ae69
RH
8238/* Return a MEM corresponding to a stack slot with mode MODE.
8239 Allocate a new slot if necessary.
8240
8241 The RTL for a function can have several slots available: N is
8242 which slot to use. */
8243
8244rtx
8245assign_386_stack_local (mode, n)
8246 enum machine_mode mode;
8247 int n;
8248{
8249 if (n < 0 || n >= MAX_386_STACK_LOCALS)
8250 abort ();
8251
8252 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
8253 ix86_stack_locals[(int) mode][n]
8254 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
8255
8256 return ix86_stack_locals[(int) mode][n];
8257}
8258\f
8259/* Calculate the length of the memory address in the instruction
8260 encoding. Does not include the one-byte modrm, opcode, or prefix. */
8261
8262static int
8263memory_address_length (addr)
8264 rtx addr;
8265{
8266 struct ix86_address parts;
8267 rtx base, index, disp;
8268 int len;
8269
8270 if (GET_CODE (addr) == PRE_DEC
0d7d98ee
JH
8271 || GET_CODE (addr) == POST_INC
8272 || GET_CODE (addr) == PRE_MODIFY
8273 || GET_CODE (addr) == POST_MODIFY)
e075ae69 8274 return 0;
3f803cd9 8275
e075ae69
RH
8276 if (! ix86_decompose_address (addr, &parts))
8277 abort ();
3f803cd9 8278
e075ae69
RH
8279 base = parts.base;
8280 index = parts.index;
8281 disp = parts.disp;
8282 len = 0;
3f803cd9 8283
e075ae69
RH
8284 /* Register Indirect. */
8285 if (base && !index && !disp)
8286 {
8287 /* Special cases: ebp and esp need the two-byte modrm form. */
8288 if (addr == stack_pointer_rtx
8289 || addr == arg_pointer_rtx
564d80f4
JH
8290 || addr == frame_pointer_rtx
8291 || addr == hard_frame_pointer_rtx)
e075ae69 8292 len = 1;
3f803cd9 8293 }
e9a25f70 8294
e075ae69
RH
8295 /* Direct Addressing. */
8296 else if (disp && !base && !index)
8297 len = 4;
8298
3f803cd9
SC
8299 else
8300 {
e075ae69
RH
8301 /* Find the length of the displacement constant. */
8302 if (disp)
8303 {
8304 if (GET_CODE (disp) == CONST_INT
8305 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
8306 len = 1;
8307 else
8308 len = 4;
8309 }
3f803cd9 8310
e075ae69
RH
8311 /* An index requires the two-byte modrm form. */
8312 if (index)
8313 len += 1;
3f803cd9
SC
8314 }
8315
e075ae69
RH
8316 return len;
8317}
79325812 8318
6ef67412
JH
8319/* Compute default value for "length_immediate" attribute. When SHORTFORM is set
8320 expect that insn have 8bit immediate alternative. */
e075ae69 8321int
6ef67412 8322ix86_attr_length_immediate_default (insn, shortform)
e075ae69 8323 rtx insn;
6ef67412 8324 int shortform;
e075ae69 8325{
6ef67412
JH
8326 int len = 0;
8327 int i;
6c698a6d 8328 extract_insn_cached (insn);
6ef67412
JH
8329 for (i = recog_data.n_operands - 1; i >= 0; --i)
8330 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 8331 {
6ef67412 8332 if (len)
3071fab5 8333 abort ();
6ef67412
JH
8334 if (shortform
8335 && GET_CODE (recog_data.operand[i]) == CONST_INT
8336 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
8337 len = 1;
8338 else
8339 {
8340 switch (get_attr_mode (insn))
8341 {
8342 case MODE_QI:
8343 len+=1;
8344 break;
8345 case MODE_HI:
8346 len+=2;
8347 break;
8348 case MODE_SI:
8349 len+=4;
8350 break;
8351 default:
8352 fatal_insn ("Unknown insn mode", insn);
8353 }
8354 }
3071fab5 8355 }
6ef67412
JH
8356 return len;
8357}
8358/* Compute default value for "length_address" attribute. */
8359int
8360ix86_attr_length_address_default (insn)
8361 rtx insn;
8362{
8363 int i;
6c698a6d 8364 extract_insn_cached (insn);
1ccbefce
RH
8365 for (i = recog_data.n_operands - 1; i >= 0; --i)
8366 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 8367 {
6ef67412 8368 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
8369 break;
8370 }
6ef67412 8371 return 0;
3f803cd9 8372}
e075ae69
RH
8373\f
8374/* Return the maximum number of instructions a cpu can issue. */
b657fc39 8375
e075ae69
RH
8376int
8377ix86_issue_rate ()
b657fc39 8378{
e075ae69 8379 switch (ix86_cpu)
b657fc39 8380 {
e075ae69
RH
8381 case PROCESSOR_PENTIUM:
8382 case PROCESSOR_K6:
8383 return 2;
79325812 8384
e075ae69 8385 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
8386 case PROCESSOR_PENTIUM4:
8387 case PROCESSOR_ATHLON:
e075ae69 8388 return 3;
b657fc39 8389
b657fc39 8390 default:
e075ae69 8391 return 1;
b657fc39 8392 }
b657fc39
L
8393}
8394
e075ae69
RH
8395/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
8396 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 8397
e075ae69
RH
8398static int
8399ix86_flags_dependant (insn, dep_insn, insn_type)
8400 rtx insn, dep_insn;
8401 enum attr_type insn_type;
8402{
8403 rtx set, set2;
b657fc39 8404
e075ae69
RH
8405 /* Simplify the test for uninteresting insns. */
8406 if (insn_type != TYPE_SETCC
8407 && insn_type != TYPE_ICMOV
8408 && insn_type != TYPE_FCMOV
8409 && insn_type != TYPE_IBR)
8410 return 0;
b657fc39 8411
e075ae69
RH
8412 if ((set = single_set (dep_insn)) != 0)
8413 {
8414 set = SET_DEST (set);
8415 set2 = NULL_RTX;
8416 }
8417 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
8418 && XVECLEN (PATTERN (dep_insn), 0) == 2
8419 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
8420 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
8421 {
8422 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
8423 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
8424 }
78a0d70c
ZW
8425 else
8426 return 0;
b657fc39 8427
78a0d70c
ZW
8428 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
8429 return 0;
b657fc39 8430
78a0d70c
ZW
8431 /* This test is true if the dependant insn reads the flags but
8432 not any other potentially set register. */
8433 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
8434 return 0;
8435
8436 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
8437 return 0;
8438
8439 return 1;
e075ae69 8440}
b657fc39 8441
e075ae69
RH
8442/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
8443 address with operands set by DEP_INSN. */
8444
8445static int
8446ix86_agi_dependant (insn, dep_insn, insn_type)
8447 rtx insn, dep_insn;
8448 enum attr_type insn_type;
8449{
8450 rtx addr;
8451
6ad48e84
JH
8452 if (insn_type == TYPE_LEA
8453 && TARGET_PENTIUM)
5fbdde42
RH
8454 {
8455 addr = PATTERN (insn);
8456 if (GET_CODE (addr) == SET)
8457 ;
8458 else if (GET_CODE (addr) == PARALLEL
8459 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
8460 addr = XVECEXP (addr, 0, 0);
8461 else
8462 abort ();
8463 addr = SET_SRC (addr);
8464 }
e075ae69
RH
8465 else
8466 {
8467 int i;
6c698a6d 8468 extract_insn_cached (insn);
1ccbefce
RH
8469 for (i = recog_data.n_operands - 1; i >= 0; --i)
8470 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 8471 {
1ccbefce 8472 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
8473 goto found;
8474 }
8475 return 0;
8476 found:;
b657fc39
L
8477 }
8478
e075ae69 8479 return modified_in_p (addr, dep_insn);
b657fc39 8480}
a269a03c
JC
8481
8482int
e075ae69 8483ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
8484 rtx insn, link, dep_insn;
8485 int cost;
8486{
e075ae69 8487 enum attr_type insn_type, dep_insn_type;
6ad48e84 8488 enum attr_memory memory, dep_memory;
e075ae69 8489 rtx set, set2;
9b00189f 8490 int dep_insn_code_number;
a269a03c 8491
309ada50 8492 /* Anti and output depenancies have zero cost on all CPUs. */
e075ae69 8493 if (REG_NOTE_KIND (link) != 0)
309ada50 8494 return 0;
a269a03c 8495
9b00189f
JH
8496 dep_insn_code_number = recog_memoized (dep_insn);
8497
e075ae69 8498 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 8499 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 8500 return cost;
a269a03c 8501
1c71e60e
JH
8502 insn_type = get_attr_type (insn);
8503 dep_insn_type = get_attr_type (dep_insn);
9b00189f 8504
a269a03c
JC
8505 switch (ix86_cpu)
8506 {
8507 case PROCESSOR_PENTIUM:
e075ae69
RH
8508 /* Address Generation Interlock adds a cycle of latency. */
8509 if (ix86_agi_dependant (insn, dep_insn, insn_type))
8510 cost += 1;
8511
8512 /* ??? Compares pair with jump/setcc. */
8513 if (ix86_flags_dependant (insn, dep_insn, insn_type))
8514 cost = 0;
8515
8516 /* Floating point stores require value to be ready one cycle ealier. */
0f290768 8517 if (insn_type == TYPE_FMOV
e075ae69
RH
8518 && get_attr_memory (insn) == MEMORY_STORE
8519 && !ix86_agi_dependant (insn, dep_insn, insn_type))
8520 cost += 1;
8521 break;
a269a03c 8522
e075ae69 8523 case PROCESSOR_PENTIUMPRO:
6ad48e84
JH
8524 memory = get_attr_memory (insn);
8525 dep_memory = get_attr_memory (dep_insn);
8526
0f290768 8527 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
8528 increase the cost here for non-imov insns. */
8529 if (dep_insn_type != TYPE_IMOV
6ad48e84
JH
8530 && dep_insn_type != TYPE_FMOV
8531 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
e075ae69
RH
8532 cost += 1;
8533
8534 /* INT->FP conversion is expensive. */
8535 if (get_attr_fp_int_src (dep_insn))
8536 cost += 5;
8537
8538 /* There is one cycle extra latency between an FP op and a store. */
8539 if (insn_type == TYPE_FMOV
8540 && (set = single_set (dep_insn)) != NULL_RTX
8541 && (set2 = single_set (insn)) != NULL_RTX
8542 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
8543 && GET_CODE (SET_DEST (set2)) == MEM)
8544 cost += 1;
6ad48e84
JH
8545
8546 /* Show ability of reorder buffer to hide latency of load by executing
8547 in parallel with previous instruction in case
8548 previous instruction is not needed to compute the address. */
8549 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
8550 && !ix86_agi_dependant (insn, dep_insn, insn_type))
8551 {
8552 /* Claim moves to take one cycle, as core can issue one load
8553 at time and the next load can start cycle later. */
8554 if (dep_insn_type == TYPE_IMOV
8555 || dep_insn_type == TYPE_FMOV)
8556 cost = 1;
8557 else if (cost > 1)
8558 cost--;
8559 }
e075ae69 8560 break;
a269a03c 8561
e075ae69 8562 case PROCESSOR_K6:
6ad48e84
JH
8563 memory = get_attr_memory (insn);
8564 dep_memory = get_attr_memory (dep_insn);
e075ae69
RH
8565 /* The esp dependency is resolved before the instruction is really
8566 finished. */
8567 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
8568 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
8569 return 1;
a269a03c 8570
0f290768 8571 /* Since we can't represent delayed latencies of load+operation,
e075ae69 8572 increase the cost here for non-imov insns. */
6ad48e84 8573 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
e075ae69
RH
8574 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
8575
8576 /* INT->FP conversion is expensive. */
8577 if (get_attr_fp_int_src (dep_insn))
8578 cost += 5;
6ad48e84
JH
8579
8580 /* Show ability of reorder buffer to hide latency of load by executing
8581 in parallel with previous instruction in case
8582 previous instruction is not needed to compute the address. */
8583 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
8584 && !ix86_agi_dependant (insn, dep_insn, insn_type))
8585 {
8586 /* Claim moves to take one cycle, as core can issue one load
8587 at time and the next load can start cycle later. */
8588 if (dep_insn_type == TYPE_IMOV
8589 || dep_insn_type == TYPE_FMOV)
8590 cost = 1;
8591 else if (cost > 2)
8592 cost -= 2;
8593 else
8594 cost = 1;
8595 }
a14003ee 8596 break;
e075ae69 8597
309ada50 8598 case PROCESSOR_ATHLON:
6ad48e84
JH
8599 memory = get_attr_memory (insn);
8600 dep_memory = get_attr_memory (dep_insn);
8601
8602 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
0b5107cf
JH
8603 {
8604 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
8605 cost += 2;
8606 else
8607 cost += 3;
8608 }
6ad48e84
JH
8609 /* Show ability of reorder buffer to hide latency of load by executing
8610 in parallel with previous instruction in case
8611 previous instruction is not needed to compute the address. */
8612 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
8613 && !ix86_agi_dependant (insn, dep_insn, insn_type))
8614 {
8615 /* Claim moves to take one cycle, as core can issue one load
8616 at time and the next load can start cycle later. */
8617 if (dep_insn_type == TYPE_IMOV
8618 || dep_insn_type == TYPE_FMOV)
8619 cost = 0;
8620 else if (cost >= 3)
8621 cost -= 3;
8622 else
8623 cost = 0;
8624 }
309ada50 8625
a269a03c 8626 default:
a269a03c
JC
8627 break;
8628 }
8629
8630 return cost;
8631}
0a726ef1 8632
/* Scheduler state shared by the ix86 scheduling hooks; reset by
   ix86_sched_init at the start of each scheduling block.  Kept in a
   union so further per-CPU variants can share the storage.  */
static union
{
  /* PentiumPro state: the insns that occupied the three decoders last
     cycle, and how many insns were issued this cycle.  */
  struct ppro_sched_data
  {
    rtx decode[3];
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;
0a726ef1 8641
e075ae69
RH
8642static int
8643ix86_safe_length (insn)
8644 rtx insn;
8645{
8646 if (recog_memoized (insn) >= 0)
8647 return get_attr_length(insn);
8648 else
8649 return 128;
8650}
0a726ef1 8651
/* Like ix86_safe_length, but intended to yield the prefix allowance of
   INSN; unrecognizable insns count as 0.
   NOTE(review): this actually returns the full get_attr_length value,
   not a prefix-only length -- looks suspicious; confirm against the
   "7 + prefix" byte budget used in ix86_pent_find_pair.  */
static int
ix86_safe_length_prefix (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_length(insn);
  else
    return 0;
}
8661
8662static enum attr_memory
8663ix86_safe_memory (insn)
8664 rtx insn;
8665{
8666 if (recog_memoized (insn) >= 0)
8667 return get_attr_memory(insn);
8668 else
8669 return MEMORY_UNKNOWN;
8670}
0a726ef1 8671
e075ae69
RH
8672static enum attr_pent_pair
8673ix86_safe_pent_pair (insn)
8674 rtx insn;
8675{
8676 if (recog_memoized (insn) >= 0)
8677 return get_attr_pent_pair(insn);
8678 else
8679 return PENT_PAIR_NP;
8680}
0a726ef1 8681
e075ae69
RH
8682static enum attr_ppro_uops
8683ix86_safe_ppro_uops (insn)
8684 rtx insn;
8685{
8686 if (recog_memoized (insn) >= 0)
8687 return get_attr_ppro_uops (insn);
8688 else
8689 return PPRO_UOPS_MANY;
8690}
0a726ef1 8691
e075ae69
RH
8692static void
8693ix86_dump_ppro_packet (dump)
8694 FILE *dump;
0a726ef1 8695{
e075ae69 8696 if (ix86_sched_data.ppro.decode[0])
0a726ef1 8697 {
e075ae69
RH
8698 fprintf (dump, "PPRO packet: %d",
8699 INSN_UID (ix86_sched_data.ppro.decode[0]));
8700 if (ix86_sched_data.ppro.decode[1])
8701 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
8702 if (ix86_sched_data.ppro.decode[2])
8703 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
8704 fputc ('\n', dump);
8705 }
8706}
0a726ef1 8707
e075ae69 8708/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 8709
e075ae69
RH
8710void
8711ix86_sched_init (dump, sched_verbose)
8712 FILE *dump ATTRIBUTE_UNUSED;
8713 int sched_verbose ATTRIBUTE_UNUSED;
8714{
8715 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
8716}
8717
8718/* Shift INSN to SLOT, and shift everything else down. */
8719
8720static void
8721ix86_reorder_insn (insnp, slot)
8722 rtx *insnp, *slot;
8723{
8724 if (insnp != slot)
8725 {
8726 rtx insn = *insnp;
0f290768 8727 do
e075ae69
RH
8728 insnp[0] = insnp[1];
8729 while (++insnp != slot);
8730 *insnp = insn;
0a726ef1 8731 }
e075ae69
RH
8732}
8733
/* Find an instruction with given pairability and minimal amount of cycles
   lost by the fact that the CPU waits for both pipelines to finish before
   reading next instructions.  Also take care that both instructions together
   can not exceed 7 bytes.

   Scans the ready list from E_READY down to READY for an insn whose
   pent_pair attribute equals TYPE and returns a pointer into the list,
   or NULL when no candidate can pair with FIRST.  */

static rtx *
ix86_pent_find_pair (e_ready, ready, type, first)
     rtx *e_ready;
     rtx *ready;
     enum attr_pent_pair type;
     rtx first;
{
  int mincycles, cycles;
  enum attr_pent_pair tmp;
  enum attr_memory memory;
  rtx *insnp, *bestinsnp = NULL;

  /* FIRST itself must fit the 7-byte pairing window (its prefix bytes
     are not counted against the limit).  */
  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
    return NULL;

  memory = ix86_safe_memory (first);
  cycles = result_ready_cost (first);
  mincycles = INT_MAX;

  /* Walk backwards; the `mincycles' term in the condition stops the
     scan early once a candidate losing zero cycles has been found.  */
  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
      {
	enum attr_memory second_memory;
	int secondcycles, currentcycles;

	second_memory = ix86_safe_memory (*insnp);
	secondcycles = result_ready_cost (*insnp);
	/* The pipes run in lockstep, so the lost cycles are the
	   difference between the two latencies.  */
	currentcycles = abs (cycles - secondcycles);

	if (secondcycles >= 1 && cycles >= 1)
	  {
	    /* Two read/modify/write instructions together takes two
	       cycles longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
	      currentcycles += 2;

	    /* Read modify/write instruction followed by read/modify
	       takes one cycle longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
		&& tmp != PENT_PAIR_UV
		&& ix86_safe_pent_pair (first) != PENT_PAIR_UV)
	      currentcycles += 1;
	  }
	if (currentcycles < mincycles)
	  bestinsnp = insnp, mincycles = currentcycles;
      }

  return bestinsnp;
}
8788}
8789
78a0d70c 8790/* Subroutines of ix86_sched_reorder. */
e075ae69 8791
c6991660 8792static void
78a0d70c 8793ix86_sched_reorder_pentium (ready, e_ready)
e075ae69 8794 rtx *ready;
78a0d70c 8795 rtx *e_ready;
e075ae69 8796{
78a0d70c 8797 enum attr_pent_pair pair1, pair2;
e075ae69 8798 rtx *insnp;
e075ae69 8799
78a0d70c
ZW
8800 /* This wouldn't be necessary if Haifa knew that static insn ordering
8801 is important to which pipe an insn is issued to. So we have to make
8802 some minor rearrangements. */
e075ae69 8803
78a0d70c
ZW
8804 pair1 = ix86_safe_pent_pair (*e_ready);
8805
8806 /* If the first insn is non-pairable, let it be. */
8807 if (pair1 == PENT_PAIR_NP)
8808 return;
8809
8810 pair2 = PENT_PAIR_NP;
8811 insnp = 0;
8812
8813 /* If the first insn is UV or PV pairable, search for a PU
8814 insn to go with. */
8815 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
e075ae69 8816 {
78a0d70c
ZW
8817 insnp = ix86_pent_find_pair (e_ready-1, ready,
8818 PENT_PAIR_PU, *e_ready);
8819 if (insnp)
8820 pair2 = PENT_PAIR_PU;
8821 }
e075ae69 8822
78a0d70c
ZW
8823 /* If the first insn is PU or UV pairable, search for a PV
8824 insn to go with. */
8825 if (pair2 == PENT_PAIR_NP
8826 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
8827 {
8828 insnp = ix86_pent_find_pair (e_ready-1, ready,
8829 PENT_PAIR_PV, *e_ready);
8830 if (insnp)
8831 pair2 = PENT_PAIR_PV;
8832 }
e075ae69 8833
78a0d70c
ZW
8834 /* If the first insn is pairable, search for a UV
8835 insn to go with. */
8836 if (pair2 == PENT_PAIR_NP)
8837 {
8838 insnp = ix86_pent_find_pair (e_ready-1, ready,
8839 PENT_PAIR_UV, *e_ready);
8840 if (insnp)
8841 pair2 = PENT_PAIR_UV;
8842 }
e075ae69 8843
78a0d70c
ZW
8844 if (pair2 == PENT_PAIR_NP)
8845 return;
e075ae69 8846
78a0d70c
ZW
8847 /* Found something! Decide if we need to swap the order. */
8848 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
8849 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
8850 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
8851 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
8852 ix86_reorder_insn (insnp, e_ready);
8853 else
8854 ix86_reorder_insn (insnp, e_ready - 1);
8855}
e075ae69 8856
/* Reorder the scheduler's ready list for the Pentium Pro, which has
   three decoders: slot 0 takes insns of any complexity, slots 1 and 2
   only single-uop insns.  READY points to the first element of the
   ready array and E_READY to the last (issued first).  Records the
   number of insns slotted this cycle in ix86_sched_data.ppro.  */

static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready >= e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  /* Always claim at least one issue so the caller's countdown in
     ix86_variable_issue makes progress.  */
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}
fb693d44 8941
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.
   DUMP/SCHED_VERBOSE/CLOCK_VAR are unused; READY is the scheduler's
   ready array of N_READY insns, highest priority last.  Returns the
   number of insns that may be issued this cycle.  */
int
ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int n_ready;
     int clock_var ATTRIBUTE_UNUSED;
{
  /* Last element -- the insn the scheduler will issue first.  */
  rtx *e_ready = ready + n_ready - 1;

  /* Nothing to pair or slot with fewer than two ready insns.  */
  if (n_ready < 2)
    goto out;

  switch (ix86_cpu)
    {
    default:
      break;

    case PROCESSOR_PENTIUM:
      ix86_sched_reorder_pentium (ready, e_ready);
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

 out:
  return ix86_issue_rate ();
}
fb693d44 8974
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  For the Pentium Pro this
   also updates the simulated decoder state in ix86_sched_data.ppro,
   dumping the packet (when SCHED_VERBOSE) each time a decode group is
   completed or flushed.  */

int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    /* A complex insn occupies the whole decode group: dump any
	       pending packet, dump INSN as a packet by itself, then
	       leave the decoders empty.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    /* A 2-4 uop insn must start a new group: dump the pending
	       packet and put INSN in decode slot 0.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    /* Single-uop insn: place it in the first free decode slot;
	       filling slot 2 completes the group.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      /* Count down the budget established by ix86_sched_reorder_ppro.  */
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}
a7180f70 9037\f
0e4970d7
RK
9038/* Walk through INSNS and look for MEM references whose address is DSTREG or
9039 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
9040 appropriate. */
9041
9042void
9043ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
9044 rtx insns;
9045 rtx dstref, srcref, dstreg, srcreg;
9046{
9047 rtx insn;
9048
9049 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
9050 if (INSN_P (insn))
9051 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
9052 dstreg, srcreg);
9053}
9054
/* Subroutine of above to actually do the updating by recursively walking
   the rtx.  X is the expression being scanned; a MEM whose address is
   exactly (pointer-equal to) DSTREG or SRCREG gets the attributes of
   DSTREF or SRCREF respectively.  */

static void
ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx x;
     rtx dstref, srcref, dstreg, srcreg;
{
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);
  int i, j;

  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  /* Recurse into every rtx ('e') and rtx-vector ('E') operand.  */
  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
    {
      if (*format_ptr == 'e')
	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
				   dstreg, srcreg);
      else if (*format_ptr == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
				     dstreg, srcreg);
    }
}
9083\f
a7180f70
BS
9084/* Compute the alignment given to a constant that is being placed in memory.
9085 EXP is the constant and ALIGN is the alignment that the object would
9086 ordinarily have.
9087 The value of this function is used instead of that alignment to align
9088 the object. */
9089
9090int
9091ix86_constant_alignment (exp, align)
9092 tree exp;
9093 int align;
9094{
9095 if (TREE_CODE (exp) == REAL_CST)
9096 {
9097 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
9098 return 64;
9099 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
9100 return 128;
9101 }
9102 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
9103 && align < 256)
9104 return 256;
9105
9106 return align;
9107}
9108
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  All alignments here
   are expressed in bits.  */

int
ix86_data_alignment (type, align)
     tree type;
     int align;
{
  /* Large aggregates (>= 256 bits, or with a size too big for the low
     word of the INTEGER_CST) get 256-bit alignment.  */
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  /* For arrays, complex values, aggregates with fields, and scalar
     types, promote to 64-bit alignment for DFmode-sized data and
     128-bit for modes accepted by ALIGN_MODE_128.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Only the first field's mode is considered here.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
9174
/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  Alignments are in
   bits.  Mirrors ix86_data_alignment but without the 256-bit rule,
   and with a lower (16-byte) x86-64 aggregate threshold.  */

int
ix86_local_alignment (type, align)
     tree type;
     int align;
{
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Only the first field's mode is considered here.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
0ed08620
JH
9231\f
9232/* Emit RTL insns to initialize the variable parts of a trampoline.
9233 FNADDR is an RTX for the address of the function's pure code.
9234 CXT is an RTX for the static chain value for the function. */
9235void
9236x86_initialize_trampoline (tramp, fnaddr, cxt)
9237 rtx tramp, fnaddr, cxt;
9238{
9239 if (!TARGET_64BIT)
9240 {
9241 /* Compute offset from the end of the jmp to the target function. */
9242 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
9243 plus_constant (tramp, 10),
9244 NULL_RTX, 1, OPTAB_DIRECT);
9245 emit_move_insn (gen_rtx_MEM (QImode, tramp),
9246 GEN_INT (trunc_int_for_mode (0xb9, QImode)));
9247 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
9248 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
9249 GEN_INT (trunc_int_for_mode (0xe9, QImode)));
9250 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
9251 }
9252 else
9253 {
9254 int offset = 0;
9255 /* Try to load address using shorter movl instead of movabs.
9256 We may want to support movq for kernel mode, but kernel does not use
9257 trampolines at the moment. */
9258 if (x86_64_zero_extended_value (fnaddr))
9259 {
9260 fnaddr = copy_to_mode_reg (DImode, fnaddr);
9261 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9262 GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
9263 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
9264 gen_lowpart (SImode, fnaddr));
9265 offset += 6;
9266 }
9267 else
9268 {
9269 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9270 GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
9271 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
9272 fnaddr);
9273 offset += 10;
9274 }
9275 /* Load static chain using movabs to r10. */
9276 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9277 GEN_INT (trunc_int_for_mode (0xba49, HImode)));
9278 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
9279 cxt);
9280 offset += 10;
9281 /* Jump to the r11 */
9282 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9283 GEN_INT (trunc_int_for_mode (0xff49, HImode)));
9284 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
9285 GEN_INT (trunc_int_for_mode (0xe3, HImode)));
9286 offset += 3;
9287 if (offset > TRAMPOLINE_SIZE)
9288 abort();
9289 }
9290}

/* Register a target-specific builtin with the front end.  */
#define def_builtin(NAME, TYPE, CODE) \
  builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL)

/* Table entry describing one MMX/SSE builtin.  */
struct builtin_description
{
  enum insn_code icode;		/* Insn pattern used to expand it.  */
  const char * name;		/* Builtin name, or 0 for special expansion.  */
  enum ix86_builtins code;	/* Builtin function code.  */
  enum rtx_code comparison;	/* Comparison code, for compare builtins.  */
  unsigned int flag;		/* Extra per-builtin flag (e.g. nonzero on
				   the GT/GE comparisons, which are expanded
				   from LT/LE entries).  */
};
9302
/* SSE scalar compare builtins (comiss/ucomiss family) that set and
   test EFLAGS.  GT/GE entries reuse the LT/LE comparison with a
   nonzero flag -- presumably operands swapped at expansion time;
   verify against the expander.  */
static struct builtin_description bdesc_comi[] =
{
  { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
};
9318
/* Two-operand MMX/SSE builtins.  Entries with a zero name are expanded
   specially rather than registered by the generic two-arg loop.  */
static struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { CODE_FOR_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { CODE_FOR_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { CODE_FOR_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { CODE_FOR_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  /* SSE comparisons; GT/GE variants reuse LT/LE with the flag set.  */
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { CODE_FOR_sse_xorti3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },

  { CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },

  /* Saturating arithmetic.  */
  { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  Expanded by hand; name field is 0.  */
  { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },

  { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }

};
9451
/* One-operand MMX/SSE builtins; all expanded specially (name is 0).  */
static struct builtin_description bdesc_1arg[] =
{
  { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }

};
9467
/* Set up all the target-specific builtins.  MMX/SSE builtins are only
   registered when MMX support is enabled.  */
void
ix86_init_builtins ()
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}
9475
9476/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
bd793c65
BS
9477 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
9478 builtins. */
9479void
f6155fda 9480ix86_init_mmx_sse_builtins ()
bd793c65
BS
9481{
9482 struct builtin_description * d;
77ebd435 9483 size_t i;
cbd5937a 9484 tree endlink = void_list_node;
bd793c65
BS
9485
9486 tree pchar_type_node = build_pointer_type (char_type_node);
9487 tree pfloat_type_node = build_pointer_type (float_type_node);
9488 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
9489 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
9490
9491 /* Comparisons. */
9492 tree int_ftype_v4sf_v4sf
9493 = build_function_type (integer_type_node,
9494 tree_cons (NULL_TREE, V4SF_type_node,
9495 tree_cons (NULL_TREE,
9496 V4SF_type_node,
9497 endlink)));
9498 tree v4si_ftype_v4sf_v4sf
9499 = build_function_type (V4SI_type_node,
9500 tree_cons (NULL_TREE, V4SF_type_node,
9501 tree_cons (NULL_TREE,
9502 V4SF_type_node,
9503 endlink)));
9504 /* MMX/SSE/integer conversions. */
bd793c65
BS
9505 tree int_ftype_v4sf
9506 = build_function_type (integer_type_node,
9507 tree_cons (NULL_TREE, V4SF_type_node,
9508 endlink));
9509 tree int_ftype_v8qi
9510 = build_function_type (integer_type_node,
9511 tree_cons (NULL_TREE, V8QI_type_node,
9512 endlink));
9513 tree int_ftype_v2si
9514 = build_function_type (integer_type_node,
9515 tree_cons (NULL_TREE, V2SI_type_node,
9516 endlink));
9517 tree v2si_ftype_int
9518 = build_function_type (V2SI_type_node,
9519 tree_cons (NULL_TREE, integer_type_node,
9520 endlink));
9521 tree v4sf_ftype_v4sf_int
9522 = build_function_type (integer_type_node,
9523 tree_cons (NULL_TREE, V4SF_type_node,
9524 tree_cons (NULL_TREE, integer_type_node,
9525 endlink)));
9526 tree v4sf_ftype_v4sf_v2si
9527 = build_function_type (V4SF_type_node,
9528 tree_cons (NULL_TREE, V4SF_type_node,
9529 tree_cons (NULL_TREE, V2SI_type_node,
9530 endlink)));
9531 tree int_ftype_v4hi_int
9532 = build_function_type (integer_type_node,
9533 tree_cons (NULL_TREE, V4HI_type_node,
9534 tree_cons (NULL_TREE, integer_type_node,
9535 endlink)));
9536 tree v4hi_ftype_v4hi_int_int
332316cd 9537 = build_function_type (V4HI_type_node,
bd793c65
BS
9538 tree_cons (NULL_TREE, V4HI_type_node,
9539 tree_cons (NULL_TREE, integer_type_node,
9540 tree_cons (NULL_TREE,
9541 integer_type_node,
9542 endlink))));
9543 /* Miscellaneous. */
9544 tree v8qi_ftype_v4hi_v4hi
9545 = build_function_type (V8QI_type_node,
9546 tree_cons (NULL_TREE, V4HI_type_node,
9547 tree_cons (NULL_TREE, V4HI_type_node,
9548 endlink)));
9549 tree v4hi_ftype_v2si_v2si
9550 = build_function_type (V4HI_type_node,
9551 tree_cons (NULL_TREE, V2SI_type_node,
9552 tree_cons (NULL_TREE, V2SI_type_node,
9553 endlink)));
9554 tree v4sf_ftype_v4sf_v4sf_int
9555 = build_function_type (V4SF_type_node,
9556 tree_cons (NULL_TREE, V4SF_type_node,
9557 tree_cons (NULL_TREE, V4SF_type_node,
9558 tree_cons (NULL_TREE,
9559 integer_type_node,
9560 endlink))));
9561 tree v4hi_ftype_v8qi_v8qi
9562 = build_function_type (V4HI_type_node,
9563 tree_cons (NULL_TREE, V8QI_type_node,
9564 tree_cons (NULL_TREE, V8QI_type_node,
9565 endlink)));
9566 tree v2si_ftype_v4hi_v4hi
9567 = build_function_type (V2SI_type_node,
9568 tree_cons (NULL_TREE, V4HI_type_node,
9569 tree_cons (NULL_TREE, V4HI_type_node,
9570 endlink)));
9571 tree v4hi_ftype_v4hi_int
9572 = build_function_type (V4HI_type_node,
9573 tree_cons (NULL_TREE, V4HI_type_node,
9574 tree_cons (NULL_TREE, integer_type_node,
9575 endlink)));
bd793c65
BS
9576 tree v4hi_ftype_v4hi_di
9577 = build_function_type (V4HI_type_node,
9578 tree_cons (NULL_TREE, V4HI_type_node,
9579 tree_cons (NULL_TREE,
9580 long_long_integer_type_node,
9581 endlink)));
9582 tree v2si_ftype_v2si_di
9583 = build_function_type (V2SI_type_node,
9584 tree_cons (NULL_TREE, V2SI_type_node,
9585 tree_cons (NULL_TREE,
9586 long_long_integer_type_node,
9587 endlink)));
9588 tree void_ftype_void
9589 = build_function_type (void_type_node, endlink);
9590 tree void_ftype_pchar_int
9591 = build_function_type (void_type_node,
9592 tree_cons (NULL_TREE, pchar_type_node,
9593 tree_cons (NULL_TREE, integer_type_node,
9594 endlink)));
9595 tree void_ftype_unsigned
9596 = build_function_type (void_type_node,
9597 tree_cons (NULL_TREE, unsigned_type_node,
9598 endlink));
9599 tree unsigned_ftype_void
9600 = build_function_type (unsigned_type_node, endlink);
9601 tree di_ftype_void
9602 = build_function_type (long_long_unsigned_type_node, endlink);
9603 tree ti_ftype_void
9604 = build_function_type (intTI_type_node, endlink);
9605 tree v2si_ftype_v4sf
9606 = build_function_type (V2SI_type_node,
9607 tree_cons (NULL_TREE, V4SF_type_node,
9608 endlink));
9609 /* Loads/stores. */
9610 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
9611 tree_cons (NULL_TREE, V8QI_type_node,
9612 tree_cons (NULL_TREE,
9613 pchar_type_node,
9614 endlink)));
9615 tree void_ftype_v8qi_v8qi_pchar
9616 = build_function_type (void_type_node, maskmovq_args);
9617 tree v4sf_ftype_pfloat
9618 = build_function_type (V4SF_type_node,
9619 tree_cons (NULL_TREE, pfloat_type_node,
9620 endlink));
9621 tree v4sf_ftype_float
9622 = build_function_type (V4SF_type_node,
9623 tree_cons (NULL_TREE, float_type_node,
9624 endlink));
9625 tree v4sf_ftype_float_float_float_float
9626 = build_function_type (V4SF_type_node,
9627 tree_cons (NULL_TREE, float_type_node,
9628 tree_cons (NULL_TREE, float_type_node,
9629 tree_cons (NULL_TREE,
9630 float_type_node,
9631 tree_cons (NULL_TREE,
9632 float_type_node,
9633 endlink)))));
9634 /* @@@ the type is bogus */
9635 tree v4sf_ftype_v4sf_pv2si
9636 = build_function_type (V4SF_type_node,
9637 tree_cons (NULL_TREE, V4SF_type_node,
9638 tree_cons (NULL_TREE, pv2si_type_node,
9639 endlink)));
9640 tree v4sf_ftype_pv2si_v4sf
9641 = build_function_type (V4SF_type_node,
9642 tree_cons (NULL_TREE, V4SF_type_node,
9643 tree_cons (NULL_TREE, pv2si_type_node,
9644 endlink)));
9645 tree void_ftype_pfloat_v4sf
9646 = build_function_type (void_type_node,
9647 tree_cons (NULL_TREE, pfloat_type_node,
9648 tree_cons (NULL_TREE, V4SF_type_node,
9649 endlink)));
9650 tree void_ftype_pdi_di
9651 = build_function_type (void_type_node,
9652 tree_cons (NULL_TREE, pdi_type_node,
9653 tree_cons (NULL_TREE,
9654 long_long_unsigned_type_node,
9655 endlink)));
9656 /* Normal vector unops. */
9657 tree v4sf_ftype_v4sf
9658 = build_function_type (V4SF_type_node,
9659 tree_cons (NULL_TREE, V4SF_type_node,
9660 endlink));
0f290768 9661
bd793c65
BS
9662 /* Normal vector binops. */
9663 tree v4sf_ftype_v4sf_v4sf
9664 = build_function_type (V4SF_type_node,
9665 tree_cons (NULL_TREE, V4SF_type_node,
9666 tree_cons (NULL_TREE, V4SF_type_node,
9667 endlink)));
9668 tree v8qi_ftype_v8qi_v8qi
9669 = build_function_type (V8QI_type_node,
9670 tree_cons (NULL_TREE, V8QI_type_node,
9671 tree_cons (NULL_TREE, V8QI_type_node,
9672 endlink)));
9673 tree v4hi_ftype_v4hi_v4hi
9674 = build_function_type (V4HI_type_node,
9675 tree_cons (NULL_TREE, V4HI_type_node,
9676 tree_cons (NULL_TREE, V4HI_type_node,
9677 endlink)));
9678 tree v2si_ftype_v2si_v2si
9679 = build_function_type (V2SI_type_node,
9680 tree_cons (NULL_TREE, V2SI_type_node,
9681 tree_cons (NULL_TREE, V2SI_type_node,
9682 endlink)));
9683 tree ti_ftype_ti_ti
9684 = build_function_type (intTI_type_node,
9685 tree_cons (NULL_TREE, intTI_type_node,
9686 tree_cons (NULL_TREE, intTI_type_node,
9687 endlink)));
9688 tree di_ftype_di_di
9689 = build_function_type (long_long_unsigned_type_node,
9690 tree_cons (NULL_TREE, long_long_unsigned_type_node,
9691 tree_cons (NULL_TREE,
9692 long_long_unsigned_type_node,
9693 endlink)));
9694
9695 /* Add all builtins that are more or less simple operations on two
9696 operands. */
9697 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
9698 {
9699 /* Use one of the operands; the target can have a different mode for
9700 mask-generating compares. */
9701 enum machine_mode mode;
9702 tree type;
9703
9704 if (d->name == 0)
9705 continue;
9706 mode = insn_data[d->icode].operand[1].mode;
9707
9708 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
9709 continue;
9710
9711 switch (mode)
9712 {
9713 case V4SFmode:
9714 type = v4sf_ftype_v4sf_v4sf;
9715 break;
9716 case V8QImode:
9717 type = v8qi_ftype_v8qi_v8qi;
9718 break;
9719 case V4HImode:
9720 type = v4hi_ftype_v4hi_v4hi;
9721 break;
9722 case V2SImode:
9723 type = v2si_ftype_v2si_v2si;
9724 break;
9725 case TImode:
9726 type = ti_ftype_ti_ti;
9727 break;
9728 case DImode:
9729 type = di_ftype_di_di;
9730 break;
9731
9732 default:
9733 abort ();
9734 }
0f290768 9735
bd793c65
BS
9736 /* Override for comparisons. */
9737 if (d->icode == CODE_FOR_maskcmpv4sf3
9738 || d->icode == CODE_FOR_maskncmpv4sf3
9739 || d->icode == CODE_FOR_vmmaskcmpv4sf3
9740 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
9741 type = v4si_ftype_v4sf_v4sf;
9742
9743 def_builtin (d->name, type, d->code);
9744 }
9745
9746 /* Add the remaining MMX insns with somewhat more complicated types. */
9747 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
9748 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
9749 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
9750 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
9751 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
9752 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
9753 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
9754 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
9755 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
9756
9757 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
9758 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
9759 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
9760
9761 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
9762 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
9763
9764 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
9765 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
9766
9767 /* Everything beyond this point is SSE only. */
9768 if (! TARGET_SSE)
9769 return;
0f290768 9770
bd793c65
BS
9771 /* comi/ucomi insns. */
9772 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
9773 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
9774
9775 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
9776 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
9777 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
9778
9779 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
9780 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
9781 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
9782 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
9783 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
9784 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
9785
9786 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
9787 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
9788
9789 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
9790
9791 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
9792 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
9793 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
9794 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
9795 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
9796 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
9797
9798 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
9799 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
9800 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
9801 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
9802
9803 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
9804 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
9805 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
9806 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
9807
9808 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
9809 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
9810
9811 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
9812
9813 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
9814 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
9815 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
9816 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
9817 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
9818 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
9819
9820 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
9821
9822 /* Composite intrinsics. */
9823 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
9824 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
9825 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
9826 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
9827 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
9828 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
9829 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
9830}
9831
9832/* Errors in the source file can cause expand_expr to return const0_rtx
9833 where we expect a vector. To avoid crashing, use one of the vector
9834 clear instructions. */
9835static rtx
9836safe_vector_operand (x, mode)
9837 rtx x;
9838 enum machine_mode mode;
9839{
9840 if (x != const0_rtx)
9841 return x;
9842 x = gen_reg_rtx (mode);
9843
9844 if (VALID_MMX_REG_MODE (mode))
9845 emit_insn (gen_mmx_clrdi (mode == DImode ? x
9846 : gen_rtx_SUBREG (DImode, x, 0)));
9847 else
9848 emit_insn (gen_sse_clrti (mode == TImode ? x
9849 : gen_rtx_SUBREG (TImode, x, 0)));
9850 return x;
9851}
9852
9853/* Subroutine of ix86_expand_builtin to take care of binop insns. */
9854
9855static rtx
9856ix86_expand_binop_builtin (icode, arglist, target)
9857 enum insn_code icode;
9858 tree arglist;
9859 rtx target;
9860{
9861 rtx pat;
9862 tree arg0 = TREE_VALUE (arglist);
9863 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9864 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9865 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9866 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9867 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
9868 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
9869
9870 if (VECTOR_MODE_P (mode0))
9871 op0 = safe_vector_operand (op0, mode0);
9872 if (VECTOR_MODE_P (mode1))
9873 op1 = safe_vector_operand (op1, mode1);
9874
9875 if (! target
9876 || GET_MODE (target) != tmode
9877 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9878 target = gen_reg_rtx (tmode);
9879
9880 /* In case the insn wants input operands in modes different from
9881 the result, abort. */
9882 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
9883 abort ();
9884
9885 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9886 op0 = copy_to_mode_reg (mode0, op0);
9887 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
9888 op1 = copy_to_mode_reg (mode1, op1);
9889
9890 pat = GEN_FCN (icode) (target, op0, op1);
9891 if (! pat)
9892 return 0;
9893 emit_insn (pat);
9894 return target;
9895}
9896
9897/* Subroutine of ix86_expand_builtin to take care of stores. */
9898
9899static rtx
9900ix86_expand_store_builtin (icode, arglist, shuffle)
9901 enum insn_code icode;
9902 tree arglist;
9903 int shuffle;
9904{
9905 rtx pat;
9906 tree arg0 = TREE_VALUE (arglist);
9907 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9908 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9909 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9910 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
9911 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
9912
9913 if (VECTOR_MODE_P (mode1))
9914 op1 = safe_vector_operand (op1, mode1);
9915
9916 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
9917 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
9918 op1 = copy_to_mode_reg (mode1, op1);
9919 if (shuffle >= 0)
9920 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
9921 pat = GEN_FCN (icode) (op0, op1);
9922 if (pat)
9923 emit_insn (pat);
9924 return 0;
9925}
9926
9927/* Subroutine of ix86_expand_builtin to take care of unop insns. */
9928
9929static rtx
9930ix86_expand_unop_builtin (icode, arglist, target, do_load)
9931 enum insn_code icode;
9932 tree arglist;
9933 rtx target;
9934 int do_load;
9935{
9936 rtx pat;
9937 tree arg0 = TREE_VALUE (arglist);
9938 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9939 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9940 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
9941
9942 if (! target
9943 || GET_MODE (target) != tmode
9944 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9945 target = gen_reg_rtx (tmode);
9946 if (do_load)
9947 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
9948 else
9949 {
9950 if (VECTOR_MODE_P (mode0))
9951 op0 = safe_vector_operand (op0, mode0);
9952
9953 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9954 op0 = copy_to_mode_reg (mode0, op0);
9955 }
9956
9957 pat = GEN_FCN (icode) (target, op0);
9958 if (! pat)
9959 return 0;
9960 emit_insn (pat);
9961 return target;
9962}
9963
9964/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
9965 sqrtss, rsqrtss, rcpss. */
9966
9967static rtx
9968ix86_expand_unop1_builtin (icode, arglist, target)
9969 enum insn_code icode;
9970 tree arglist;
9971 rtx target;
9972{
9973 rtx pat;
9974 tree arg0 = TREE_VALUE (arglist);
9975 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9976 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9977 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
9978
9979 if (! target
9980 || GET_MODE (target) != tmode
9981 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9982 target = gen_reg_rtx (tmode);
9983
9984 if (VECTOR_MODE_P (mode0))
9985 op0 = safe_vector_operand (op0, mode0);
9986
9987 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9988 op0 = copy_to_mode_reg (mode0, op0);
9989
9990 pat = GEN_FCN (icode) (target, op0, op0);
9991 if (! pat)
9992 return 0;
9993 emit_insn (pat);
9994 return target;
9995}
9996
9997/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
9998
9999static rtx
10000ix86_expand_sse_compare (d, arglist, target)
10001 struct builtin_description *d;
10002 tree arglist;
10003 rtx target;
10004{
10005 rtx pat;
10006 tree arg0 = TREE_VALUE (arglist);
10007 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10008 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10009 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10010 rtx op2;
10011 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
10012 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
10013 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
10014 enum rtx_code comparison = d->comparison;
10015
10016 if (VECTOR_MODE_P (mode0))
10017 op0 = safe_vector_operand (op0, mode0);
10018 if (VECTOR_MODE_P (mode1))
10019 op1 = safe_vector_operand (op1, mode1);
10020
10021 /* Swap operands if we have a comparison that isn't available in
10022 hardware. */
10023 if (d->flag)
10024 {
10025 target = gen_reg_rtx (tmode);
10026 emit_move_insn (target, op1);
10027 op1 = op0;
10028 op0 = target;
10029 comparison = swap_condition (comparison);
10030 }
10031 else if (! target
10032 || GET_MODE (target) != tmode
10033 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
10034 target = gen_reg_rtx (tmode);
10035
10036 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
10037 op0 = copy_to_mode_reg (mode0, op0);
10038 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
10039 op1 = copy_to_mode_reg (mode1, op1);
10040
10041 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
10042 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
10043 if (! pat)
10044 return 0;
10045 emit_insn (pat);
10046 return target;
10047}
10048
10049/* Subroutine of ix86_expand_builtin to take care of comi insns. */
10050
10051static rtx
10052ix86_expand_sse_comi (d, arglist, target)
10053 struct builtin_description *d;
10054 tree arglist;
10055 rtx target;
10056{
10057 rtx pat;
10058 tree arg0 = TREE_VALUE (arglist);
10059 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10060 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10061 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10062 rtx op2;
10063 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
10064 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
10065 enum rtx_code comparison = d->comparison;
10066
10067 if (VECTOR_MODE_P (mode0))
10068 op0 = safe_vector_operand (op0, mode0);
10069 if (VECTOR_MODE_P (mode1))
10070 op1 = safe_vector_operand (op1, mode1);
10071
10072 /* Swap operands if we have a comparison that isn't available in
10073 hardware. */
10074 if (d->flag)
10075 {
10076 rtx tmp = op1;
10077 op1 = op0;
10078 op0 = tmp;
10079 comparison = swap_condition (comparison);
10080 }
10081
10082 target = gen_reg_rtx (SImode);
10083 emit_move_insn (target, const0_rtx);
10084 target = gen_rtx_SUBREG (QImode, target, 0);
10085
10086 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
10087 op0 = copy_to_mode_reg (mode0, op0);
10088 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
10089 op1 = copy_to_mode_reg (mode1, op1);
10090
10091 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
10092 pat = GEN_FCN (d->icode) (op0, op1, op2);
10093 if (! pat)
10094 return 0;
10095 emit_insn (pat);
10096 emit_insn (gen_setcc_2 (target, op2));
10097
10098 return target;
10099}
10100
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.

   Irregular builtins are expanded explicitly in the switch below; the
   regular one- and two-operand builtins fall through to the
   table-driven loops (bdesc_2arg, bdesc_1arg, bdesc_comi) at the end.
   Returns the result rtx, or 0 for void builtins or on failure.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2, arg3;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_M_FROM_INT:
      /* Write the SImode argument into the low half of a DImode pseudo;
	 the high half is left undefined.  */
      target = gen_reg_rtx (DImode);
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
      return target;

    case IX86_BUILTIN_M_TO_INT:
      /* Extract the low SImode half of a DImode MMX value.  */
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      op0 = copy_to_mode_reg (DImode, op0);
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
      return target;

    case IX86_BUILTIN_PEXTRW:
      /* Extract a word; the second argument must be a literal selector.  */
      icode = CODE_FOR_mmx_pextrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
      /* Insert a word; the third argument must be a literal selector.  */
      icode = CODE_FOR_mmx_pinsrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
      icode = CODE_FOR_mmx_maskmovq;
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      /* NOTE(review): op0 pairs with operand 0 (mode0) but is checked
	 against operand[1]'s predicate — looks like it should be
	 operand[0].predicate; verify against the mmx_maskmovq pattern
	 in i386.md (harmless if both predicates are identical).  */
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
      /* Merge a 64-bit memory operand into the high/low half of a
	 V4SF register.  The second argument is an address.  */
      icode = (fcode == IX86_BUILTIN_LOADHPS
	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      /* Store the high/low half of a V4SF value; the first argument
	 is the destination address.  The insn is used in its
	 store form: destination mem appears as both operands 0 and 1.  */
      icode = (fcode == IX86_BUILTIN_STOREHPS
	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);

    case IX86_BUILTIN_LDMXCSR:
      /* ldmxcsr only takes a memory operand, so go through a stack slot.  */
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      /* stmxcsr only writes to memory, so read back via a stack slot.  */
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_PREFETCH:
      /* The second argument (prefetch hint) must be a literal.  */
      icode = CODE_FOR_prefetch;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}

      op0 = copy_to_mode_reg (Pmode, op0);
      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      /* NOTE(review): prefetch produces no value, yet this returns the
	 incoming TARGET rather than 0 like the other void cases —
	 confirm whether that is intentional.  */
      return target;

    case IX86_BUILTIN_SHUFPS:
      /* Shuffle; the third argument must be a literal mask.  */
      icode = CODE_FOR_sse_shufps;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
      /* pshufw: operand 1 of the pattern is the destination again, so
	 the source/mask are operands 2 and 3.  */
      icode = CODE_FOR_mmx_pshufw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[2].mode;
      mode1 = insn_data[icode].operand[3].mode;

      /* NOTE(review): op0 is operand 2 (mode taken from operand[2]) but
	 is checked against operand[1]'s predicate — looks like it should
	 be operand[2].predicate; verify against the mmx_pshufw pattern.  */
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    /* Composite intrinsics.  */
    case IX86_BUILTIN_SETPS1:
      /* Broadcast one float: spill to the stack, loadss, then
	 shufps with mask 0 to replicate element 0.  */
      target = assign_386_stack_local (SFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, SFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_loadss (op0, adjust_address (target, V4SFmode, 0)));
      emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
      return op0;

    case IX86_BUILTIN_SETPS:
      /* Build a vector from four floats via a V4SF stack slot.  */
      target = assign_386_stack_local (V4SFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
      emit_move_insn (adjust_address (target, SFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, SFmode, 4),
		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, SFmode, 8),
		      expand_expr (arg2, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, SFmode, 12),
		      expand_expr (arg3, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_movaps (op0, target));
      return op0;

    case IX86_BUILTIN_CLRPS:
      target = gen_reg_rtx (TImode);
      emit_insn (gen_sse_clrti (target));
      return target;

    case IX86_BUILTIN_LOADRPS:
      /* Load, then reverse the four elements (shufps mask 0x1b).  */
      target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
					 gen_reg_rtx (V4SFmode), 1);
      emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
      return target;

    case IX86_BUILTIN_LOADPS1:
      /* Load one float, then broadcast it (shufps mask 0).  */
      target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
					 gen_reg_rtx (V4SFmode), 1);
      emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPS1:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
    case IX86_BUILTIN_STORERPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    default:
      break;
    }

  /* Table-driven expansion for the regular builtins.  */
  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}
4211a8fb
JH
10489
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  Returns a MEM rtx for
   the stack slot now holding OPERAND, in mode MODE.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;

  /* The slot is carved directly out of the stack, so the frame layout
     must already be final.  */
  if (!reload_completed)
    abort ();

  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      /* The red zone below the stack pointer may be written without
	 adjusting %rsp, so simply store into its bottom.  */
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      /* No red zone: allocate the slot with a 64-bit push.  */
      switch (mode)
	{
	case HImode:
	case SImode:
	  /* Pushes are 8 bytes wide in 64-bit mode; widen the value so
	     a single push covers the whole slot.  */
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (DImode,
						gen_rtx_PRE_DEC (DImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      /* 32-bit mode: allocate the slot with one or two 32-bit pushes.  */
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];

	    /* Push the high half first so the value lies little-endian
	       at the resulting stack pointer.  */
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[1]));
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  /* Note the MEM uses the (possibly widened) mode of OPERAND
	     for the push, while RESULT below is in the caller's MODE.  */
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (GET_MODE (operand),
						gen_rtx_PRE_DEC (SImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
10571
10572/* Free operand from the memory. */
10573void
10574ix86_free_from_memory (mode)
10575 enum machine_mode mode;
10576{
898d374d
JH
10577 if (!TARGET_64BIT || !TARGET_RED_ZONE)
10578 {
10579 int size;
10580
10581 if (mode == DImode || TARGET_64BIT)
10582 size = 8;
10583 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
10584 size = 2;
10585 else
10586 size = 4;
10587 /* Use LEA to deallocate stack space. In peephole2 it will be converted
10588 to pop or add instruction if registers are available. */
10589 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10590 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10591 GEN_INT (size))));
10592 }
4211a8fb 10593}
a946dd00 10594
f84aa48a
JH
10595/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
10596 QImode must go into class Q_REGS.
10597 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
10598 movdf to do mem-to-mem moves through integer regs. */
10599enum reg_class
10600ix86_preferred_reload_class (x, class)
10601 rtx x;
10602 enum reg_class class;
10603{
10604 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
10605 {
10606 /* SSE can't load any constant directly yet. */
10607 if (SSE_CLASS_P (class))
10608 return NO_REGS;
10609 /* Floats can load 0 and 1. */
10610 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
10611 {
10612 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
10613 if (MAYBE_SSE_CLASS_P (class))
10614 return (reg_class_subset_p (class, GENERAL_REGS)
10615 ? GENERAL_REGS : FLOAT_REGS);
10616 else
10617 return class;
10618 }
10619 /* General regs can load everything. */
10620 if (reg_class_subset_p (class, GENERAL_REGS))
10621 return GENERAL_REGS;
10622 /* In case we haven't resolved FLOAT or SSE yet, give up. */
10623 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
10624 return NO_REGS;
10625 }
10626 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
10627 return NO_REGS;
10628 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
10629 return Q_REGS;
10630 return class;
10631}
10632
10633/* If we are copying between general and FP registers, we need a memory
10634 location. The same is true for SSE and MMX registers.
10635
10636 The macro can't work reliably when one of the CLASSES is class containing
10637 registers from multiple units (SSE, MMX, integer). We avoid this by never
10638 combining those units in single alternative in the machine description.
10639 Ensure that this constraint holds to avoid unexpected surprises.
10640
10641 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
10642 enforce these sanity checks. */
10643int
10644ix86_secondary_memory_needed (class1, class2, mode, strict)
10645 enum reg_class class1, class2;
10646 enum machine_mode mode;
10647 int strict;
10648{
10649 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
10650 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
10651 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
10652 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
10653 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
10654 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
10655 {
10656 if (strict)
10657 abort ();
10658 else
10659 return 1;
10660 }
10661 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
10662 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
10663 && (mode) != SImode)
10664 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
10665 && (mode) != SImode));
10666}
10667/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 10668 one in class CLASS2.
f84aa48a
JH
10669
10670 It is not required that the cost always equal 2 when FROM is the same as TO;
10671 on some machines it is expensive to move between registers if they are not
10672 general registers. */
10673int
10674ix86_register_move_cost (mode, class1, class2)
10675 enum machine_mode mode;
10676 enum reg_class class1, class2;
10677{
10678 /* In case we require secondary memory, compute cost of the store followed
10679 by load. In case of copying from general_purpose_register we may emit
10680 multiple stores followed by single load causing memory size mismatch
10681 stall. Count this as arbitarily high cost of 20. */
10682 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
10683 {
92d0fb09 10684 int add_cost = 0;
62415523 10685 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
92d0fb09 10686 add_cost = 20;
62415523 10687 return (MEMORY_MOVE_COST (mode, class1, 0)
92d0fb09 10688 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
f84aa48a 10689 }
92d0fb09 10690 /* Moves between SSE/MMX and integer unit are expensive. */
62415523
JH
10691 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
10692 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
fa79946e
JH
10693 return ix86_cost->mmxsse_to_integer;
10694 if (MAYBE_FLOAT_CLASS_P (class1))
10695 return ix86_cost->fp_move;
10696 if (MAYBE_SSE_CLASS_P (class1))
10697 return ix86_cost->sse_move;
10698 if (MAYBE_MMX_CLASS_P (class1))
10699 return ix86_cost->mmx_move;
f84aa48a
JH
10700 return 2;
10701}
10702
a946dd00
JH
10703/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
10704int
10705ix86_hard_regno_mode_ok (regno, mode)
10706 int regno;
10707 enum machine_mode mode;
10708{
10709 /* Flags and only flags can only hold CCmode values. */
10710 if (CC_REGNO_P (regno))
10711 return GET_MODE_CLASS (mode) == MODE_CC;
10712 if (GET_MODE_CLASS (mode) == MODE_CC
10713 || GET_MODE_CLASS (mode) == MODE_RANDOM
10714 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
10715 return 0;
10716 if (FP_REGNO_P (regno))
10717 return VALID_FP_MODE_P (mode);
10718 if (SSE_REGNO_P (regno))
10719 return VALID_SSE_REG_MODE (mode);
10720 if (MMX_REGNO_P (regno))
10721 return VALID_MMX_REG_MODE (mode);
10722 /* We handle both integer and floats in the general purpose registers.
10723 In future we should be able to handle vector modes as well. */
10724 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
10725 return 0;
10726 /* Take care for QImode values - they can be in non-QI regs, but then
10727 they do cause partial register stalls. */
d2836273 10728 if (regno < 4 || mode != QImode || TARGET_64BIT)
a946dd00
JH
10729 return 1;
10730 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
10731}
fa79946e
JH
10732
10733/* Return the cost of moving data of mode M between a
10734 register and memory. A value of 2 is the default; this cost is
10735 relative to those in `REGISTER_MOVE_COST'.
10736
10737 If moving between registers and memory is more expensive than
10738 between two registers, you should define this macro to express the
a4f31c00
AJ
10739 relative cost.
10740
fa79946e
JH
10741 Model also increased moving costs of QImode registers in non
10742 Q_REGS classes.
10743 */
10744int
10745ix86_memory_move_cost (mode, class, in)
10746 enum machine_mode mode;
10747 enum reg_class class;
10748 int in;
10749{
10750 if (FLOAT_CLASS_P (class))
10751 {
10752 int index;
10753 switch (mode)
10754 {
10755 case SFmode:
10756 index = 0;
10757 break;
10758 case DFmode:
10759 index = 1;
10760 break;
10761 case XFmode:
10762 case TFmode:
10763 index = 2;
10764 break;
10765 default:
10766 return 100;
10767 }
10768 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
10769 }
10770 if (SSE_CLASS_P (class))
10771 {
10772 int index;
10773 switch (GET_MODE_SIZE (mode))
10774 {
10775 case 4:
10776 index = 0;
10777 break;
10778 case 8:
10779 index = 1;
10780 break;
10781 case 16:
10782 index = 2;
10783 break;
10784 default:
10785 return 100;
10786 }
10787 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
10788 }
10789 if (MMX_CLASS_P (class))
10790 {
10791 int index;
10792 switch (GET_MODE_SIZE (mode))
10793 {
10794 case 4:
10795 index = 0;
10796 break;
10797 case 8:
10798 index = 1;
10799 break;
10800 default:
10801 return 100;
10802 }
10803 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
10804 }
10805 switch (GET_MODE_SIZE (mode))
10806 {
10807 case 1:
10808 if (in)
10809 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
10810 : ix86_cost->movzbl_load);
10811 else
10812 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
10813 : ix86_cost->int_store[0] + 4);
10814 break;
10815 case 2:
10816 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
10817 default:
10818 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
10819 if (mode == TFmode)
10820 mode = XFmode;
3bb7e126 10821 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
fa79946e
JH
10822 * (int) GET_MODE_SIZE (mode) / 4);
10823 }
10824}
0ecf09f9 10825
2cc07db4
RH
#ifdef DO_GLOBAL_CTORS_BODY
/* SVR3-style constructor output: emit a push of SYMBOL's address into
   the init section; the DO_GLOBAL_CTORS_BODY sequence pops and calls
   the collected entries.  PRIORITY is not supported here.  */
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fprintf (asm_out_file, "\tpushl $");
  assemble_name (asm_out_file, XSTR (symbol, 0));
  putc ('\n', asm_out_file);
}
#endif
10838
7c262518
RH
10839#if defined(TARGET_ELF) && defined(TARGET_COFF)
10840static void
715bdd29 10841sco_asm_named_section (name, flags)
7c262518
RH
10842 const char *name;
10843 unsigned int flags;
7c262518
RH
10844{
10845 if (TARGET_ELF)
715bdd29 10846 default_elf_asm_named_section (name, flags);
7c262518 10847 else
715bdd29 10848 default_coff_asm_named_section (name, flags);
7c262518 10849}
2cc07db4
RH
10850
10851static void
10852sco_asm_out_constructor (symbol, priority)
10853 rtx symbol;
10854 int priority;
10855{
10856 if (TARGET_ELF)
10857 default_named_section_asm_out_constrctor (symbol, priority);
10858 else
10859 ix86_svr3_asm_out_constructor (symbol, priority);
10860}
7c262518 10861#endif
This page took 2.637414 seconds and 5 git commands to generate.