/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,			/* cost of an add instruction */
  3,			/* cost of a lea instruction */
  2,			/* variable shift costs */
  3,			/* constant shift costs */
  3,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  3,			/* cost of a divide/mod */
  3,			/* cost of movsx */
  3,			/* cost of movzx */
  0,			/* "large" insn */
  2,			/* MOVE_RATIO */
  2,			/* cost for loading QImode using movzbl */
  {2, 2, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 2},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {2, 2, 2},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  3,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers
			   in SImode and DImode */
  {3, 3},		/* cost of storing MMX registers
			   in SImode and DImode */
  3,			/* cost of moving SSE register */
  {3, 3, 3},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {3, 3, 3},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  6,			/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  23,			/* cost of a divide/mod */
  3,			/* cost of movsx */
  2,			/* cost of movzx */
  15,			/* "large" insn */
  3,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  12,			/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  40,			/* cost of a divide/mod */
  3,			/* cost of movsx */
  2,			/* cost of movzx */
  15,			/* "large" insn */
  3,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
};

static const
struct processor_costs pentium_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  4,			/* variable shift costs */
  1,			/* constant shift costs */
  11,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  25,			/* cost of a divide/mod */
  3,			/* cost of movsx */
  2,			/* cost of movzx */
  8,			/* "large" insn */
  6,			/* MOVE_RATIO */
  6,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  8,			/* cost of moving MMX register */
  {8, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {8, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
};

static const
struct processor_costs pentiumpro_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  4,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  17,			/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  8,			/* "large" insn */
  6,			/* MOVE_RATIO */
  2,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  32,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
};

static const
struct processor_costs k6_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  3,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  18,			/* cost of a divide/mod */
  2,			/* cost of movsx */
  2,			/* cost of movzx */
  8,			/* "large" insn */
  4,			/* MOVE_RATIO */
  3,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  6,			/* MMX or SSE register to integer */
  32,			/* size of prefetch block */
  1,			/* number of parallel prefetches */
};

static const
struct processor_costs athlon_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  5,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  42,			/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  8,			/* "large" insn */
  9,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 20},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 16},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  6,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
};

static const
struct processor_costs pentium4_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  8,			/* variable shift costs */
  8,			/* constant shift costs */
  30,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  112,			/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  16,			/* "large" insn */
  6,			/* MOVE_RATIO */
  2,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  12,			/* cost of moving SSE register */
  {12, 12, 12},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  10,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
};

const struct processor_costs *ix86_cost = &pentium_cost;

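/* Editorial example, not in the original file: the cost tables above are
   consumed through the ix86_cost pointer set in override_options.  A
   minimal sketch of a query, assuming the `add' field name declared for
   struct processor_costs in i386.h:  */
#if 0	/* illustrative only */
static int
example_add_cost ()
{
  /* 1 when tuning for the 386, 2 when tuning for size; all costs are
     relative to an add on the selected cpu.  */
  return ix86_cost->add;
}
#endif
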
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)
#define m_PENT4  (1<<PROCESSOR_PENTIUM4)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_3dnow_a = m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;

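/* Editorial example, not in the original file: each x86_* bitmask above
   is tested against the bit of the cpu being tuned for; i386.h wraps such
   tests in TARGET_* macros.  A sketch of the pattern, assuming the
   (1 << cpu) encoding used by the m_* macros:  */
#if 0	/* illustrative only */
#define EXAMPLE_TARGET_USE_LEAVE (x86_use_leave & (1 << (int) ix86_cpu))
/* Nonzero when tuning for the 386, K6 or Athlon, per x86_use_leave.  */
#endif
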
/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30
/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

#define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))

static const char *const hi_reg_name[] = HI_REGISTER_NAMES;	/* names for 16 bit regs */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;	/* names for 8 bit regs (low) */
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;	/* names for 8 bit regs (high) */

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
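/* Editorial example, not in the original file: REGNO_REG_CLASS simply
   indexes the array above, so REGNO_REG_CLASS (0) is AREG (%eax) and
   REGNO_REG_CLASS (4) is SIREG (%esi).  */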

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};
static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

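/* Editorial worked example, not in the original file: reading the SVR4
   map above, svr4_dbx_register_map[2] == 1, i.e. gcc's regno 2 (%ecx)
   is emitted as DWARF register 1, exactly as the table in the comment
   prescribes.  */
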
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)

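/* Editorial note, not in the original file: cfun->machine points at the
   per-function instance of struct machine_function, so for example
   ix86_stack_locals[(int) SImode][0] caches the first SImode stack
   temporary of the current function.  */
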
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
			)
   [va_arg registers]  (
			> to_allocate	      <- FRAME_POINTER
   [frame]	       (
			)
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};

/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;

/* which cpu are we scheduling for */
enum processor_type ix86_cpu;

/* which unit we are generating floating point math for */
enum fpmath_unit ix86_fpmath;

/* which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
\f
static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_free_machine_status PARAMS ((struct function *));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
static int ia32_use_dfa_pipeline_interface PARAMS ((void));
static int ia32_multipass_dfa_lookahead PARAMS ((void));
static void ix86_init_mmx_sse_builtins PARAMS ((void));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
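
/* Editorial worked example, not in the original file: for the address
   16(%ebx,%ecx,4), i.e. (plus (plus (reg %ebx) (mult (reg %ecx)
   (const_int 4))) (const_int 16)), a successful decomposition fills the
   struct above with base = %ebx, index = %ecx, scale = 4 and
   disp = (const_int 16).  */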

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
					 tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
					    tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
						     tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					   rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static int ix86_save_reg PARAMS ((unsigned int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
const struct attribute_spec ix86_attribute_table[];
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));

#ifdef DO_GLOBAL_CTORS_BODY
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif

/* Register class used for passing the given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
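/* Editorial example, not in the original file, sketching how these
   classes are meant to be used (the real rules live in classify_argument
   below): a `struct { double d; long l; }' argument spans two eightbytes
   and would likely classify as {X86_64_SSEDF_CLASS, X86_64_INTEGER_CLASS},
   while a lone `int' classifies as X86_64_INTEGERSI_CLASS since its upper
   half is padding.  */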
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					const int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
static void ix86_osf_output_function_prologue PARAMS ((FILE *,
						       HOST_WIDE_INT));
#  undef TARGET_ASM_FUNCTION_PROLOGUE
#  define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
      const int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
      {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_MMX = 4,
	  PTA_PREFETCH_SSE = 8,
	  PTA_3DNOW = 16,
	  PTA_3DNOW_A = 64
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_MMX
       | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
       | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
       | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
       | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
       | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
       | PTA_3DNOW_A | PTA_SSE},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);

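  /* Editorial worked example, not in the original file: -march=athlon-xp
     selects the entry carrying PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
     | PTA_3DNOW_A | PTA_SSE, so the loop below enables MASK_MMX,
     MASK_3DNOW, MASK_3DNOW_A and MASK_SSE (unless the user set them
     explicitly) and marks the sse prefetch instruction as usable.  */
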
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_cpu_string && ix86_arch_string)
    ix86_cpu_string = ix86_arch_string;
  if (!ix86_cpu_string)
    ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

JH
963 for (i = 0; i < pta_size; i++)
964 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
965 {
966 ix86_arch = processor_alias_table[i].processor;
967 /* Default cpu tuning to the architecture. */
968 ix86_cpu = ix86_arch;
969 if (processor_alias_table[i].flags & PTA_MMX
970 && !(target_flags & MASK_MMX_SET))
971 target_flags |= MASK_MMX;
972 if (processor_alias_table[i].flags & PTA_3DNOW
973 && !(target_flags & MASK_3DNOW_SET))
974 target_flags |= MASK_3DNOW;
975 if (processor_alias_table[i].flags & PTA_3DNOW_A
976 && !(target_flags & MASK_3DNOW_A_SET))
977 target_flags |= MASK_3DNOW_A;
978 if (processor_alias_table[i].flags & PTA_SSE
979 && !(target_flags & MASK_SSE_SET))
980 target_flags |= MASK_SSE;
981 if (processor_alias_table[i].flags & PTA_SSE2
982 && !(target_flags & MASK_SSE2_SET))
983 target_flags |= MASK_SSE2;
984 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
985 x86_prefetch_sse = true;
986 break;
987 }
400500c4 988
f4365627
JH
989 if (i == pta_size)
990 error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
      {
	ix86_cpu = processor_alias_table[i].processor;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;
  free_machine_status = ix86_free_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_cpu].align_loop;
      align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_cpu].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_cpu].align_func;
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 64 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 3 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

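  /* Editorial worked example, not in the original file:
     -mpreferred-stack-boundary=4 gives i = 4 above, so the boundary
     becomes (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits -- the 16-byte
     alignment wanted for SSE __m128 values.  */
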
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");
      /* Enable by default the SSE and MMX builtins.  */
      target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
      ix86_fpmath = FPMATH_SSE;
    }
  else
    ix86_fpmath = FPMATH_387;

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning ("387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX;
      x86_prefetch_sse = true;
    }

  /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow.  */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
	 extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
	target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & CPUMASK)
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
}
\f
void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
  if (TARGET_64BIT && optimize >= 1)
    flag_omit_frame_pointer = 1;
  if (TARGET_64BIT)
    {
      flag_pcc_struct_return = 0;
      flag_asynchronous_unwind_tables = 1;
    }
}
\f
/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
#endif
  { NULL, 0, 0, false, false, false, NULL }
};
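
/* Editorial example, not in the original file; the function names are
   hypothetical.  Typical uses of the attributes registered above:

     int __attribute__ ((stdcall)) callee_pops (int a, int b);
     int __attribute__ ((regparm (3))) reg_args (int a, int b, int c);

   `stdcall' makes the callee pop its own arguments (see
   ix86_return_pops_args below); `regparm (3)' passes the first three
   integer arguments in registers.  */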

/* Handle a "cdecl" or "stdcall" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args ATTRIBUTE_UNUSED;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  if (TARGET_64BIT)
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle a "regparm" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning ("`%s' attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning ("argument to `%s' attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}

#if defined (OSF_OS) || defined (TARGET_OSF1ELF)

/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int: how many units of
   temporary storage to allocate.

   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.

   We override it here to allow for the new profiling code to go before
   the prologue and the old mcount code to go after the prologue (and
   after %ebx has been set up for ELF shared library support).  */

static void
ix86_osf_output_function_prologue (file, size)
     FILE *file;
     HOST_WIDE_INT size;
{
  const char *prefix = "";
  const char *const lprefix = LPREFIX;
  int labelno = current_function_profile_label_no;

#ifdef OSF_OS

  if (TARGET_UNDERSCORES)
    prefix = "_";

  if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic && !HALF_PIC_P ())
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else if (HALF_PIC_P ())
	{
	  rtx symref;

	  HALF_PIC_EXTERNAL ("_mcount_ptr");
	  symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
						     "_mcount_ptr"));

	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
		   XSTR (symref, 0));
	  fprintf (file, "\tcall *(%%eax)\n");
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }

#else /* !OSF_OS */

  if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic)
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }
#endif /* !OSF_OS */

  function_prologue (file, size);
}

#endif /* OSF_OS || TARGET_OSF1ELF */

b08de47e
MM
1416/* Return 0 if the attributes for two types are incompatible, 1 if they
1417 are compatible, and 2 if they are nearly compatible (which causes a
1418 warning to be generated). */
1419
8d8e52be 1420static int
e075ae69 1421ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
1422 tree type1;
1423 tree type2;
b08de47e 1424{
0f290768 1425 /* Check for mismatch of non-default calling convention. */
27c38fbe 1426 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1427
1428 if (TREE_CODE (type1) != FUNCTION_TYPE)
1429 return 1;
1430
 1431 /* Check for mismatched calling conventions (cdecl vs. stdcall). */
6093f019
RH
1432 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1433 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 1434 return 0;
b08de47e
MM
1435 return 1;
1436}
b08de47e
MM
1437\f
1438/* Value is the number of bytes of arguments automatically
1439 popped when returning from a subroutine call.
1440 FUNDECL is the declaration node of the function (as a tree),
1441 FUNTYPE is the data type of the function (as a tree),
1442 or for a library call it is an identifier node for the subroutine name.
1443 SIZE is the number of bytes of arguments passed on the stack.
1444
1445 On the 80386, the RTD insn may be used to pop them if the number
1446 of args is fixed, but if the number is variable then the caller
1447 must pop them all. RTD can't be used for library calls now
1448 because the library is compiled with the Unix compiler.
1449 Use of RTD is a selectable option, since it is incompatible with
1450 standard Unix calling sequences. If the option is not selected,
1451 the caller must always pop the args.
1452
1453 The attribute stdcall is equivalent to RTD on a per module basis. */
1454
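/* Example (illustrative): given

     void __attribute__ ((stdcall)) f (int a, int b);

   the callee pops its own arguments, so ix86_return_pops_args returns
   8 (two 4-byte words) and the function ends in `ret $8'; a cdecl
   function returns 0 and leaves the cleanup to the caller.  */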
1455int
e075ae69 1456ix86_return_pops_args (fundecl, funtype, size)
b08de47e
MM
1457 tree fundecl;
1458 tree funtype;
1459 int size;
79325812 1460{
3345ee7d 1461 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1462
0f290768 1463 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1464 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1465
0f290768 1466 /* Stdcall functions will pop the stack if not variable args. */
698cdd84
SC
1467 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1468 rtd = 1;
79325812 1469
698cdd84
SC
1470 if (rtd
1471 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
1472 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1473 == void_type_node)))
698cdd84
SC
1474 return size;
1475 }
79325812 1476
232b8f52 1477 /* Lose any fake structure return argument if it is passed on the stack. */
0d7d98ee
JH
1478 if (aggregate_value_p (TREE_TYPE (funtype))
1479 && !TARGET_64BIT)
232b8f52
JJ
1480 {
1481 int nregs = ix86_regparm;
79325812 1482
232b8f52
JJ
1483 if (funtype)
1484 {
1485 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1486
1487 if (attr)
1488 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1489 }
1490
1491 if (!nregs)
1492 return GET_MODE_SIZE (Pmode);
1493 }
1494
1495 return 0;
b08de47e 1496}
b08de47e
MM
1497\f
1498/* Argument support functions. */
1499
53c17031
JH
1500/* Return true when register may be used to pass function parameters. */
1501bool
1502ix86_function_arg_regno_p (regno)
1503 int regno;
1504{
1505 int i;
1506 if (!TARGET_64BIT)
0333394e
JJ
1507 return (regno < REGPARM_MAX
1508 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
53c17031
JH
1509 if (SSE_REGNO_P (regno) && TARGET_SSE)
1510 return true;
1511 /* RAX is used as hidden argument to va_arg functions. */
1512 if (!regno)
1513 return true;
1514 for (i = 0; i < REGPARM_MAX; i++)
1515 if (regno == x86_64_int_parameter_registers[i])
1516 return true;
1517 return false;
1518}
1519
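/* For reference (from the x86-64 ABI): x86_64_int_parameter_registers
   holds %rdi, %rsi, %rdx, %rcx, %r8 and %r9; SSE arguments go in
   %xmm0-%xmm7; and %rax carries the hidden SSE-register count for
   varargs calls, which is why register number 0 is accepted above.  */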
b08de47e
MM
1520/* Initialize a variable CUM of type CUMULATIVE_ARGS
1521 for a call to a function whose data type is FNTYPE.
1522 For a library call, FNTYPE is 0. */
1523
1524void
1525init_cumulative_args (cum, fntype, libname)
e9a25f70 1526 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
b08de47e
MM
1527 tree fntype; /* tree ptr for function decl */
1528 rtx libname; /* SYMBOL_REF of library name or 0 */
1529{
1530 static CUMULATIVE_ARGS zero_cum;
1531 tree param, next_param;
1532
1533 if (TARGET_DEBUG_ARG)
1534 {
1535 fprintf (stderr, "\ninit_cumulative_args (");
1536 if (fntype)
e9a25f70
JL
1537 fprintf (stderr, "fntype code = %s, ret code = %s",
1538 tree_code_name[(int) TREE_CODE (fntype)],
1539 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
1540 else
1541 fprintf (stderr, "no fntype");
1542
1543 if (libname)
1544 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1545 }
1546
1547 *cum = zero_cum;
1548
1549 /* Set up the number of registers to use for passing arguments. */
e075ae69 1550 cum->nregs = ix86_regparm;
53c17031
JH
1551 cum->sse_nregs = SSE_REGPARM_MAX;
1552 if (fntype && !TARGET_64BIT)
b08de47e
MM
1553 {
1554 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 1555
b08de47e
MM
1556 if (attr)
1557 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1558 }
53c17031 1559 cum->maybe_vaarg = false;
b08de47e
MM
1560
1561 /* Determine if this function has variable arguments. This is
 1562 indicated by the last argument being 'void_type_node' if there
 1563 are no variable arguments. If there are variable arguments, then
 1564 we won't pass anything in registers. */
1565
1566 if (cum->nregs)
1567 {
1568 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 1569 param != 0; param = next_param)
b08de47e
MM
1570 {
1571 next_param = TREE_CHAIN (param);
e9a25f70 1572 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
53c17031
JH
1573 {
1574 if (!TARGET_64BIT)
1575 cum->nregs = 0;
1576 cum->maybe_vaarg = true;
1577 }
b08de47e
MM
1578 }
1579 }
53c17031
JH
1580 if ((!fntype && !libname)
1581 || (fntype && !TYPE_ARG_TYPES (fntype)))
1582 cum->maybe_vaarg = 1;
b08de47e
MM
1583
1584 if (TARGET_DEBUG_ARG)
1585 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1586
1587 return;
1588}
1589
53c17031 1590/* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
f710504c 1591 of this code is to classify each eightbyte of an incoming argument by register
53c17031
JH
1592 class and assign registers accordingly. */
1593
1594/* Return the union class of CLASS1 and CLASS2.
1595 See the x86-64 PS ABI for details. */
1596
1597static enum x86_64_reg_class
1598merge_classes (class1, class2)
1599 enum x86_64_reg_class class1, class2;
1600{
1601 /* Rule #1: If both classes are equal, this is the resulting class. */
1602 if (class1 == class2)
1603 return class1;
1604
1605 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1606 the other class. */
1607 if (class1 == X86_64_NO_CLASS)
1608 return class2;
1609 if (class2 == X86_64_NO_CLASS)
1610 return class1;
1611
1612 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1613 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1614 return X86_64_MEMORY_CLASS;
1615
1616 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1617 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1618 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1619 return X86_64_INTEGERSI_CLASS;
1620 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1621 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1622 return X86_64_INTEGER_CLASS;
1623
1624 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1625 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1626 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1627 return X86_64_MEMORY_CLASS;
1628
1629 /* Rule #6: Otherwise class SSE is used. */
1630 return X86_64_SSE_CLASS;
1631}
1632
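/* Worked example (illustrative): merging X86_64_INTEGERSI_CLASS with
   X86_64_SSESF_CLASS yields X86_64_INTEGERSI_CLASS by rule #4, while
   merging X86_64_SSE_CLASS with X86_64_X87_CLASS yields
   X86_64_MEMORY_CLASS by rule #5, forcing the value onto the stack.  */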
1633/* Classify the argument of type TYPE and mode MODE.
1634 CLASSES will be filled by the register class used to pass each word
1635 of the operand. The number of words is returned. In case the parameter
1636 should be passed in memory, 0 is returned. As a special case for zero
1637 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1638
 1639 BIT_OFFSET is used internally for handling records; it specifies the
 1640 offset of the field in bits, modulo 256, to avoid overflow cases.
1641
1642 See the x86-64 PS ABI for details.
1643*/
1644
1645static int
1646classify_argument (mode, type, classes, bit_offset)
1647 enum machine_mode mode;
1648 tree type;
1649 enum x86_64_reg_class classes[MAX_CLASSES];
1650 int bit_offset;
1651{
1652 int bytes =
1653 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1654 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1655
1656 if (type && AGGREGATE_TYPE_P (type))
1657 {
1658 int i;
1659 tree field;
1660 enum x86_64_reg_class subclasses[MAX_CLASSES];
1661
1662 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1663 if (bytes > 16)
1664 return 0;
1665
1666 for (i = 0; i < words; i++)
1667 classes[i] = X86_64_NO_CLASS;
1668
1669 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
 1670 signal the memory class, so handle them as a special case. */
1671 if (!words)
1672 {
1673 classes[0] = X86_64_NO_CLASS;
1674 return 1;
1675 }
1676
1677 /* Classify each field of record and merge classes. */
1678 if (TREE_CODE (type) == RECORD_TYPE)
1679 {
1680 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1681 {
1682 if (TREE_CODE (field) == FIELD_DECL)
1683 {
1684 int num;
1685
1686 /* Bitfields are always classified as integer. Handle them
1687 early, since later code would consider them to be
1688 misaligned integers. */
1689 if (DECL_BIT_FIELD (field))
1690 {
1691 for (i = int_bit_position (field) / 8 / 8;
1692 i < (int_bit_position (field)
1693 + tree_low_cst (DECL_SIZE (field), 0)
1694 + 63) / 8 / 8; i++)
1695 classes[i] =
1696 merge_classes (X86_64_INTEGER_CLASS,
1697 classes[i]);
1698 }
1699 else
1700 {
1701 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1702 TREE_TYPE (field), subclasses,
1703 (int_bit_position (field)
1704 + bit_offset) % 256);
1705 if (!num)
1706 return 0;
1707 for (i = 0; i < num; i++)
1708 {
1709 int pos =
1710 (int_bit_position (field) + bit_offset) / 8 / 8;
1711 classes[i + pos] =
1712 merge_classes (subclasses[i], classes[i + pos]);
1713 }
1714 }
1715 }
1716 }
1717 }
1718 /* Arrays are handled as small records. */
1719 else if (TREE_CODE (type) == ARRAY_TYPE)
1720 {
1721 int num;
1722 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1723 TREE_TYPE (type), subclasses, bit_offset);
1724 if (!num)
1725 return 0;
1726
1727 /* The partial classes are now full classes. */
1728 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1729 subclasses[0] = X86_64_SSE_CLASS;
1730 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1731 subclasses[0] = X86_64_INTEGER_CLASS;
1732
1733 for (i = 0; i < words; i++)
1734 classes[i] = subclasses[i % num];
1735 }
1736 /* Unions are similar to RECORD_TYPE but offset is always 0. */
e4dbaed5
AS
1737 else if (TREE_CODE (type) == UNION_TYPE
1738 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031
JH
1739 {
1740 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1741 {
1742 if (TREE_CODE (field) == FIELD_DECL)
1743 {
1744 int num;
1745 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1746 TREE_TYPE (field), subclasses,
1747 bit_offset);
1748 if (!num)
1749 return 0;
1750 for (i = 0; i < num; i++)
1751 classes[i] = merge_classes (subclasses[i], classes[i]);
1752 }
1753 }
1754 }
1755 else
1756 abort ();
1757
1758 /* Final merger cleanup. */
1759 for (i = 0; i < words; i++)
1760 {
1761 /* If one class is MEMORY, everything should be passed in
1762 memory. */
1763 if (classes[i] == X86_64_MEMORY_CLASS)
1764 return 0;
1765
 d6a7951f 1766 /* The X86_64_SSEUP_CLASS should always be preceded by
53c17031
JH
1767 X86_64_SSE_CLASS. */
1768 if (classes[i] == X86_64_SSEUP_CLASS
1769 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1770 classes[i] = X86_64_SSE_CLASS;
1771
d6a7951f 1772 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
1773 if (classes[i] == X86_64_X87UP_CLASS
1774 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1775 classes[i] = X86_64_SSE_CLASS;
1776 }
1777 return words;
1778 }
1779
 1780 /* Compute the alignment needed. We align all types to their natural
 1781 boundaries, with the exception of XFmode, which is treated as 128-bit aligned. */
1782 if (mode != VOIDmode && mode != BLKmode)
1783 {
1784 int mode_alignment = GET_MODE_BITSIZE (mode);
1785
1786 if (mode == XFmode)
1787 mode_alignment = 128;
1788 else if (mode == XCmode)
1789 mode_alignment = 256;
f5143c46 1790 /* Misaligned fields are always returned in memory. */
53c17031
JH
1791 if (bit_offset % mode_alignment)
1792 return 0;
1793 }
1794
1795 /* Classification of atomic types. */
1796 switch (mode)
1797 {
1798 case DImode:
1799 case SImode:
1800 case HImode:
1801 case QImode:
1802 case CSImode:
1803 case CHImode:
1804 case CQImode:
1805 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1806 classes[0] = X86_64_INTEGERSI_CLASS;
1807 else
1808 classes[0] = X86_64_INTEGER_CLASS;
1809 return 1;
1810 case CDImode:
1811 case TImode:
1812 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1813 return 2;
1814 case CTImode:
1815 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1816 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1817 return 4;
1818 case SFmode:
1819 if (!(bit_offset % 64))
1820 classes[0] = X86_64_SSESF_CLASS;
1821 else
1822 classes[0] = X86_64_SSE_CLASS;
1823 return 1;
1824 case DFmode:
1825 classes[0] = X86_64_SSEDF_CLASS;
1826 return 1;
1827 case TFmode:
1828 classes[0] = X86_64_X87_CLASS;
1829 classes[1] = X86_64_X87UP_CLASS;
1830 return 2;
1831 case TCmode:
1832 classes[0] = X86_64_X87_CLASS;
1833 classes[1] = X86_64_X87UP_CLASS;
1834 classes[2] = X86_64_X87_CLASS;
1835 classes[3] = X86_64_X87UP_CLASS;
1836 return 4;
1837 case DCmode:
1838 classes[0] = X86_64_SSEDF_CLASS;
1839 classes[1] = X86_64_SSEDF_CLASS;
1840 return 2;
1841 case SCmode:
1842 classes[0] = X86_64_SSE_CLASS;
1843 return 1;
e95d6b23
JH
1844 case V4SFmode:
1845 case V4SImode:
1846 classes[0] = X86_64_SSE_CLASS;
1847 classes[1] = X86_64_SSEUP_CLASS;
1848 return 2;
1849 case V2SFmode:
1850 case V2SImode:
1851 case V4HImode:
1852 case V8QImode:
1853 classes[0] = X86_64_SSE_CLASS;
1854 return 1;
53c17031 1855 case BLKmode:
e95d6b23 1856 case VOIDmode:
53c17031
JH
1857 return 0;
1858 default:
1859 abort ();
1860 }
1861}
1862
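/* Worked example (illustrative): for

     struct s { double d; int i; };  (16 bytes including tail padding)

   the first eightbyte (the double) classifies as X86_64_SSEDF_CLASS
   and the second (the int) as X86_64_INTEGER_CLASS, so the struct is
   passed in one SSE and one integer register; any aggregate larger
   than 16 bytes is passed in memory, as checked at the top of
   classify_argument.  */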
 1863/* Examine the argument and set the number of registers required in each
f5143c46 1864 class. Return 0 iff the parameter should be passed in memory. */
53c17031
JH
1865static int
1866examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1867 enum machine_mode mode;
1868 tree type;
1869 int *int_nregs, *sse_nregs;
1870 int in_return;
1871{
1872 enum x86_64_reg_class class[MAX_CLASSES];
1873 int n = classify_argument (mode, type, class, 0);
1874
1875 *int_nregs = 0;
1876 *sse_nregs = 0;
1877 if (!n)
1878 return 0;
1879 for (n--; n >= 0; n--)
1880 switch (class[n])
1881 {
1882 case X86_64_INTEGER_CLASS:
1883 case X86_64_INTEGERSI_CLASS:
1884 (*int_nregs)++;
1885 break;
1886 case X86_64_SSE_CLASS:
1887 case X86_64_SSESF_CLASS:
1888 case X86_64_SSEDF_CLASS:
1889 (*sse_nregs)++;
1890 break;
1891 case X86_64_NO_CLASS:
1892 case X86_64_SSEUP_CLASS:
1893 break;
1894 case X86_64_X87_CLASS:
1895 case X86_64_X87UP_CLASS:
1896 if (!in_return)
1897 return 0;
1898 break;
1899 case X86_64_MEMORY_CLASS:
1900 abort ();
1901 }
1902 return 1;
1903}
 1904/* Construct the container rtx for the argument, as used by the GCC
 1905 interface. See FUNCTION_ARG for the detailed description. */
1906static rtx
1907construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1908 enum machine_mode mode;
1909 tree type;
1910 int in_return;
1911 int nintregs, nsseregs;
07933f72
GS
1912 const int * intreg;
1913 int sse_regno;
53c17031
JH
1914{
1915 enum machine_mode tmpmode;
1916 int bytes =
1917 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1918 enum x86_64_reg_class class[MAX_CLASSES];
1919 int n;
1920 int i;
1921 int nexps = 0;
1922 int needed_sseregs, needed_intregs;
1923 rtx exp[MAX_CLASSES];
1924 rtx ret;
1925
1926 n = classify_argument (mode, type, class, 0);
1927 if (TARGET_DEBUG_ARG)
1928 {
1929 if (!n)
1930 fprintf (stderr, "Memory class\n");
1931 else
1932 {
1933 fprintf (stderr, "Classes:");
1934 for (i = 0; i < n; i++)
1935 {
1936 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1937 }
1938 fprintf (stderr, "\n");
1939 }
1940 }
1941 if (!n)
1942 return NULL;
1943 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1944 return NULL;
1945 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1946 return NULL;
1947
1948 /* First construct simple cases. Avoid SCmode, since we want to use
 1949 a single register to pass this type. */
1950 if (n == 1 && mode != SCmode)
1951 switch (class[0])
1952 {
1953 case X86_64_INTEGER_CLASS:
1954 case X86_64_INTEGERSI_CLASS:
1955 return gen_rtx_REG (mode, intreg[0]);
1956 case X86_64_SSE_CLASS:
1957 case X86_64_SSESF_CLASS:
1958 case X86_64_SSEDF_CLASS:
1959 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1960 case X86_64_X87_CLASS:
1961 return gen_rtx_REG (mode, FIRST_STACK_REG);
1962 case X86_64_NO_CLASS:
1963 /* Zero sized array, struct or class. */
1964 return NULL;
1965 default:
1966 abort ();
1967 }
1968 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
e95d6b23 1969 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
53c17031
JH
1970 if (n == 2
1971 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1972 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1973 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1974 && class[1] == X86_64_INTEGER_CLASS
1975 && (mode == CDImode || mode == TImode)
1976 && intreg[0] + 1 == intreg[1])
1977 return gen_rtx_REG (mode, intreg[0]);
1978 if (n == 4
1979 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1980 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1981 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1982
1983 /* Otherwise figure out the entries of the PARALLEL. */
1984 for (i = 0; i < n; i++)
1985 {
1986 switch (class[i])
1987 {
1988 case X86_64_NO_CLASS:
1989 break;
1990 case X86_64_INTEGER_CLASS:
1991 case X86_64_INTEGERSI_CLASS:
 1992 /* Merge TImodes on aligned occasions here too. */
1993 if (i * 8 + 8 > bytes)
1994 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1995 else if (class[i] == X86_64_INTEGERSI_CLASS)
1996 tmpmode = SImode;
1997 else
1998 tmpmode = DImode;
 1999 /* We've requested a size with no matching integer mode (e.g. 24 bits); use DImode. */
2000 if (tmpmode == BLKmode)
2001 tmpmode = DImode;
2002 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2003 gen_rtx_REG (tmpmode, *intreg),
2004 GEN_INT (i*8));
2005 intreg++;
2006 break;
2007 case X86_64_SSESF_CLASS:
2008 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2009 gen_rtx_REG (SFmode,
2010 SSE_REGNO (sse_regno)),
2011 GEN_INT (i*8));
2012 sse_regno++;
2013 break;
2014 case X86_64_SSEDF_CLASS:
2015 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2016 gen_rtx_REG (DFmode,
2017 SSE_REGNO (sse_regno)),
2018 GEN_INT (i*8));
2019 sse_regno++;
2020 break;
2021 case X86_64_SSE_CLASS:
2022 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2023 tmpmode = TImode, i++;
2024 else
2025 tmpmode = DImode;
2026 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2027 gen_rtx_REG (tmpmode,
2028 SSE_REGNO (sse_regno)),
2029 GEN_INT (i*8));
2030 sse_regno++;
2031 break;
2032 default:
2033 abort ();
2034 }
2035 }
2036 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2037 for (i = 0; i < nexps; i++)
2038 XVECEXP (ret, 0, i) = exp [i];
2039 return ret;
2040}
2041
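/* Example (illustrative): for the { double; int } struct classified as
   {SSEDF, INTEGER} in the earlier example, construct_container builds
   roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   a PARALLEL pairing each register with the byte offset of the
   eightbyte it carries.  */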
b08de47e
MM
2042/* Update the data in CUM to advance over an argument
2043 of mode MODE and data type TYPE.
2044 (TYPE is null for libcalls where that information may not be available.) */
2045
2046void
2047function_arg_advance (cum, mode, type, named)
2048 CUMULATIVE_ARGS *cum; /* current arg information */
2049 enum machine_mode mode; /* current arg mode */
2050 tree type; /* type of the argument or 0 if lib support */
2051 int named; /* whether or not the argument was named */
2052{
5ac9118e
KG
2053 int bytes =
2054 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2055 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2056
2057 if (TARGET_DEBUG_ARG)
2058 fprintf (stderr,
e9a25f70 2059 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 2060 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
53c17031 2061 if (TARGET_64BIT)
b08de47e 2062 {
53c17031
JH
2063 int int_nregs, sse_nregs;
2064 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2065 cum->words += words;
2066 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2067 {
53c17031
JH
2068 cum->nregs -= int_nregs;
2069 cum->sse_nregs -= sse_nregs;
2070 cum->regno += int_nregs;
2071 cum->sse_regno += sse_nregs;
82a127a9 2072 }
53c17031
JH
2073 else
2074 cum->words += words;
b08de47e 2075 }
a4f31c00 2076 else
82a127a9 2077 {
53c17031
JH
2078 if (TARGET_SSE && mode == TImode)
2079 {
2080 cum->sse_words += words;
2081 cum->sse_nregs -= 1;
2082 cum->sse_regno += 1;
2083 if (cum->sse_nregs <= 0)
2084 {
2085 cum->sse_nregs = 0;
2086 cum->sse_regno = 0;
2087 }
2088 }
2089 else
82a127a9 2090 {
53c17031
JH
2091 cum->words += words;
2092 cum->nregs -= words;
2093 cum->regno += words;
2094
2095 if (cum->nregs <= 0)
2096 {
2097 cum->nregs = 0;
2098 cum->regno = 0;
2099 }
82a127a9
CM
2100 }
2101 }
b08de47e
MM
2102 return;
2103}
2104
2105/* Define where to put the arguments to a function.
2106 Value is zero to push the argument on the stack,
2107 or a hard register in which to store the argument.
2108
2109 MODE is the argument's machine mode.
2110 TYPE is the data type of the argument (as a tree).
2111 This is null for libcalls where that information may
2112 not be available.
2113 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2114 the preceding args and about the function being called.
2115 NAMED is nonzero if this argument is a named parameter
2116 (otherwise it is an extra parameter matching an ellipsis). */
2117
07933f72 2118rtx
b08de47e
MM
2119function_arg (cum, mode, type, named)
2120 CUMULATIVE_ARGS *cum; /* current arg information */
2121 enum machine_mode mode; /* current arg mode */
2122 tree type; /* type of the argument or 0 if lib support */
2123 int named; /* != 0 for normal args, == 0 for ... args */
2124{
2125 rtx ret = NULL_RTX;
5ac9118e
KG
2126 int bytes =
2127 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2128 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2129
53c17031
JH
 2130 /* Handle a hidden AL argument containing the number of SSE registers used
 2131 by a varargs x86-64 function. For the i386 ABI just return constm1_rtx to
 2132 avoid any AL settings. */
32ee7d1d 2133 if (mode == VOIDmode)
b08de47e 2134 {
53c17031
JH
2135 if (TARGET_64BIT)
2136 return GEN_INT (cum->maybe_vaarg
2137 ? (cum->sse_nregs < 0
2138 ? SSE_REGPARM_MAX
2139 : cum->sse_regno)
2140 : -1);
2141 else
2142 return constm1_rtx;
b08de47e 2143 }
53c17031
JH
2144 if (TARGET_64BIT)
2145 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2146 &x86_64_int_parameter_registers [cum->regno],
2147 cum->sse_regno);
2148 else
2149 switch (mode)
2150 {
2151 /* For now, pass fp/complex values on the stack. */
2152 default:
2153 break;
2154
2155 case BLKmode:
2156 case DImode:
2157 case SImode:
2158 case HImode:
2159 case QImode:
2160 if (words <= cum->nregs)
2161 ret = gen_rtx_REG (mode, cum->regno);
2162 break;
2163 case TImode:
2164 if (cum->sse_nregs)
2165 ret = gen_rtx_REG (mode, cum->sse_regno);
2166 break;
2167 }
b08de47e
MM
2168
2169 if (TARGET_DEBUG_ARG)
2170 {
2171 fprintf (stderr,
e9a25f70 2172 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
b08de47e
MM
2173 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2174
2175 if (ret)
b531087a 2176 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
b08de47e
MM
2177 else
2178 fprintf (stderr, ", stack");
2179
2180 fprintf (stderr, " )\n");
2181 }
2182
2183 return ret;
2184}
53c17031
JH
2185
2186/* Gives the alignment boundary, in bits, of an argument with the specified mode
2187 and type. */
2188
2189int
2190ix86_function_arg_boundary (mode, type)
2191 enum machine_mode mode;
2192 tree type;
2193{
2194 int align;
2195 if (!TARGET_64BIT)
2196 return PARM_BOUNDARY;
2197 if (type)
2198 align = TYPE_ALIGN (type);
2199 else
2200 align = GET_MODE_ALIGNMENT (mode);
2201 if (align < PARM_BOUNDARY)
2202 align = PARM_BOUNDARY;
2203 if (align > 128)
2204 align = 128;
2205 return align;
2206}
2207
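/* Example (illustrative): on x86-64 a 16-byte vector type such as
   __m128 has TYPE_ALIGN of 128 and so gets a 128-bit argument
   boundary, while anything less aligned than PARM_BOUNDARY is
   rounded up to the 64-bit parameter boundary.  */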
2208/* Return true if N is a possible register number of function value. */
2209bool
2210ix86_function_value_regno_p (regno)
2211 int regno;
2212{
2213 if (!TARGET_64BIT)
2214 {
2215 return ((regno) == 0
2216 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2217 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2218 }
2219 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2220 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2221 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2222}
2223
2224/* Define how to find the value returned by a function.
2225 VALTYPE is the data type of the value (as a tree).
2226 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2227 otherwise, FUNC is 0. */
2228rtx
2229ix86_function_value (valtype)
2230 tree valtype;
2231{
2232 if (TARGET_64BIT)
2233 {
2234 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2235 REGPARM_MAX, SSE_REGPARM_MAX,
2236 x86_64_int_return_registers, 0);
 2237 /* For zero sized structures, construct_container returns NULL, but we need
 2238 to keep the rest of the compiler happy by returning a meaningful value. */
2239 if (!ret)
2240 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2241 return ret;
2242 }
2243 else
2244 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2245}
2246
f5143c46 2247/* Return nonzero iff TYPE is returned in memory. */
53c17031
JH
2248int
2249ix86_return_in_memory (type)
2250 tree type;
2251{
2252 int needed_intregs, needed_sseregs;
2253 if (TARGET_64BIT)
2254 {
2255 return !examine_argument (TYPE_MODE (type), type, 1,
2256 &needed_intregs, &needed_sseregs);
2257 }
2258 else
2259 {
2260 if (TYPE_MODE (type) == BLKmode
2261 || (VECTOR_MODE_P (TYPE_MODE (type))
2262 && int_size_in_bytes (type) == 8)
2263 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2264 && TYPE_MODE (type) != TFmode
2265 && !VECTOR_MODE_P (TYPE_MODE (type))))
2266 return 1;
2267 return 0;
2268 }
2269}
2270
2271/* Define how to find the value returned by a library function
2272 assuming the value has mode MODE. */
2273rtx
2274ix86_libcall_value (mode)
2275 enum machine_mode mode;
2276{
2277 if (TARGET_64BIT)
2278 {
2279 switch (mode)
2280 {
2281 case SFmode:
2282 case SCmode:
2283 case DFmode:
2284 case DCmode:
2285 return gen_rtx_REG (mode, FIRST_SSE_REG);
2286 case TFmode:
2287 case TCmode:
2288 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2289 default:
2290 return gen_rtx_REG (mode, 0);
2291 }
2292 }
2293 else
2294 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2295}
ad919812
JH
2296\f
2297/* Create the va_list data type. */
53c17031 2298
ad919812
JH
2299tree
2300ix86_build_va_list ()
2301{
2302 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 2303
ad919812
JH
2304 /* For i386 we use plain pointer to argument area. */
2305 if (!TARGET_64BIT)
2306 return build_pointer_type (char_type_node);
2307
f1e639b1 2308 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
ad919812
JH
2309 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2310
2311 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2312 unsigned_type_node);
2313 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2314 unsigned_type_node);
2315 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2316 ptr_type_node);
2317 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2318 ptr_type_node);
2319
2320 DECL_FIELD_CONTEXT (f_gpr) = record;
2321 DECL_FIELD_CONTEXT (f_fpr) = record;
2322 DECL_FIELD_CONTEXT (f_ovf) = record;
2323 DECL_FIELD_CONTEXT (f_sav) = record;
2324
2325 TREE_CHAIN (record) = type_decl;
2326 TYPE_NAME (record) = type_decl;
2327 TYPE_FIELDS (record) = f_gpr;
2328 TREE_CHAIN (f_gpr) = f_fpr;
2329 TREE_CHAIN (f_fpr) = f_ovf;
2330 TREE_CHAIN (f_ovf) = f_sav;
2331
2332 layout_type (record);
2333
2334 /* The correct type is an array type of one element. */
2335 return build_array_type (record, build_index_type (size_zero_node));
2336}
2337
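/* For reference, the record built above corresponds to the x86-64 ABI
   declaration (illustrative C only, never emitted by the compiler):

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];  */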
 2338/* Perform any actions needed for a function that is receiving a
2339 variable number of arguments.
2340
2341 CUM is as above.
2342
2343 MODE and TYPE are the mode and type of the current parameter.
2344
2345 PRETEND_SIZE is a variable that should be set to the amount of stack
2346 that must be pushed by the prolog to pretend that our caller pushed
2347 it.
2348
2349 Normally, this macro will push all remaining incoming registers on the
2350 stack and set PRETEND_SIZE to the length of the registers pushed. */
2351
2352void
2353ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2354 CUMULATIVE_ARGS *cum;
2355 enum machine_mode mode;
2356 tree type;
2357 int *pretend_size ATTRIBUTE_UNUSED;
2358 int no_rtl;
2359
2360{
2361 CUMULATIVE_ARGS next_cum;
2362 rtx save_area = NULL_RTX, mem;
2363 rtx label;
2364 rtx label_ref;
2365 rtx tmp_reg;
2366 rtx nsse_reg;
2367 int set;
2368 tree fntype;
2369 int stdarg_p;
2370 int i;
2371
2372 if (!TARGET_64BIT)
2373 return;
2374
 2375 /* Indicate that we need to allocate stack space for the varargs register save area. */
2376 ix86_save_varrargs_registers = 1;
2377
2378 fntype = TREE_TYPE (current_function_decl);
2379 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2380 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2381 != void_type_node));
2382
2383 /* For varargs, we do not want to skip the dummy va_dcl argument.
2384 For stdargs, we do want to skip the last named argument. */
2385 next_cum = *cum;
2386 if (stdarg_p)
2387 function_arg_advance (&next_cum, mode, type, 1);
2388
2389 if (!no_rtl)
2390 save_area = frame_pointer_rtx;
2391
2392 set = get_varargs_alias_set ();
2393
2394 for (i = next_cum.regno; i < ix86_regparm; i++)
2395 {
2396 mem = gen_rtx_MEM (Pmode,
2397 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 2398 set_mem_alias_set (mem, set);
ad919812
JH
2399 emit_move_insn (mem, gen_rtx_REG (Pmode,
2400 x86_64_int_parameter_registers[i]));
2401 }
2402
2403 if (next_cum.sse_nregs)
2404 {
 2405 /* Now emit code to save SSE registers. The AX parameter contains the
 2406 number of SSE parameter registers used to call this function. We use
 2407 the sse_prologue_save insn template, which produces a computed jump
 2408 across the SSE saves. We need some preparation work to get this working. */
2409
2410 label = gen_label_rtx ();
2411 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2412
2413 /* Compute address to jump to :
 2414 label - 4*eax + nnamed_sse_arguments*4 (each save insn is 4 bytes long) */
2415 tmp_reg = gen_reg_rtx (Pmode);
2416 nsse_reg = gen_reg_rtx (Pmode);
2417 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2418 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 2419 gen_rtx_MULT (Pmode, nsse_reg,
ad919812
JH
2420 GEN_INT (4))));
2421 if (next_cum.sse_regno)
2422 emit_move_insn
2423 (nsse_reg,
2424 gen_rtx_CONST (DImode,
2425 gen_rtx_PLUS (DImode,
2426 label_ref,
2427 GEN_INT (next_cum.sse_regno * 4))));
2428 else
2429 emit_move_insn (nsse_reg, label_ref);
2430 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2431
 2432 /* Compute the address of the memory block we save into. We always use a
 2433 pointer pointing 127 bytes after the first byte to store - this keeps each
 2434 save instruction within 4 bytes, since the displacement fits in a signed byte. */
2435 tmp_reg = gen_reg_rtx (Pmode);
8ac61af7
RK
2436 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2437 plus_constant (save_area,
2438 8 * REGPARM_MAX + 127)));
ad919812 2439 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 2440 set_mem_alias_set (mem, set);
8ac61af7 2441 set_mem_align (mem, BITS_PER_WORD);
ad919812
JH
2442
2443 /* And finally do the dirty job! */
8ac61af7
RK
2444 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2445 GEN_INT (next_cum.sse_regno), label));
ad919812
JH
2446 }
2447
2448}
2449
2450/* Implement va_start. */
2451
2452void
2453ix86_va_start (stdarg_p, valist, nextarg)
2454 int stdarg_p;
2455 tree valist;
2456 rtx nextarg;
2457{
2458 HOST_WIDE_INT words, n_gpr, n_fpr;
2459 tree f_gpr, f_fpr, f_ovf, f_sav;
2460 tree gpr, fpr, ovf, sav, t;
2461
 2462 /* Only the 64-bit target needs something special. */
2463 if (!TARGET_64BIT)
2464 {
2465 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2466 return;
2467 }
2468
2469 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2470 f_fpr = TREE_CHAIN (f_gpr);
2471 f_ovf = TREE_CHAIN (f_fpr);
2472 f_sav = TREE_CHAIN (f_ovf);
2473
2474 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2475 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2476 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2477 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2478 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2479
2480 /* Count number of gp and fp argument registers used. */
2481 words = current_function_args_info.words;
2482 n_gpr = current_function_args_info.regno;
2483 n_fpr = current_function_args_info.sse_regno;
2484
2485 if (TARGET_DEBUG_ARG)
2486 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
b531087a 2487 (int) words, (int) n_gpr, (int) n_fpr);
ad919812
JH
2488
2489 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2490 build_int_2 (n_gpr * 8, 0));
2491 TREE_SIDE_EFFECTS (t) = 1;
2492 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2493
2494 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2495 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2496 TREE_SIDE_EFFECTS (t) = 1;
2497 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2498
2499 /* Find the overflow area. */
2500 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2501 if (words != 0)
2502 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2503 build_int_2 (words * UNITS_PER_WORD, 0));
2504 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2505 TREE_SIDE_EFFECTS (t) = 1;
2506 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2507
2508 /* Find the register save area.
 2509 The function prologue saves it right above the stack frame. */
2510 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2511 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2512 TREE_SIDE_EFFECTS (t) = 1;
2513 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2514}
2515
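/* Example (illustrative): for `int f (int a, ...)' the single named
   integer argument consumes one GP register, so va_start stores
   gp_offset = 8 and fp_offset = 8 * REGPARM_MAX (no SSE registers
   consumed), points overflow_arg_area at the first stack-passed word,
   and points reg_save_area at the block saved by the prologue.  */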
2516/* Implement va_arg. */
2517rtx
2518ix86_va_arg (valist, type)
2519 tree valist, type;
2520{
0139adca 2521 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
ad919812
JH
2522 tree f_gpr, f_fpr, f_ovf, f_sav;
2523 tree gpr, fpr, ovf, sav, t;
b932f770 2524 int size, rsize;
ad919812
JH
2525 rtx lab_false, lab_over = NULL_RTX;
2526 rtx addr_rtx, r;
2527 rtx container;
2528
 2529 /* Only the 64-bit target needs something special. */
2530 if (!TARGET_64BIT)
2531 {
2532 return std_expand_builtin_va_arg (valist, type);
2533 }
2534
2535 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2536 f_fpr = TREE_CHAIN (f_gpr);
2537 f_ovf = TREE_CHAIN (f_fpr);
2538 f_sav = TREE_CHAIN (f_ovf);
2539
2540 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2541 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2542 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2543 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2544 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2545
2546 size = int_size_in_bytes (type);
2547 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2548
2549 container = construct_container (TYPE_MODE (type), type, 0,
2550 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2551 /*
2552 * Pull the value out of the saved registers ...
2553 */
2554
2555 addr_rtx = gen_reg_rtx (Pmode);
2556
2557 if (container)
2558 {
2559 rtx int_addr_rtx, sse_addr_rtx;
2560 int needed_intregs, needed_sseregs;
2561 int need_temp;
2562
2563 lab_over = gen_label_rtx ();
2564 lab_false = gen_label_rtx ();
8bad7136 2565
ad919812
JH
2566 examine_argument (TYPE_MODE (type), type, 0,
2567 &needed_intregs, &needed_sseregs);
2568
2569
2570 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2571 || TYPE_ALIGN (type) > 128);
2572
 2573 /* In case we are passing a structure, verify that it is a consecutive block
 2574 in the register save area. If not, we need to do moves. */
2575 if (!need_temp && !REG_P (container))
2576 {
 2577 /* Verify that all registers are strictly consecutive. */
2578 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2579 {
2580 int i;
2581
2582 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2583 {
2584 rtx slot = XVECEXP (container, 0, i);
b531087a 2585 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
ad919812
JH
2586 || INTVAL (XEXP (slot, 1)) != i * 16)
2587 need_temp = 1;
2588 }
2589 }
2590 else
2591 {
2592 int i;
2593
2594 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2595 {
2596 rtx slot = XVECEXP (container, 0, i);
b531087a 2597 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
ad919812
JH
2598 || INTVAL (XEXP (slot, 1)) != i * 8)
2599 need_temp = 1;
2600 }
2601 }
2602 }
2603 if (!need_temp)
2604 {
2605 int_addr_rtx = addr_rtx;
2606 sse_addr_rtx = addr_rtx;
2607 }
2608 else
2609 {
2610 int_addr_rtx = gen_reg_rtx (Pmode);
2611 sse_addr_rtx = gen_reg_rtx (Pmode);
2612 }
2613 /* First ensure that we fit completely in registers. */
2614 if (needed_intregs)
2615 {
2616 emit_cmp_and_jump_insns (expand_expr
2617 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2618 GEN_INT ((REGPARM_MAX - needed_intregs +
2619 1) * 8), GE, const1_rtx, SImode,
d43e0b7d 2620 1, lab_false);
ad919812
JH
2621 }
2622 if (needed_sseregs)
2623 {
2624 emit_cmp_and_jump_insns (expand_expr
2625 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2626 GEN_INT ((SSE_REGPARM_MAX -
2627 needed_sseregs + 1) * 16 +
2628 REGPARM_MAX * 8), GE, const1_rtx,
d43e0b7d 2629 SImode, 1, lab_false);
ad919812
JH
2630 }
2631
2632 /* Compute index to start of area used for integer regs. */
2633 if (needed_intregs)
2634 {
2635 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2636 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2637 if (r != int_addr_rtx)
2638 emit_move_insn (int_addr_rtx, r);
2639 }
2640 if (needed_sseregs)
2641 {
2642 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2643 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2644 if (r != sse_addr_rtx)
2645 emit_move_insn (sse_addr_rtx, r);
2646 }
2647 if (need_temp)
2648 {
2649 int i;
2650 rtx mem;
2651
b932f770
JH
2652 /* Never use the memory itself, as it has the alias set. */
2653 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2654 mem = gen_rtx_MEM (BLKmode, addr_rtx);
0692acba 2655 set_mem_alias_set (mem, get_varargs_alias_set ());
8ac61af7 2656 set_mem_align (mem, BITS_PER_UNIT);
b932f770 2657
ad919812
JH
2658 for (i = 0; i < XVECLEN (container, 0); i++)
2659 {
2660 rtx slot = XVECEXP (container, 0, i);
2661 rtx reg = XEXP (slot, 0);
2662 enum machine_mode mode = GET_MODE (reg);
2663 rtx src_addr;
2664 rtx src_mem;
2665 int src_offset;
2666 rtx dest_mem;
2667
2668 if (SSE_REGNO_P (REGNO (reg)))
2669 {
2670 src_addr = sse_addr_rtx;
2671 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2672 }
2673 else
2674 {
2675 src_addr = int_addr_rtx;
2676 src_offset = REGNO (reg) * 8;
2677 }
2678 src_mem = gen_rtx_MEM (mode, src_addr);
0692acba 2679 set_mem_alias_set (src_mem, get_varargs_alias_set ());
ad919812
JH
2680 src_mem = adjust_address (src_mem, mode, src_offset);
2681 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
ad919812
JH
2682 emit_move_insn (dest_mem, src_mem);
2683 }
2684 }
2685
2686 if (needed_intregs)
2687 {
2688 t =
2689 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2690 build_int_2 (needed_intregs * 8, 0));
2691 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2692 TREE_SIDE_EFFECTS (t) = 1;
2693 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2694 }
2695 if (needed_sseregs)
2696 {
2697 t =
2698 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2699 build_int_2 (needed_sseregs * 16, 0));
2700 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2701 TREE_SIDE_EFFECTS (t) = 1;
2702 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2703 }
2704
2705 emit_jump_insn (gen_jump (lab_over));
2706 emit_barrier ();
2707 emit_label (lab_false);
2708 }
2709
2710 /* ... otherwise out of the overflow area. */
2711
2712 /* Care for on-stack alignment if needed. */
2713 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2714 t = ovf;
2715 else
2716 {
2717 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2718 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2719 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2720 }
2721 t = save_expr (t);
2722
2723 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2724 if (r != addr_rtx)
2725 emit_move_insn (addr_rtx, r);
2726
2727 t =
2728 build (PLUS_EXPR, TREE_TYPE (t), t,
2729 build_int_2 (rsize * UNITS_PER_WORD, 0));
2730 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2731 TREE_SIDE_EFFECTS (t) = 1;
2732 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2733
2734 if (container)
2735 emit_label (lab_over);
2736
ad919812
JH
2737 return addr_rtx;
2738}
2739\f
7dd4b4a3
JH
 2740/* Return nonzero if OP is a general operand representable on x86_64. */
2741
2742int
2743x86_64_general_operand (op, mode)
2744 rtx op;
2745 enum machine_mode mode;
2746{
2747 if (!TARGET_64BIT)
2748 return general_operand (op, mode);
2749 if (nonimmediate_operand (op, mode))
2750 return 1;
2751 return x86_64_sign_extended_value (op);
2752}
2753
 2754/* Return nonzero if OP is a general operand representable on x86_64
d6a7951f 2755 as either sign extended or zero extended constant. */
7dd4b4a3
JH
2756
2757int
2758x86_64_szext_general_operand (op, mode)
2759 rtx op;
2760 enum machine_mode mode;
2761{
2762 if (!TARGET_64BIT)
2763 return general_operand (op, mode);
2764 if (nonimmediate_operand (op, mode))
2765 return 1;
2766 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2767}
2768
 2769/* Return nonzero if OP is a nonmemory operand representable on x86_64. */
2770
2771int
2772x86_64_nonmemory_operand (op, mode)
2773 rtx op;
2774 enum machine_mode mode;
2775{
2776 if (!TARGET_64BIT)
2777 return nonmemory_operand (op, mode);
2778 if (register_operand (op, mode))
2779 return 1;
2780 return x86_64_sign_extended_value (op);
2781}
2782
 2783/* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns. */
2784
2785int
2786x86_64_movabs_operand (op, mode)
2787 rtx op;
2788 enum machine_mode mode;
2789{
2790 if (!TARGET_64BIT || !flag_pic)
2791 return nonmemory_operand (op, mode);
2792 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2793 return 1;
2794 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2795 return 1;
2796 return 0;
2797}
2798
 2799/* Return nonzero if OP is a nonmemory operand representable on x86_64
 as either a sign extended or zero extended constant. */
2800
2801int
2802x86_64_szext_nonmemory_operand (op, mode)
2803 rtx op;
2804 enum machine_mode mode;
2805{
2806 if (!TARGET_64BIT)
2807 return nonmemory_operand (op, mode);
2808 if (register_operand (op, mode))
2809 return 1;
2810 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2811}
2812
 2813/* Return nonzero if OP is an immediate operand representable on x86_64
 as a sign extended constant. */
2814
2815int
2816x86_64_immediate_operand (op, mode)
2817 rtx op;
2818 enum machine_mode mode;
2819{
2820 if (!TARGET_64BIT)
2821 return immediate_operand (op, mode);
2822 return x86_64_sign_extended_value (op);
2823}
2824
 2825/* Return nonzero if OP is an immediate operand representable on x86_64
 as a zero extended constant. */
2826
2827int
2828x86_64_zext_immediate_operand (op, mode)
2829 rtx op;
2830 enum machine_mode mode ATTRIBUTE_UNUSED;
2831{
2832 return x86_64_zero_extended_value (op);
2833}
2834
8bad7136
JL
2835/* Return nonzero if OP is (const_int 1), else return zero. */
2836
2837int
2838const_int_1_operand (op, mode)
2839 rtx op;
2840 enum machine_mode mode ATTRIBUTE_UNUSED;
2841{
2842 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2843}
2844
e075ae69
RH
2845/* Returns 1 if OP is either a symbol reference or a sum of a symbol
2846 reference and a constant. */
b08de47e
MM
2847
2848int
e075ae69
RH
2849symbolic_operand (op, mode)
2850 register rtx op;
2851 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 2852{
e075ae69 2853 switch (GET_CODE (op))
2a2ab3f9 2854 {
e075ae69
RH
2855 case SYMBOL_REF:
2856 case LABEL_REF:
2857 return 1;
2858
2859 case CONST:
2860 op = XEXP (op, 0);
2861 if (GET_CODE (op) == SYMBOL_REF
2862 || GET_CODE (op) == LABEL_REF
2863 || (GET_CODE (op) == UNSPEC
8ee41eaf
RH
2864 && (XINT (op, 1) == UNSPEC_GOT
2865 || XINT (op, 1) == UNSPEC_GOTOFF
2866 || XINT (op, 1) == UNSPEC_GOTPCREL)))
e075ae69
RH
2867 return 1;
2868 if (GET_CODE (op) != PLUS
2869 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2870 return 0;
2871
2872 op = XEXP (op, 0);
2873 if (GET_CODE (op) == SYMBOL_REF
2874 || GET_CODE (op) == LABEL_REF)
2875 return 1;
2876 /* Only @GOTOFF gets offsets. */
2877 if (GET_CODE (op) != UNSPEC
8ee41eaf 2878 || XINT (op, 1) != UNSPEC_GOTOFF)
e075ae69
RH
2879 return 0;
2880
2881 op = XVECEXP (op, 0, 0);
2882 if (GET_CODE (op) == SYMBOL_REF
2883 || GET_CODE (op) == LABEL_REF)
2884 return 1;
2885 return 0;
2886
2887 default:
2888 return 0;
2a2ab3f9
JVA
2889 }
2890}
2a2ab3f9 2891
e075ae69 2892/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 2893
e075ae69
RH
2894int
2895pic_symbolic_operand (op, mode)
2896 register rtx op;
2897 enum machine_mode mode ATTRIBUTE_UNUSED;
2898{
6eb791fc
JH
2899 if (GET_CODE (op) != CONST)
2900 return 0;
2901 op = XEXP (op, 0);
2902 if (TARGET_64BIT)
2903 {
2904 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2905 return 1;
2906 }
2907 else
2a2ab3f9 2908 {
e075ae69
RH
2909 if (GET_CODE (op) == UNSPEC)
2910 return 1;
2911 if (GET_CODE (op) != PLUS
2912 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2913 return 0;
2914 op = XEXP (op, 0);
2915 if (GET_CODE (op) == UNSPEC)
2916 return 1;
2a2ab3f9 2917 }
e075ae69 2918 return 0;
2a2ab3f9 2919}
2a2ab3f9 2920
623fe810
RH
2921/* Return true if OP is a symbolic operand that resolves locally. */
2922
2923static int
2924local_symbolic_operand (op, mode)
2925 rtx op;
2926 enum machine_mode mode ATTRIBUTE_UNUSED;
2927{
2928 if (GET_CODE (op) == LABEL_REF)
2929 return 1;
2930
2931 if (GET_CODE (op) == CONST
2932 && GET_CODE (XEXP (op, 0)) == PLUS
2933 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2934 op = XEXP (XEXP (op, 0), 0);
2935
2936 if (GET_CODE (op) != SYMBOL_REF)
2937 return 0;
2938
2939 /* These we've been told are local by varasm and encode_section_info
2940 respectively. */
2941 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2942 return 1;
2943
2944 /* There is, however, a not insubstantial body of code in the rest of
2945 the compiler that assumes it can just stick the results of
2946 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2947 /* ??? This is a hack. Should update the body of the compiler to
 2948 always create a DECL and invoke ENCODE_SECTION_INFO. */
2949 if (strncmp (XSTR (op, 0), internal_label_prefix,
2950 internal_label_prefix_len) == 0)
2951 return 1;
2952
2953 return 0;
2954}
2955
28d52ffb
RH
2956/* Test for a valid operand for a call instruction. Don't allow the
2957 arg pointer register or virtual regs since they may decay into
2958 reg + const, which the patterns can't handle. */
2a2ab3f9 2959
e075ae69
RH
2960int
2961call_insn_operand (op, mode)
2962 rtx op;
2963 enum machine_mode mode ATTRIBUTE_UNUSED;
2964{
e075ae69
RH
2965 /* Disallow indirect through a virtual register. This leads to
2966 compiler aborts when trying to eliminate them. */
2967 if (GET_CODE (op) == REG
2968 && (op == arg_pointer_rtx
564d80f4 2969 || op == frame_pointer_rtx
e075ae69
RH
2970 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
2971 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
2972 return 0;
2a2ab3f9 2973
28d52ffb
RH
2974 /* Disallow `call 1234'. Due to varying assembler lameness this
2975 gets either rejected or translated to `call .+1234'. */
2976 if (GET_CODE (op) == CONST_INT)
2977 return 0;
2978
cbbf65e0
RH
2979 /* Explicitly allow SYMBOL_REF even if pic. */
2980 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 2981 return 1;
2a2ab3f9 2982
cbbf65e0
RH
2983 /* Half-pic doesn't allow anything but registers and constants.
 2984 We've just taken care of the latter. */
2985 if (HALF_PIC_P ())
2986 return register_operand (op, Pmode);
2987
2988 /* Otherwise we can allow any general_operand in the address. */
2989 return general_operand (op, Pmode);
e075ae69 2990}
79325812 2991
e075ae69
RH
2992int
2993constant_call_address_operand (op, mode)
2994 rtx op;
2995 enum machine_mode mode ATTRIBUTE_UNUSED;
2996{
eaf19aba
JJ
2997 if (GET_CODE (op) == CONST
2998 && GET_CODE (XEXP (op, 0)) == PLUS
2999 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3000 op = XEXP (XEXP (op, 0), 0);
e1ff012c 3001 return GET_CODE (op) == SYMBOL_REF;
e075ae69 3002}
2a2ab3f9 3003
e075ae69 3004/* Match exactly zero and one. */
e9a25f70 3005
0f290768 3006int
e075ae69
RH
3007const0_operand (op, mode)
3008 register rtx op;
3009 enum machine_mode mode;
3010{
3011 return op == CONST0_RTX (mode);
3012}
e9a25f70 3013
0f290768 3014int
e075ae69
RH
3015const1_operand (op, mode)
3016 register rtx op;
3017 enum machine_mode mode ATTRIBUTE_UNUSED;
3018{
3019 return op == const1_rtx;
3020}
2a2ab3f9 3021
e075ae69 3022/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 3023
e075ae69
RH
3024int
3025const248_operand (op, mode)
3026 register rtx op;
3027 enum machine_mode mode ATTRIBUTE_UNUSED;
3028{
3029 return (GET_CODE (op) == CONST_INT
3030 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3031}
e9a25f70 3032
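/* Example (illustrative): 2, 4 and 8 are exactly the scale factors the
   SIB byte can encode, as in `leal (%eax,%ebx,4), %ecx', which is why
   const248_operand above accepts only those values.  */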
e075ae69 3033/* True if this is a constant appropriate for an increment or decrement. */
81fd0956 3034
e075ae69
RH
3035int
3036incdec_operand (op, mode)
3037 register rtx op;
0631e0bf 3038 enum machine_mode mode ATTRIBUTE_UNUSED;
e075ae69 3039{
f5143c46 3040 /* On Pentium4, the inc and dec operations cause an extra dependency on the flags
b4e89e2d
JH
 3041 register, since the carry flag is not set. */
3042 if (TARGET_PENTIUM4 && !optimize_size)
3043 return 0;
2b1c08f5 3044 return op == const1_rtx || op == constm1_rtx;
e075ae69 3045}
2a2ab3f9 3046
371bc54b
JH
3047/* Return nonzero if OP is acceptable as operand of DImode shift
3048 expander. */
3049
3050int
3051shiftdi_operand (op, mode)
3052 rtx op;
3053 enum machine_mode mode ATTRIBUTE_UNUSED;
3054{
3055 if (TARGET_64BIT)
3056 return nonimmediate_operand (op, mode);
3057 else
3058 return register_operand (op, mode);
3059}
3060
0f290768 3061/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
3062 register eliminable to the stack pointer. Otherwise, this is
3063 a register operand.
2a2ab3f9 3064
e075ae69
RH
 3065 This is used to prevent esp from being used as an index reg,
 3066 which would only happen in pathological cases. */
5f1ec3e6 3067
e075ae69
RH
3068int
3069reg_no_sp_operand (op, mode)
3070 register rtx op;
3071 enum machine_mode mode;
3072{
3073 rtx t = op;
3074 if (GET_CODE (t) == SUBREG)
3075 t = SUBREG_REG (t);
564d80f4 3076 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 3077 return 0;
2a2ab3f9 3078
e075ae69 3079 return register_operand (op, mode);
2a2ab3f9 3080}
b840bfb0 3081
915119a5
BS
3082int
3083mmx_reg_operand (op, mode)
3084 register rtx op;
bd793c65 3085 enum machine_mode mode ATTRIBUTE_UNUSED;
915119a5
BS
3086{
3087 return MMX_REG_P (op);
3088}
3089
2c5a510c
RH
3090/* Return false if this is any eliminable register. Otherwise
3091 general_operand. */
3092
3093int
3094general_no_elim_operand (op, mode)
3095 register rtx op;
3096 enum machine_mode mode;
3097{
3098 rtx t = op;
3099 if (GET_CODE (t) == SUBREG)
3100 t = SUBREG_REG (t);
3101 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3102 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3103 || t == virtual_stack_dynamic_rtx)
3104 return 0;
1020a5ab
RH
3105 if (REG_P (t)
3106 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3107 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3108 return 0;
2c5a510c
RH
3109
3110 return general_operand (op, mode);
3111}
3112
3113/* Return false if this is any eliminable register. Otherwise
3114 register_operand or const_int. */
3115
3116int
3117nonmemory_no_elim_operand (op, mode)
3118 register rtx op;
3119 enum machine_mode mode;
3120{
3121 rtx t = op;
3122 if (GET_CODE (t) == SUBREG)
3123 t = SUBREG_REG (t);
3124 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3125 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3126 || t == virtual_stack_dynamic_rtx)
3127 return 0;
3128
3129 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3130}
3131
e075ae69 3132/* Return true if op is a Q_REGS class register. */
b840bfb0 3133
e075ae69
RH
3134int
3135q_regs_operand (op, mode)
3136 register rtx op;
3137 enum machine_mode mode;
b840bfb0 3138{
e075ae69
RH
3139 if (mode != VOIDmode && GET_MODE (op) != mode)
3140 return 0;
3141 if (GET_CODE (op) == SUBREG)
3142 op = SUBREG_REG (op);
7799175f 3143 return ANY_QI_REG_P (op);
0f290768 3144}
b840bfb0 3145
e075ae69 3146/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 3147
e075ae69
RH
3148int
3149non_q_regs_operand (op, mode)
3150 register rtx op;
3151 enum machine_mode mode;
3152{
3153 if (mode != VOIDmode && GET_MODE (op) != mode)
3154 return 0;
3155 if (GET_CODE (op) == SUBREG)
3156 op = SUBREG_REG (op);
3157 return NON_QI_REG_P (op);
0f290768 3158}
b840bfb0 3159
915119a5
BS
3160/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3161 insns. */
3162int
3163sse_comparison_operator (op, mode)
3164 rtx op;
3165 enum machine_mode mode ATTRIBUTE_UNUSED;
3166{
3167 enum rtx_code code = GET_CODE (op);
a46d1d38
JH
3168 switch (code)
3169 {
3170 /* Operations supported directly. */
3171 case EQ:
3172 case LT:
3173 case LE:
3174 case UNORDERED:
3175 case NE:
3176 case UNGE:
3177 case UNGT:
3178 case ORDERED:
3179 return 1;
 3180 /* These are equivalent to the ones above in non-IEEE comparisons. */
3181 case UNEQ:
3182 case UNLT:
3183 case UNLE:
3184 case LTGT:
3185 case GE:
3186 case GT:
3187 return !TARGET_IEEE_FP;
3188 default:
3189 return 0;
3190 }
915119a5 3191}
9076b9c1 3192/* Return 1 if OP is a valid comparison operator in valid mode. */
e075ae69 3193int
9076b9c1
JH
3194ix86_comparison_operator (op, mode)
3195 register rtx op;
3196 enum machine_mode mode;
e075ae69 3197{
9076b9c1 3198 enum machine_mode inmode;
9a915772 3199 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
3200 if (mode != VOIDmode && GET_MODE (op) != mode)
3201 return 0;
9a915772
JH
3202 if (GET_RTX_CLASS (code) != '<')
3203 return 0;
3204 inmode = GET_MODE (XEXP (op, 0));
3205
3206 if (inmode == CCFPmode || inmode == CCFPUmode)
3207 {
3208 enum rtx_code second_code, bypass_code;
3209 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3210 return (bypass_code == NIL && second_code == NIL);
3211 }
3212 switch (code)
3a3677ff
RH
3213 {
3214 case EQ: case NE:
3a3677ff 3215 return 1;
9076b9c1 3216 case LT: case GE:
7e08e190 3217 if (inmode == CCmode || inmode == CCGCmode
9076b9c1
JH
3218 || inmode == CCGOCmode || inmode == CCNOmode)
3219 return 1;
3220 return 0;
7e08e190 3221 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
7e08e190 3222 if (inmode == CCmode)
9076b9c1
JH
3223 return 1;
3224 return 0;
3225 case GT: case LE:
7e08e190 3226 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
9076b9c1
JH
3227 return 1;
3228 return 0;
3a3677ff
RH
3229 default:
3230 return 0;
3231 }
3232}
3233
/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));
  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
        return 0;
      code = ix86_fp_compare_code_to_integer (code);
    }
  /* The i387 supports only a limited set of condition codes.  */
  switch (code)
    {
    case LTU: case GTU: case LEU: case GEU:
      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
        return 1;
      return 0;
    case ORDERED: case UNORDERED:
    case EQ: case NE:
      return 1;
    default:
      return 0;
    }
}

/* Return 1 if OP is a binary operator that can be promoted to a wider mode.  */

int
promotable_binary_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case MULT:
      /* Modern CPUs have the same latency for HImode and SImode
         multiplies, but the 386 and 486 do HImode multiplies faster.  */
      return ix86_cpu > PROCESSOR_I486;
    case PLUS:
    case AND:
    case IOR:
    case XOR:
    case ASHIFT:
      return 1;
    default:
      return 0;
    }
}
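
/* Editorial sketch (not part of the compiler): assuming A and B are
   placeholder HImode register rtxes, a HImode addition always qualifies
   for promotion to SImode, while a multiply qualifies only on CPUs
   newer than the 486.  */
#if 0
  promotable_binary_operator (gen_rtx_PLUS (HImode, a, b), HImode); /* 1 */
  promotable_binary_operator (gen_rtx_MULT (HImode, a, b), HImode); /* 1 past the 486 */
#endif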

/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */

int
cmp_fp_expander_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (GET_CODE (op) == CONST_DOUBLE)
    return 1;
  return general_operand (op, mode);
}

/* Match an SImode or HImode register for a zero_extract.  */

int
ext_register_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  int regno;
  if ((!TARGET_64BIT || GET_MODE (op) != DImode)
      && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;

  if (!register_operand (op, VOIDmode))
    return 0;

  /* Be careful to accept only registers having upper parts.  */
  regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
  return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
}

/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}

int
mult_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == MULT;
}

int
div_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == DIV;
}

int
arith_or_logical_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
              || GET_RTX_CLASS (GET_CODE (op)) == '2'));
}

/* Returns 1 if OP is a memory operand with a displacement.  */

int
memory_displacement_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}

/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (nonimmediate_operand (op, mode))
    return 1;

  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}

/* Returns 1 if OP is a memory operand that cannot be represented by the
   modRM array.  */

int
long_memory_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}

/* Return nonzero if the rtx is known aligned.  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      if (parts.scale < 4
          && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
        return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
        return 0;
    }
  if (parts.disp)
    {
      if (GET_CODE (parts.disp) != CONST_INT
          || (INTVAL (parts.disp) & 3) != 0)
        return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
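
/* Editorial sketch (not part of the compiler): a word access at %esp+3
   is rejected because the displacement is not a multiple of 4, while
   %esp+4 is accepted, assuming REGNO_POINTER_ALIGN reports the stack
   pointer as 32-bit aligned.  */
#if 0
  rtx m1 = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 3));
  rtx m2 = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
  aligned_operand (m1, SImode);	/* 0 */
  aligned_operand (m2, SImode);	/* 1 */
#endif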
\f
/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;
  /* Note that the 80387 has other constants, such as pi, that we should
     support too.  On some machines these are much slower to load as a
     standard constant than to load from doubles in memory.  */
  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;
  return 0;
}
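
/* Editorial note: the nonzero return values correspond to the two
   special load instructions, 1 for the fldz constant and 2 for fld1:  */
#if 0
  standard_80387_constant_p (CONST0_RTX (DFmode));	/* 1 (fldz) */
  standard_80387_constant_p (CONST1_RTX (DFmode));	/* 2 (fld1) */
#endif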

/* Return 1 if X is an FP constant we can load into an SSE register
   without using memory.  */
int
standard_sse_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE)
    return -1;
  return (x == CONST0_RTX (GET_MODE (x)));
}

/* Returns 1 if OP contains a symbol reference.  */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          register int j;

          for (j = XVECLEN (op, i) - 1; j >= 0; j--)
            if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
              return 1;
        }

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
        return 1;
    }

  return 0;
}

/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  struct ix86_frame frame;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32K bytes of pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}
\f
/* Return 1 if VALUE can be stored in the sign-extended immediate field.  */
int
x86_64_sign_extended_value (value)
     rtx value;
{
  switch (GET_CODE (value))
    {
      /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
         to be at least 32, so all acceptable constants are
         represented as CONST_INT.  */
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
        return 1;
      else
        {
          HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
          return trunc_int_for_mode (val, SImode) == val;
        }
      break;

      /* For certain code models, the symbolic references are known to fit.  */
    case SYMBOL_REF:
      return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;

      /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;

      /* We also may accept the offsetted memory references in certain special
         cases.  */
    case CONST:
      if (GET_CODE (XEXP (value, 0)) == UNSPEC
          && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
        return 1;
      else if (GET_CODE (XEXP (value, 0)) == PLUS)
        {
          rtx op1 = XEXP (XEXP (value, 0), 0);
          rtx op2 = XEXP (XEXP (value, 0), 1);
          HOST_WIDE_INT offset;

          if (ix86_cmodel == CM_LARGE)
            return 0;
          if (GET_CODE (op2) != CONST_INT)
            return 0;
          offset = trunc_int_for_mode (INTVAL (op2), DImode);
          switch (GET_CODE (op1))
            {
            case SYMBOL_REF:
              /* For CM_SMALL assume that the latest object is 1MB before
                 the end of the 31-bit boundary.  We may also accept pretty
                 large negative constants, knowing that all objects are
                 in the positive half of the address space.  */
              if (ix86_cmodel == CM_SMALL
                  && offset < 1024*1024*1024
                  && trunc_int_for_mode (offset, SImode) == offset)
                return 1;
              /* For CM_KERNEL we know that all objects reside in the
                 negative half of the 32-bit address space.  We may not
                 accept negative offsets, since they may be just off,
                 but we may accept pretty large positive ones.  */
              if (ix86_cmodel == CM_KERNEL
                  && offset > 0
                  && trunc_int_for_mode (offset, SImode) == offset)
                return 1;
              break;
            case LABEL_REF:
              /* These conditions are similar to SYMBOL_REF ones, just the
                 constraints for code models differ.  */
              if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
                  && offset < 1024*1024*1024
                  && trunc_int_for_mode (offset, SImode) == offset)
                return 1;
              if (ix86_cmodel == CM_KERNEL
                  && offset > 0
                  && trunc_int_for_mode (offset, SImode) == offset)
                return 1;
              break;
            default:
              return 0;
            }
        }
      return 0;
    default:
      return 0;
    }
}
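
/* Editorial sketch (not part of the compiler): assuming a 64-bit
   HOST_WIDE_INT, a constant is accepted exactly when it survives
   truncation to SImode.  */
#if 0
  x86_64_sign_extended_value (GEN_INT (0x7fffffff));		  /* 1 */
  x86_64_sign_extended_value (GEN_INT ((HOST_WIDE_INT) 1 << 31)); /* 0 */
#endif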

/* Return 1 if VALUE can be stored in the zero-extended immediate field.  */
int
x86_64_zero_extended_value (value)
     rtx value;
{
  switch (GET_CODE (value))
    {
    case CONST_DOUBLE:
      if (HOST_BITS_PER_WIDE_INT == 32)
        return (GET_MODE (value) == VOIDmode
                && !CONST_DOUBLE_HIGH (value));
      else
        return 0;
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
        return INTVAL (value) >= 0;
      else
        return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
      break;

      /* For certain code models, the symbolic references are known to fit.  */
    case SYMBOL_REF:
      return ix86_cmodel == CM_SMALL;

      /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;

      /* We also may accept the offsetted memory references in certain special
         cases.  */
    case CONST:
      if (GET_CODE (XEXP (value, 0)) == PLUS)
        {
          rtx op1 = XEXP (XEXP (value, 0), 0);
          rtx op2 = XEXP (XEXP (value, 0), 1);

          if (ix86_cmodel == CM_LARGE)
            return 0;
          switch (GET_CODE (op1))
            {
            case SYMBOL_REF:
              /* For the small code model we may accept pretty large positive
                 offsets, since one bit is available for free.  Negative
                 offsets are limited by the size of the NULL pointer area
                 specified by the ABI.  */
              if (ix86_cmodel == CM_SMALL
                  && GET_CODE (op2) == CONST_INT
                  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
                  && (trunc_int_for_mode (INTVAL (op2), SImode)
                      == INTVAL (op2)))
                return 1;
              /* ??? For the kernel, we may accept adjustment of
                 -0x10000000, since we know that it will just convert
                 negative address space to positive, but perhaps this
                 is not worthwhile.  */
              break;
            case LABEL_REF:
              /* These conditions are similar to SYMBOL_REF ones, just the
                 constraints for code models differ.  */
              if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
                  && GET_CODE (op2) == CONST_INT
                  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
                  && (trunc_int_for_mode (INTVAL (op2), SImode)
                      == INTVAL (op2)))
                return 1;
              break;
            default:
              return 0;
            }
        }
      return 0;
    default:
      return 0;
    }
}
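
/* Editorial sketch (not part of the compiler): assuming a 64-bit
   HOST_WIDE_INT, only constants whose upper 32 bits are clear are
   accepted.  */
#if 0
  x86_64_zero_extended_value (GEN_INT ((HOST_WIDE_INT) 0xffffffff)); /* 1 */
  x86_64_zero_extended_value (constm1_rtx);			      /* 0 */
#endif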

/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

int
ix86_frame_pointer_required ()
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return 1;

  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return 1;

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
    return 1;

  return 0;
}

/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses ()
{
  cfun->machine->accesses_prev_frame = 1;
}
\f
static char pic_label_name[32];

/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

void
ix86_asm_file_end (file)
     FILE *file;
{
  rtx xops[2];

  if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
    return;

  /* ??? Binutils 2.10 and earlier have a linkonce elimination bug related
     to updating relocations to a section being discarded such that this
     doesn't work.  Ought to detect this at configure time.  */
#if 0
  /* The trick here is to create a linkonce section containing the
     pic label thunk, but to refer to it with an internal label.
     Because the label is internal, we don't have inter-dso name
     binding issues on hosts that don't support ".hidden".

     In order to use these macros, however, we must create a fake
     function decl.  */
  if (targetm.have_named_sections)
    {
      tree decl = build_decl (FUNCTION_DECL,
                              get_identifier ("i686.get_pc_thunk"),
                              error_mark_node);
      DECL_ONE_ONLY (decl) = 1;
      (*targetm.asm_out.unique_section) (decl, 0);
      named_section (decl, NULL);
    }
  else
#else
  text_section ();
#endif

  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
     internal (non-global) label that's being emitted, it didn't make
     sense to have .type information for local labels.  This caused
     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
     me debug info for a label that you're declaring non-global?), so
     this was changed to call ASM_OUTPUT_LABEL() instead.  */

  ASM_OUTPUT_LABEL (file, pic_label_name);

  xops[0] = pic_offset_table_rtx;
  xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
  output_asm_insn ("ret", xops);
}

void
load_pic_register ()
{
  rtx gotsym, pclab;

  if (TARGET_64BIT)
    abort ();

  gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");

  if (TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (! pic_label_name[0])
        ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
    }
  else
    {
      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
    }

  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));

  if (! TARGET_DEEP_BRANCH_PREDICTION)
    emit_insn (gen_popsi1 (pic_offset_table_rtx));

  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
}

/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (arg)
     rtx arg;
{
  return gen_rtx_SET (VOIDmode,
                      gen_rtx_MEM (Pmode,
                                   gen_rtx_PRE_DEC (Pmode,
                                                    stack_pointer_rtx)),
                      arg);
}
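
/* Editorial note: for SImode this builds the RTL
   (set (mem:SI (pre_dec:SI (reg:SI esp))) arg), which matches the
   push instruction pattern; e.g.:  */
#if 0
  emit_insn (gen_push (gen_rtx_REG (Pmode, 0)));	/* push %eax */
#endif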

/* Return 1 if we need to save REGNO.  */
static int
ix86_save_reg (regno, maybe_eh_return)
     unsigned int regno;
     int maybe_eh_return;
{
  if (regno == PIC_OFFSET_TABLE_REGNUM
      && (current_function_uses_pic_offset_table
          || current_function_uses_const_pool
          || current_function_calls_eh_return))
    return 1;

  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
        {
          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)
            break;
          if (test == regno)
            return 1;
        }
    }

  return (regs_ever_live[regno]
          && !call_used_regs[regno]
          && !fixed_regs[regno]
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}

/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs ()
{
  int nregs = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}

/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;
     int to;
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
           && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      if (to != STACK_POINTER_REGNUM)
        abort ();
      else if (from == ARG_POINTER_REGNUM)
        return frame.stack_pointer_offset;
      else if (from != FRAME_POINTER_REGNUM)
        abort ();
      else
        return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
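
/* Editorial note: when the frame pointer is in use, eliminating the
   argument pointer to the hard frame pointer yields 2 * UNITS_PER_WORD
   (the return address plus the saved %ebp), per the layout computed
   below.  */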

/* Fill the structure ix86_frame describing the frame of the currently
   computed function.  */

static void
ix86_compute_frame_layout (frame)
     struct ix86_frame *frame;
{
  HOST_WIDE_INT total_size;
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  int offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  total_size = size;

  /* Skip return address and saved base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using these
     features, and they may break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  /* Va-arg area */
  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
                     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  */
  if (ACCUMULATE_OUTGOING_ARGS)
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  */
  frame->padding2 = ((offset + preferred_alignment - 1)
                     & -preferred_alignment) - offset;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size the prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
        frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
           frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
#endif
}
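
/* Editorial sketch of the resulting layout, growing downward (field
   names as computed above; red zone adjustments omitted):

	return address			<- entry %esp
	saved %ebp			(if frame_pointer_needed)
	saved registers			(nregs words)
	va_arg register save		(if ix86_save_varrargs_registers)
	padding1
	local frame			<- frame_pointer_offset
	outgoing arguments		(if ACCUMULATE_OUTGOING_ARGS)
	padding2			<- stack_pointer_offset

   to_allocate covers everything below the register save areas.  */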

/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs ()
{
  register int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      {
        insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
        RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit code to save registers using MOV insns.  The first register
   is stored at POINTER + OFFSET.  */
static void
ix86_emit_save_regs_using_mov (pointer, offset)
     rtx pointer;
     HOST_WIDE_INT offset;
{
  int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
        insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
                                               Pmode, offset),
                               gen_rtx_REG (Pmode, regno));
        RTX_FRAME_RELATED_P (insn) = 1;
        offset += UNITS_PER_WORD;
      }
}

/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue ()
{
  rtx insn;
  int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
                                   || current_function_uses_const_pool)
                      && !TARGET_64BIT);
  struct ix86_frame frame;
  int use_mov = 0;
  HOST_WIDE_INT allocate;

  if (!optimize_size)
    {
      use_fast_prologue_epilogue
        = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
      if (TARGET_PROLOGUE_USING_MOVE)
        use_mov = use_fast_prologue_epilogue;
    }
  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;
  /* In case we are dealing only with a single register and an empty frame,
     push is equivalent to the mov+add sequence.  */
  if (allocate == 0 && frame.nregs <= 1)
    use_mov = 0;

  if (!use_mov)
    ix86_emit_save_regs ();
  else
    allocate += frame.nregs * UNITS_PER_WORD;

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    {
      insn = emit_insn (gen_pro_epilogue_adjust_stack
                        (stack_pointer_rtx, stack_pointer_rtx,
                         GEN_INT (-allocate)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      if (TARGET_64BIT)
        abort ();

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (allocate));

      sym = gen_rtx_MEM (FUNCTION_MODE,
                         gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));

      CALL_INSN_FUNCTION_USAGE (insn)
        = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
                             CALL_INSN_FUNCTION_USAGE (insn));
    }
  if (use_mov)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
                                       -frame.nregs * UNITS_PER_WORD);
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    load_pic_register ();

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  However, if -fpic, the above call will have
     done that.  */
  if (current_function_profile && ! pic_reg_used)
    emit_insn (gen_blockage ());
}

/* Emit code to restore saved registers using MOV insns.  The first
   register is restored from POINTER + OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
     rtx pointer;
     int offset;
     int maybe_eh_return;
{
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
        emit_move_insn (gen_rtx_REG (Pmode, regno),
                        adjust_address (gen_rtx_MEM (Pmode, pointer),
                                        Pmode, offset));
        offset += UNITS_PER_WORD;
      }
}

/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (style)
     int style;
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in a stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well.  Especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in the
     future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
          && use_fast_prologue_epilogue
          && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
          && use_fast_prologue_epilogue && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
         locations.  If both are available, default to ebp, since offsets
         are known to be small.  The only exception is esp pointing
         directly to the end of the block of saved registers, where we
         may simplify the addressing mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
        ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
                                          frame.to_allocate, style == 2);
      else
        ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
                                          offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
        {
          rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

          if (frame_pointer_needed)
            {
              tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
              tmp = plus_constant (tmp, UNITS_PER_WORD);
              emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

              tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
              emit_move_insn (hard_frame_pointer_rtx, tmp);

              emit_insn (gen_pro_epilogue_adjust_stack
                         (stack_pointer_rtx, sa, const0_rtx));
            }
          else
            {
              tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
              tmp = plus_constant (tmp, (frame.to_allocate
                                         + frame.nregs * UNITS_PER_WORD));
              emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
            }
        }
      else if (!frame_pointer_needed)
        emit_insn (gen_pro_epilogue_adjust_stack
                   (stack_pointer_rtx, stack_pointer_rtx,
                    GEN_INT (frame.to_allocate
                             + frame.nregs * UNITS_PER_WORD)));
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
        emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
        {
          emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
                                                    hard_frame_pointer_rtx,
                                                    const0_rtx));
          if (TARGET_64BIT)
            emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
          else
            emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
        }
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
         pop the registers.  */
      if (!sp_valid)
        {
          if (!frame_pointer_needed)
            abort ();
          emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
                                                    hard_frame_pointer_rtx,
                                                    GEN_INT (offset)));
        }
      else if (frame.to_allocate)
        emit_insn (gen_pro_epilogue_adjust_stack
                   (stack_pointer_rtx, stack_pointer_rtx,
                    GEN_INT (frame.to_allocate)));

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
        if (ix86_save_reg (regno, false))
          {
            if (TARGET_64BIT)
              emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
            else
              emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
          }
      if (frame_pointer_needed)
        {
          /* Leave results in shorter dependency chains on CPUs that are
             able to grok it fast.  */
          if (TARGET_USE_LEAVE)
            emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
          else if (TARGET_64BIT)
            emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
          else
            emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
        }
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
         return address, do an explicit add, and jump indirectly to
         the caller.  */

      if (current_function_pops_args >= 65536)
        {
          rtx ecx = gen_rtx_REG (SImode, 2);

          /* There is no "pascal" calling convention in the 64-bit ABI.  */
          if (TARGET_64BIT)
            abort ();

          emit_insn (gen_popsi1 (ecx));
          emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
          emit_jump_insn (gen_return_indirect_internal (ecx));
        }
      else
        emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
\f
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing the length of an lea
   instruction.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;

  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
        {
          if (code1 == REG || code1 == SUBREG)
            index = op0, base = op1;	/* index + base */
          else
            base = op0, disp = op1;	/* base + displacement */
        }
      else if (code0 == MULT)
        {
          index = XEXP (op0, 0);
          scale_rtx = XEXP (op0, 1);
          if (code1 == REG || code1 == SUBREG)
            base = op1;			/* index*scale + base */
          else
            disp = op1;			/* index*scale + disp */
        }
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
        {
          index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
          scale_rtx = XEXP (XEXP (op0, 0), 1);
          base = XEXP (op0, 1);
          disp = op1;
        }
      else if (code0 == PLUS)
        {
          index = XEXP (op0, 0);	/* index + base + disp */
          base = XEXP (op0, 1);
          disp = op1;
        }
      else
        return 0;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
        return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
        return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
        return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
          || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return retval;
}
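
/* Editorial sketch (not part of the compiler): the canonical form
   (plus (plus (mult A 4) B) (const_int 16)) decomposes into base = B,
   index = A, scale = 4, disp = 16, with a return value of 1.  */
#if 0
  struct ix86_address parts;
  rtx addr = gen_rtx_PLUS (SImode,
                           gen_rtx_PLUS (SImode,
                                         gen_rtx_MULT (SImode,
                                                       gen_rtx_REG (SImode, 0),
                                                       GEN_INT (4)),
                                         gen_rtx_REG (SImode, 3)),
                           GEN_INT (16));
  if (ix86_decompose_address (addr, &parts) == 1)
    /* parts.base = %ebx, parts.index = %eax, parts.scale = 4,
       parts.disp = (const_int 16).  */;
#endif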
\f
/* Return the cost of the memory address X.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
int
ix86_address_cost (x)
     rtx x;
{
  struct ix86_address parts;
  int cost = 1;

  if (!ix86_decompose_address (x, &parts))
    abort ();

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;

  /* Attempt to minimize the number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
          && (!REG_P (parts.index)
              || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* The AMD K6 doesn't like addresses with the ModR/M byte set to
     00_xxx_100b, since its predecode logic can't detect the length of
     such instructions, and decoding degenerates to vector decode.
     Increase the cost of such addresses here.  The penalty is at least
     2 cycles.  It may be worthwhile to split such addresses or even
     refuse them entirely.

     The following addressing modes are affected:
	[base+scale*index]
	[scale*index+disp]
	[base+index]

     The first and last cases may be avoidable by explicitly coding the
     zero into the memory address, but I don't have an AMD K6 machine
     handy to check this theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
          || (parts.disp && !parts.base && parts.index && parts.scale != 1)
          || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
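
/* Editorial note: starting from cost 1, a plain displacement address
   such as %ebp+8 drops to 0, while an address built from two distinct
   pseudo registers rises to 3 (assuming a non-K6 target).  */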
\f
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (x)
     rtx x;
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
        return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
          && (GET_CODE (XEXP (term, 1)) == CONST_INT
              || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
        term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
          || XINT (term, 1) != UNSPEC_GOTPCREL)
        return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
          && GET_CODE (term) != LABEL_REF)
        return x;

      return term;
    }

  if (GET_CODE (x) != PLUS
      || XEXP (x, 0) != pic_offset_table_rtx
      || GET_CODE (XEXP (x, 1)) != CONST)
    return x;

  term = XEXP (XEXP (x, 1), 0);

  if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
    term = XEXP (term, 0);

  if (GET_CODE (term) != UNSPEC
      || XINT (term, 1) != UNSPEC_GOTOFF)
    return x;

  term = XVECEXP (term, 0, 0);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}
\f
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (disp)
     register rtx disp;
{
  /* In 64-bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx x = disp;
      if (GET_CODE (disp) == CONST)
        x = XEXP (disp, 0);
      /* ??? Handle PIC code models */
      if (GET_CODE (x) == PLUS
          && (GET_CODE (XEXP (x, 1)) == CONST_INT
              && ix86_cmodel == CM_SMALL_PIC
              && INTVAL (XEXP (x, 1)) < 1024*1024*1024
              && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
        x = XEXP (x, 0);
      if (local_symbolic_operand (x, Pmode))
        return 1;
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions; this limits the allowed
         distance of GOT table references.  We should not need these
         anyway.  */
      if (GET_CODE (disp) != UNSPEC
          || XINT (disp, 1) != UNSPEC_GOTPCREL)
        return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
          && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
        return 0;
      return 1;
    }

  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
        return 0;
      disp = XEXP (disp, 0);
    }

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  /* Must be @GOT or @GOTOFF.  */
  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}

/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
               "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
               GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREGs here; they can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      rtx reg;
      reason_rtx = base;

      if (GET_CODE (base) == SUBREG)
        reg = SUBREG_REG (base);
      else
        reg = base;

      if (GET_CODE (reg) != REG)
        {
          reason = "base is not a register";
          goto report_error;
        }

      if (GET_MODE (base) != Pmode)
        {
          reason = "base is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
        {
          reason = "base is not valid";
          goto report_error;
        }
    }

  /* Validate index register.

     Don't allow SUBREGs here; they can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      rtx reg;
      reason_rtx = index;

      if (GET_CODE (index) == SUBREG)
        reg = SUBREG_REG (index);
      else
        reg = index;

      if (GET_CODE (reg) != REG)
        {
          reason = "index is not a register";
          goto report_error;
        }

      if (GET_MODE (index) != Pmode)
        {
          reason = "index is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
        {
          reason = "index is not valid";
          goto report_error;
        }
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
        {
          reason = "scale without index";
          goto report_error;
        }

      if (scale != 2 && scale != 4 && scale != 8)
        {
          reason = "scale is not a valid multiplier";
          goto report_error;
        }
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (!CONSTANT_ADDRESS_P (disp))
        {
          reason = "displacement is not constant";
          goto report_error;
        }

      if (TARGET_64BIT)
        {
          if (!x86_64_sign_extended_value (disp))
            {
              reason = "displacement is out of range";
              goto report_error;
            }
        }
      else
        {
          if (GET_CODE (disp) == CONST_DOUBLE)
            {
              reason = "displacement is a const_double";
              goto report_error;
            }
        }

      if (flag_pic && SYMBOLIC_CONST (disp))
        {
          if (TARGET_64BIT && (index || base))
            {
              reason = "non-constant pic memory reference";
              goto report_error;
            }
          if (! legitimate_pic_address_disp_p (disp))
            {
              reason = "displacement is an invalid pic construct";
              goto report_error;
            }

          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is a good idea, unfortunately these constructs may
             be created by the "adds using lea" optimization for incorrect
             code like:

             int a;
             int foo(int i)
               {
                 return *(&a+i);
               }

             This code is nonsensical, but results in addressing the
             GOT table with a pic_offset_table_rtx base.  We can't
             just refuse it easily, since it gets matched by the
             "addsi3" pattern, which later gets split to an lea when
             the output register differs from the input.  While this
             could be handled by a separate addsi pattern for this case
             that never results in an lea, disabling this test seems to
             be the easier and correct fix for the crash.  */
        }
      else if (HALF_PIC_P ())
        {
          if (! HALF_PIC_ADDRESS_P (disp)
              || (base != NULL_RTX || index != NULL_RTX))
            {
              reason = "displacement is an invalid half-pic reference";
              goto report_error;
            }
        }
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
\f
3b3c6a3f 4921\f
55efb413
JW
4922/* Return an unique alias set for the GOT. */
4923
0f290768 4924static HOST_WIDE_INT
55efb413
JW
4925ix86_GOT_alias_set ()
4926{
4927 static HOST_WIDE_INT set = -1;
4928 if (set == -1)
4929 set = new_alias_set ();
4930 return set;
0f290768 4931}
55efb413 4932
3b3c6a3f
MM
4933/* Return a legitimate reference for ORIG (an address) using the
4934 register REG. If REG is 0, a new pseudo is generated.
4935
91bb873f 4936 There are two types of references that must be handled:
3b3c6a3f
MM
4937
4938 1. Global data references must load the address from the GOT, via
4939 the PIC reg. An insn is emitted to do this load, and the reg is
4940 returned.
4941
91bb873f
RH
4942 2. Static data references, constant pool addresses, and code labels
4943 compute the address as an offset from the GOT, whose base is in
4944 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
4945 differentiate them from global data objects. The returned
4946 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
4947
4948 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 4949 reg also appears in the address. */
3b3c6a3f
MM
4950
4951rtx
4952legitimize_pic_address (orig, reg)
4953 rtx orig;
4954 rtx reg;
4955{
4956 rtx addr = orig;
4957 rtx new = orig;
91bb873f 4958 rtx base;
3b3c6a3f 4959
623fe810 4960 if (local_symbolic_operand (addr, Pmode))
3b3c6a3f 4961 {
14f73b5a
JH
4962 /* In 64bit mode we can address such objects directly. */
4963 if (TARGET_64BIT)
4964 new = addr;
4965 else
4966 {
4967 /* This symbol may be referenced via a displacement from the PIC
4968 base address (@GOTOFF). */
3b3c6a3f 4969
14f73b5a 4970 current_function_uses_pic_offset_table = 1;
8ee41eaf 4971 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
14f73b5a
JH
4972 new = gen_rtx_CONST (Pmode, new);
4973 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3b3c6a3f 4974
14f73b5a
JH
4975 if (reg != 0)
4976 {
4977 emit_move_insn (reg, new);
4978 new = reg;
4979 }
4980 }
3b3c6a3f 4981 }
91bb873f 4982 else if (GET_CODE (addr) == SYMBOL_REF)
3b3c6a3f 4983 {
14f73b5a
JH
4984 if (TARGET_64BIT)
4985 {
4986 current_function_uses_pic_offset_table = 1;
8ee41eaf 4987 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
14f73b5a
JH
4988 new = gen_rtx_CONST (Pmode, new);
4989 new = gen_rtx_MEM (Pmode, new);
4990 RTX_UNCHANGING_P (new) = 1;
4991 set_mem_alias_set (new, ix86_GOT_alias_set ());
4992
4993 if (reg == 0)
4994 reg = gen_reg_rtx (Pmode);
4995 /* Use directly gen_movsi, otherwise the address is loaded
4996 into register for CSE. We don't want to CSE this addresses,
4997 instead we CSE addresses from the GOT table, so skip this. */
4998 emit_insn (gen_movsi (reg, new));
4999 new = reg;
5000 }
5001 else
5002 {
5003 /* This symbol must be referenced via a load from the
5004 Global Offset Table (@GOT). */
3b3c6a3f 5005
14f73b5a 5006 current_function_uses_pic_offset_table = 1;
8ee41eaf 5007 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
14f73b5a
JH
5008 new = gen_rtx_CONST (Pmode, new);
5009 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5010 new = gen_rtx_MEM (Pmode, new);
5011 RTX_UNCHANGING_P (new) = 1;
5012 set_mem_alias_set (new, ix86_GOT_alias_set ());
3b3c6a3f 5013
14f73b5a
JH
5014 if (reg == 0)
5015 reg = gen_reg_rtx (Pmode);
5016 emit_move_insn (reg, new);
5017 new = reg;
5018 }
0f290768 5019 }
91bb873f
RH
5020 else
5021 {
5022 if (GET_CODE (addr) == CONST)
3b3c6a3f 5023 {
91bb873f 5024 addr = XEXP (addr, 0);
e3c8ea67
RH
5025
5026 /* We must match stuff we generate before. Assume the only
5027 unspecs that can get here are ours. Not that we could do
5028 anything with them anyway... */
5029 if (GET_CODE (addr) == UNSPEC
5030 || (GET_CODE (addr) == PLUS
5031 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5032 return orig;
5033 if (GET_CODE (addr) != PLUS)
564d80f4 5034 abort ();
3b3c6a3f 5035 }
91bb873f
RH
5036 if (GET_CODE (addr) == PLUS)
5037 {
5038 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 5039
91bb873f
RH
5040 /* Check first to see if this is a constant offset from a @GOTOFF
5041 symbol reference. */
623fe810 5042 if (local_symbolic_operand (op0, Pmode)
91bb873f
RH
5043 && GET_CODE (op1) == CONST_INT)
5044 {
6eb791fc
JH
5045 if (!TARGET_64BIT)
5046 {
5047 current_function_uses_pic_offset_table = 1;
8ee41eaf
RH
5048 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5049 UNSPEC_GOTOFF);
6eb791fc
JH
5050 new = gen_rtx_PLUS (Pmode, new, op1);
5051 new = gen_rtx_CONST (Pmode, new);
5052 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
91bb873f 5053
6eb791fc
JH
5054 if (reg != 0)
5055 {
5056 emit_move_insn (reg, new);
5057 new = reg;
5058 }
5059 }
5060 else
91bb873f 5061 {
6eb791fc 5062 /* ??? We need to limit offsets here. */
91bb873f
RH
5063 }
5064 }
5065 else
5066 {
5067 base = legitimize_pic_address (XEXP (addr, 0), reg);
5068 new = legitimize_pic_address (XEXP (addr, 1),
5069 base == reg ? NULL_RTX : reg);
5070
5071 if (GET_CODE (new) == CONST_INT)
5072 new = plus_constant (base, INTVAL (new));
5073 else
5074 {
5075 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5076 {
5077 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5078 new = XEXP (new, 1);
5079 }
5080 new = gen_rtx_PLUS (Pmode, base, new);
5081 }
5082 }
5083 }
3b3c6a3f
MM
5084 }
5085 return new;
5086}
5087\f
3b3c6a3f
MM
5088/* Try machine-dependent ways of modifying an illegitimate address
5089 to be legitimate. If we find one, return the new, valid address.
5090 This macro is used in only one place: `memory_address' in explow.c.
5091
5092 OLDX is the address as it was before break_out_memory_refs was called.
5093 In some cases it is useful to look at this to decide what needs to be done.
5094
5095 MODE and WIN are passed so that this macro can use
5096 GO_IF_LEGITIMATE_ADDRESS.
5097
5098 It is always safe for this macro to do nothing. It exists to recognize
5099 opportunities to optimize the output.
5100
5101 For the 80386, we handle X+REG by loading X into a register R and
5102 using R+REG. R will go in a general reg and indexing will be used.
5103 However, if REG is a broken-out memory address or multiplication,
5104 nothing needs to be done because REG can certainly go in a general reg.
5105
5106 When -fpic is used, special handling is needed for symbolic references.
5107 See comments by legitimize_pic_address in i386.c for details. */
5108
5109rtx
5110legitimize_address (x, oldx, mode)
5111 register rtx x;
bb5177ac 5112 register rtx oldx ATTRIBUTE_UNUSED;
3b3c6a3f
MM
5113 enum machine_mode mode;
5114{
5115 int changed = 0;
5116 unsigned log;
5117
5118 if (TARGET_DEBUG_ADDR)
5119 {
e9a25f70
JL
5120 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5121 GET_MODE_NAME (mode));
3b3c6a3f
MM
5122 debug_rtx (x);
5123 }
5124
5125 if (flag_pic && SYMBOLIC_CONST (x))
5126 return legitimize_pic_address (x, 0);
5127
5128 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5129 if (GET_CODE (x) == ASHIFT
5130 && GET_CODE (XEXP (x, 1)) == CONST_INT
b531087a 5131 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
3b3c6a3f
MM
5132 {
5133 changed = 1;
a269a03c
JC
5134 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5135 GEN_INT (1 << log));
3b3c6a3f
MM
5136 }
5137
5138 if (GET_CODE (x) == PLUS)
5139 {
0f290768 5140 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 5141
3b3c6a3f
MM
5142 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5143 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
b531087a 5144 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
3b3c6a3f
MM
5145 {
5146 changed = 1;
c5c76735
JL
5147 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5148 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5149 GEN_INT (1 << log));
3b3c6a3f
MM
5150 }
5151
5152 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5153 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
b531087a 5154 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
3b3c6a3f
MM
5155 {
5156 changed = 1;
c5c76735
JL
5157 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5158 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5159 GEN_INT (1 << log));
3b3c6a3f
MM
5160 }
5161
0f290768 5162 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
5163 if (GET_CODE (XEXP (x, 1)) == MULT)
5164 {
5165 rtx tmp = XEXP (x, 0);
5166 XEXP (x, 0) = XEXP (x, 1);
5167 XEXP (x, 1) = tmp;
5168 changed = 1;
5169 }
5170
5171 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5172 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5173 created by virtual register instantiation, register elimination, and
5174 similar optimizations. */
5175 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5176 {
5177 changed = 1;
c5c76735
JL
5178 x = gen_rtx_PLUS (Pmode,
5179 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5180 XEXP (XEXP (x, 1), 0)),
5181 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
5182 }
5183
e9a25f70
JL
5184 /* Canonicalize
5185 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
5186 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5187 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5188 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5189 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5190 && CONSTANT_P (XEXP (x, 1)))
5191 {
00c79232
ML
5192 rtx constant;
5193 rtx other = NULL_RTX;
3b3c6a3f
MM
5194
5195 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5196 {
5197 constant = XEXP (x, 1);
5198 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5199 }
5200 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5201 {
5202 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5203 other = XEXP (x, 1);
5204 }
5205 else
5206 constant = 0;
5207
5208 if (constant)
5209 {
5210 changed = 1;
c5c76735
JL
5211 x = gen_rtx_PLUS (Pmode,
5212 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5213 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5214 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
5215 }
5216 }
5217
5218 if (changed && legitimate_address_p (mode, x, FALSE))
5219 return x;
5220
5221 if (GET_CODE (XEXP (x, 0)) == MULT)
5222 {
5223 changed = 1;
5224 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5225 }
5226
5227 if (GET_CODE (XEXP (x, 1)) == MULT)
5228 {
5229 changed = 1;
5230 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5231 }
5232
5233 if (changed
5234 && GET_CODE (XEXP (x, 1)) == REG
5235 && GET_CODE (XEXP (x, 0)) == REG)
5236 return x;
5237
5238 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5239 {
5240 changed = 1;
5241 x = legitimize_pic_address (x, 0);
5242 }
5243
5244 if (changed && legitimate_address_p (mode, x, FALSE))
5245 return x;
5246
5247 if (GET_CODE (XEXP (x, 0)) == REG)
5248 {
5249 register rtx temp = gen_reg_rtx (Pmode);
5250 register rtx val = force_operand (XEXP (x, 1), temp);
5251 if (val != temp)
5252 emit_move_insn (temp, val);
5253
5254 XEXP (x, 1) = temp;
5255 return x;
5256 }
5257
5258 else if (GET_CODE (XEXP (x, 1)) == REG)
5259 {
5260 register rtx temp = gen_reg_rtx (Pmode);
5261 register rtx val = force_operand (XEXP (x, 0), temp);
5262 if (val != temp)
5263 emit_move_insn (temp, val);
5264
5265 XEXP (x, 0) = temp;
5266 return x;
5267 }
5268 }
5269
5270 return x;
5271}
2a2ab3f9
JVA
5272\f
5273/* Print an integer constant expression in assembler syntax. Addition
5274 and subtraction are the only arithmetic that may appear in these
5275 expressions. FILE is the stdio stream to write to, X is the rtx, and
5276 CODE is the operand print code from the output string. */
5277
5278static void
5279output_pic_addr_const (file, x, code)
5280 FILE *file;
5281 rtx x;
5282 int code;
5283{
5284 char buf[256];
5285
5286 switch (GET_CODE (x))
5287 {
5288 case PC:
5289 if (flag_pic)
5290 putc ('.', file);
5291 else
5292 abort ();
5293 break;
5294
5295 case SYMBOL_REF:
91bb873f
RH
5296 assemble_name (file, XSTR (x, 0));
5297 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5298 fputs ("@PLT", file);
2a2ab3f9
JVA
5299 break;
5300
91bb873f
RH
5301 case LABEL_REF:
5302 x = XEXP (x, 0);
5303 /* FALLTHRU */
2a2ab3f9
JVA
5304 case CODE_LABEL:
5305 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5306 assemble_name (asm_out_file, buf);
5307 break;
5308
5309 case CONST_INT:
f64cecad 5310 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
5311 break;
5312
5313 case CONST:
5314 /* This used to output parentheses around the expression,
5315 but that does not work on the 386 (either ATT or BSD assembler). */
5316 output_pic_addr_const (file, XEXP (x, 0), code);
5317 break;
5318
5319 case CONST_DOUBLE:
5320 if (GET_MODE (x) == VOIDmode)
5321 {
5322 /* We can use %d if the number is <32 bits and positive. */
5323 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
5324 fprintf (file, "0x%lx%08lx",
5325 (unsigned long) CONST_DOUBLE_HIGH (x),
5326 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 5327 else
f64cecad 5328 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
5329 }
5330 else
5331 /* We can't handle floating point constants;
5332 PRINT_OPERAND must handle them. */
5333 output_operand_lossage ("floating constant misused");
5334 break;
5335
5336 case PLUS:
e9a25f70 5337 /* Some assemblers need integer constants to appear first. */
2a2ab3f9
JVA
5338 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5339 {
2a2ab3f9 5340 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 5341 putc ('+', file);
e9a25f70 5342 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 5343 }
91bb873f 5344 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 5345 {
2a2ab3f9 5346 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 5347 putc ('+', file);
e9a25f70 5348 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 5349 }
91bb873f
RH
5350 else
5351 abort ();
2a2ab3f9
JVA
5352 break;
5353
5354 case MINUS:
80f33d06 5355 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
2a2ab3f9 5356 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 5357 putc ('-', file);
2a2ab3f9 5358 output_pic_addr_const (file, XEXP (x, 1), code);
80f33d06 5359 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
2a2ab3f9
JVA
5360 break;
5361
91bb873f
RH
5362 case UNSPEC:
5363 if (XVECLEN (x, 0) != 1)
77ebd435 5364 abort ();
91bb873f
RH
5365 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5366 switch (XINT (x, 1))
77ebd435 5367 {
8ee41eaf 5368 case UNSPEC_GOT:
77ebd435
AJ
5369 fputs ("@GOT", file);
5370 break;
8ee41eaf 5371 case UNSPEC_GOTOFF:
77ebd435
AJ
5372 fputs ("@GOTOFF", file);
5373 break;
8ee41eaf 5374 case UNSPEC_PLT:
77ebd435
AJ
5375 fputs ("@PLT", file);
5376 break;
8ee41eaf 5377 case UNSPEC_GOTPCREL:
6eb791fc
JH
5378 fputs ("@GOTPCREL(%RIP)", file);
5379 break;
77ebd435
AJ
5380 default:
5381 output_operand_lossage ("invalid UNSPEC as operand");
5382 break;
5383 }
91bb873f
RH
5384 break;
5385
2a2ab3f9
JVA
5386 default:
5387 output_operand_lossage ("invalid expression as operand");
5388 }
5389}
1865dbb5 5390
0f290768 5391/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
5392 We need to handle our special PIC relocations. */
5393
0f290768 5394void
1865dbb5
JM
5395i386_dwarf_output_addr_const (file, x)
5396 FILE *file;
5397 rtx x;
5398{
14f73b5a 5399#ifdef ASM_QUAD
18b5b8d6 5400 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
14f73b5a
JH
5401#else
5402 if (TARGET_64BIT)
5403 abort ();
18b5b8d6 5404 fprintf (file, "%s", ASM_LONG);
14f73b5a 5405#endif
1865dbb5
JM
5406 if (flag_pic)
5407 output_pic_addr_const (file, x, '\0');
5408 else
5409 output_addr_const (file, x);
5410 fputc ('\n', file);
5411}
5412
5413/* In the name of slightly smaller debug output, and to cater to
5414 general assembler losage, recognize PIC+GOTOFF and turn it back
5415 into a direct symbol reference. */
5416
5417rtx
5418i386_simplify_dwarf_addr (orig_x)
5419 rtx orig_x;
5420{
ec65b2e3 5421 rtx x = orig_x, y;
1865dbb5 5422
4c8c0dec
JJ
5423 if (GET_CODE (x) == MEM)
5424 x = XEXP (x, 0);
5425
6eb791fc
JH
5426 if (TARGET_64BIT)
5427 {
5428 if (GET_CODE (x) != CONST
5429 || GET_CODE (XEXP (x, 0)) != UNSPEC
8ee41eaf 5430 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
4c8c0dec 5431 || GET_CODE (orig_x) != MEM)
6eb791fc
JH
5432 return orig_x;
5433 return XVECEXP (XEXP (x, 0), 0, 0);
5434 }
5435
1865dbb5 5436 if (GET_CODE (x) != PLUS
1865dbb5
JM
5437 || GET_CODE (XEXP (x, 1)) != CONST)
5438 return orig_x;
5439
ec65b2e3
JJ
5440 if (GET_CODE (XEXP (x, 0)) == REG
5441 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5442 /* %ebx + GOT/GOTOFF */
5443 y = NULL;
5444 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5445 {
5446 /* %ebx + %reg * scale + GOT/GOTOFF */
5447 y = XEXP (x, 0);
5448 if (GET_CODE (XEXP (y, 0)) == REG
5449 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5450 y = XEXP (y, 1);
5451 else if (GET_CODE (XEXP (y, 1)) == REG
5452 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5453 y = XEXP (y, 0);
5454 else
5455 return orig_x;
5456 if (GET_CODE (y) != REG
5457 && GET_CODE (y) != MULT
5458 && GET_CODE (y) != ASHIFT)
5459 return orig_x;
5460 }
5461 else
5462 return orig_x;
5463
1865dbb5
JM
5464 x = XEXP (XEXP (x, 1), 0);
5465 if (GET_CODE (x) == UNSPEC
8ee41eaf
RH
5466 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5467 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
5468 {
5469 if (y)
5470 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5471 return XVECEXP (x, 0, 0);
5472 }
1865dbb5
JM
5473
5474 if (GET_CODE (x) == PLUS
5475 && GET_CODE (XEXP (x, 0)) == UNSPEC
5476 && GET_CODE (XEXP (x, 1)) == CONST_INT
8ee41eaf
RH
5477 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5478 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
5479 && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
5480 {
5481 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5482 if (y)
5483 return gen_rtx_PLUS (Pmode, y, x);
5484 return x;
5485 }
1865dbb5
JM
5486
5487 return orig_x;
5488}
2a2ab3f9 5489\f
a269a03c 5490static void
e075ae69 5491put_condition_code (code, mode, reverse, fp, file)
a269a03c 5492 enum rtx_code code;
e075ae69
RH
5493 enum machine_mode mode;
5494 int reverse, fp;
a269a03c
JC
5495 FILE *file;
5496{
a269a03c
JC
5497 const char *suffix;
5498
9a915772
JH
5499 if (mode == CCFPmode || mode == CCFPUmode)
5500 {
5501 enum rtx_code second_code, bypass_code;
5502 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5503 if (bypass_code != NIL || second_code != NIL)
b531087a 5504 abort ();
9a915772
JH
5505 code = ix86_fp_compare_code_to_integer (code);
5506 mode = CCmode;
5507 }
a269a03c
JC
5508 if (reverse)
5509 code = reverse_condition (code);
e075ae69 5510
a269a03c
JC
5511 switch (code)
5512 {
5513 case EQ:
5514 suffix = "e";
5515 break;
a269a03c
JC
5516 case NE:
5517 suffix = "ne";
5518 break;
a269a03c 5519 case GT:
7e08e190 5520 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
e075ae69
RH
5521 abort ();
5522 suffix = "g";
a269a03c 5523 break;
a269a03c 5524 case GTU:
e075ae69
RH
5525 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
5526 Those same assemblers have the same but opposite losage on cmov. */
7e08e190 5527 if (mode != CCmode)
0f290768 5528 abort ();
e075ae69 5529 suffix = fp ? "nbe" : "a";
a269a03c 5530 break;
a269a03c 5531 case LT:
9076b9c1 5532 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 5533 suffix = "s";
7e08e190 5534 else if (mode == CCmode || mode == CCGCmode)
e075ae69 5535 suffix = "l";
9076b9c1 5536 else
0f290768 5537 abort ();
a269a03c 5538 break;
a269a03c 5539 case LTU:
9076b9c1 5540 if (mode != CCmode)
0f290768 5541 abort ();
a269a03c
JC
5542 suffix = "b";
5543 break;
a269a03c 5544 case GE:
9076b9c1 5545 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 5546 suffix = "ns";
7e08e190 5547 else if (mode == CCmode || mode == CCGCmode)
e075ae69 5548 suffix = "ge";
9076b9c1 5549 else
0f290768 5550 abort ();
a269a03c 5551 break;
a269a03c 5552 case GEU:
e075ae69 5553 /* ??? As above. */
7e08e190 5554 if (mode != CCmode)
0f290768 5555 abort ();
7e08e190 5556 suffix = fp ? "nb" : "ae";
a269a03c 5557 break;
a269a03c 5558 case LE:
7e08e190 5559 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
e075ae69
RH
5560 abort ();
5561 suffix = "le";
a269a03c 5562 break;
a269a03c 5563 case LEU:
9076b9c1
JH
5564 if (mode != CCmode)
5565 abort ();
7e08e190 5566 suffix = "be";
a269a03c 5567 break;
3a3677ff 5568 case UNORDERED:
9e7adcb3 5569 suffix = fp ? "u" : "p";
3a3677ff
RH
5570 break;
5571 case ORDERED:
9e7adcb3 5572 suffix = fp ? "nu" : "np";
3a3677ff 5573 break;
a269a03c
JC
5574 default:
5575 abort ();
5576 }
5577 fputs (suffix, file);
5578}
5579
e075ae69
RH
5580void
5581print_reg (x, code, file)
5582 rtx x;
5583 int code;
5584 FILE *file;
e5cb57e8 5585{
e075ae69 5586 if (REGNO (x) == ARG_POINTER_REGNUM
564d80f4 5587 || REGNO (x) == FRAME_POINTER_REGNUM
e075ae69
RH
5588 || REGNO (x) == FLAGS_REG
5589 || REGNO (x) == FPSR_REG)
5590 abort ();
e9a25f70 5591
80f33d06 5592 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
e075ae69
RH
5593 putc ('%', file);
5594
ef6257cd 5595 if (code == 'w' || MMX_REG_P (x))
e075ae69
RH
5596 code = 2;
5597 else if (code == 'b')
5598 code = 1;
5599 else if (code == 'k')
5600 code = 4;
3f3f2124
JH
5601 else if (code == 'q')
5602 code = 8;
e075ae69
RH
5603 else if (code == 'y')
5604 code = 3;
5605 else if (code == 'h')
5606 code = 0;
5607 else
5608 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 5609
3f3f2124
JH
5610 /* Irritatingly, AMD extended registers use different naming convention
5611 from the normal registers. */
5612 if (REX_INT_REG_P (x))
5613 {
885a70fd
JH
5614 if (!TARGET_64BIT)
5615 abort ();
3f3f2124
JH
5616 switch (code)
5617 {
ef6257cd 5618 case 0:
c725bd79 5619 error ("extended registers have no high halves");
3f3f2124
JH
5620 break;
5621 case 1:
5622 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5623 break;
5624 case 2:
5625 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5626 break;
5627 case 4:
5628 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5629 break;
5630 case 8:
5631 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5632 break;
5633 default:
c725bd79 5634 error ("unsupported operand size for extended register");
3f3f2124
JH
5635 break;
5636 }
5637 return;
5638 }
e075ae69
RH
5639 switch (code)
5640 {
5641 case 3:
5642 if (STACK_TOP_P (x))
5643 {
5644 fputs ("st(0)", file);
5645 break;
5646 }
5647 /* FALLTHRU */
e075ae69 5648 case 8:
3f3f2124 5649 case 4:
e075ae69 5650 case 12:
446988df 5651 if (! ANY_FP_REG_P (x))
885a70fd 5652 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
e075ae69 5653 /* FALLTHRU */
a7180f70 5654 case 16:
e075ae69
RH
5655 case 2:
5656 fputs (hi_reg_name[REGNO (x)], file);
5657 break;
5658 case 1:
5659 fputs (qi_reg_name[REGNO (x)], file);
5660 break;
5661 case 0:
5662 fputs (qi_high_reg_name[REGNO (x)], file);
5663 break;
5664 default:
5665 abort ();
fe25fea3 5666 }
e5cb57e8
SC
5667}
5668
2a2ab3f9 5669/* Meaning of CODE:
fe25fea3 5670 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 5671 C -- print opcode suffix for set/cmov insn.
fe25fea3 5672 c -- like C, but print reversed condition
ef6257cd 5673 F,f -- likewise, but for floating-point.
048b1c95
JJ
5674 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
5675 nothing
2a2ab3f9
JVA
5676 R -- print the prefix for register names.
5677 z -- print the opcode suffix for the size of the current operand.
5678 * -- print a star (in certain assembler syntax)
fb204271 5679 A -- print an absolute memory reference.
2a2ab3f9 5680 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
5681 s -- print a shift double count, followed by the assemblers argument
5682 delimiter.
fe25fea3
SC
5683 b -- print the QImode name of the register for the indicated operand.
5684 %b0 would print %al if operands[0] is reg 0.
5685 w -- likewise, print the HImode name of the register.
5686 k -- likewise, print the SImode name of the register.
3f3f2124 5687 q -- likewise, print the DImode name of the register.
ef6257cd
JH
5688 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5689 y -- print "st(0)" instead of "st" as a register.
a46d1d38 5690 D -- print condition for SSE cmp instruction.
ef6257cd
JH
5691 P -- if PIC, print an @PLT suffix.
5692 X -- don't print any sort of PIC '@' suffix for a symbol.
a46d1d38 5693 */
2a2ab3f9
JVA
5694
5695void
5696print_operand (file, x, code)
5697 FILE *file;
5698 rtx x;
5699 int code;
5700{
5701 if (code)
5702 {
5703 switch (code)
5704 {
5705 case '*':
80f33d06 5706 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9
JVA
5707 putc ('*', file);
5708 return;
5709
fb204271 5710 case 'A':
80f33d06 5711 if (ASSEMBLER_DIALECT == ASM_ATT)
fb204271 5712 putc ('*', file);
80f33d06 5713 else if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
5714 {
5715 /* Intel syntax. For absolute addresses, registers should not
5716 be surrounded by braces. */
5717 if (GET_CODE (x) != REG)
5718 {
5719 putc ('[', file);
5720 PRINT_OPERAND (file, x, 0);
5721 putc (']', file);
5722 return;
5723 }
5724 }
80f33d06
GS
5725 else
5726 abort ();
fb204271
DN
5727
5728 PRINT_OPERAND (file, x, 0);
5729 return;
5730
5731
2a2ab3f9 5732 case 'L':
80f33d06 5733 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5734 putc ('l', file);
2a2ab3f9
JVA
5735 return;
5736
5737 case 'W':
80f33d06 5738 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5739 putc ('w', file);
2a2ab3f9
JVA
5740 return;
5741
5742 case 'B':
80f33d06 5743 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5744 putc ('b', file);
2a2ab3f9
JVA
5745 return;
5746
5747 case 'Q':
80f33d06 5748 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5749 putc ('l', file);
2a2ab3f9
JVA
5750 return;
5751
5752 case 'S':
80f33d06 5753 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5754 putc ('s', file);
2a2ab3f9
JVA
5755 return;
5756
5f1ec3e6 5757 case 'T':
80f33d06 5758 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5759 putc ('t', file);
5f1ec3e6
JVA
5760 return;
5761
2a2ab3f9
JVA
5762 case 'z':
5763 /* 387 opcodes don't get size suffixes if the operands are
0f290768 5764 registers. */
2a2ab3f9
JVA
5765 if (STACK_REG_P (x))
5766 return;
5767
831c4e87
KC
5768 /* Likewise if using Intel opcodes. */
5769 if (ASSEMBLER_DIALECT == ASM_INTEL)
5770 return;
5771
5772 /* This is the size of op from size of operand. */
2a2ab3f9
JVA
5773 switch (GET_MODE_SIZE (GET_MODE (x)))
5774 {
2a2ab3f9 5775 case 2:
155d8a47
JW
5776#ifdef HAVE_GAS_FILDS_FISTS
5777 putc ('s', file);
5778#endif
2a2ab3f9
JVA
5779 return;
5780
5781 case 4:
5782 if (GET_MODE (x) == SFmode)
5783 {
e075ae69 5784 putc ('s', file);
2a2ab3f9
JVA
5785 return;
5786 }
5787 else
e075ae69 5788 putc ('l', file);
2a2ab3f9
JVA
5789 return;
5790
5f1ec3e6 5791 case 12:
2b589241 5792 case 16:
e075ae69
RH
5793 putc ('t', file);
5794 return;
5f1ec3e6 5795
2a2ab3f9
JVA
5796 case 8:
5797 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
5798 {
5799#ifdef GAS_MNEMONICS
e075ae69 5800 putc ('q', file);
56c0e8fa 5801#else
e075ae69
RH
5802 putc ('l', file);
5803 putc ('l', file);
56c0e8fa
JVA
5804#endif
5805 }
e075ae69
RH
5806 else
5807 putc ('l', file);
2a2ab3f9 5808 return;
155d8a47
JW
5809
5810 default:
5811 abort ();
2a2ab3f9 5812 }
4af3895e
JVA
5813
5814 case 'b':
5815 case 'w':
5816 case 'k':
3f3f2124 5817 case 'q':
4af3895e
JVA
5818 case 'h':
5819 case 'y':
5cb6195d 5820 case 'X':
e075ae69 5821 case 'P':
4af3895e
JVA
5822 break;
5823
2d49677f
SC
5824 case 's':
5825 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5826 {
5827 PRINT_OPERAND (file, x, 0);
e075ae69 5828 putc (',', file);
2d49677f 5829 }
a269a03c
JC
5830 return;
5831
a46d1d38
JH
5832 case 'D':
5833 /* Little bit of braindamage here. The SSE compare instructions
5834 does use completely different names for the comparisons that the
5835 fp conditional moves. */
5836 switch (GET_CODE (x))
5837 {
5838 case EQ:
5839 case UNEQ:
5840 fputs ("eq", file);
5841 break;
5842 case LT:
5843 case UNLT:
5844 fputs ("lt", file);
5845 break;
5846 case LE:
5847 case UNLE:
5848 fputs ("le", file);
5849 break;
5850 case UNORDERED:
5851 fputs ("unord", file);
5852 break;
5853 case NE:
5854 case LTGT:
5855 fputs ("neq", file);
5856 break;
5857 case UNGE:
5858 case GE:
5859 fputs ("nlt", file);
5860 break;
5861 case UNGT:
5862 case GT:
5863 fputs ("nle", file);
5864 break;
5865 case ORDERED:
5866 fputs ("ord", file);
5867 break;
5868 default:
5869 abort ();
5870 break;
5871 }
5872 return;
048b1c95
JJ
5873 case 'O':
5874#ifdef CMOV_SUN_AS_SYNTAX
5875 if (ASSEMBLER_DIALECT == ASM_ATT)
5876 {
5877 switch (GET_MODE (x))
5878 {
5879 case HImode: putc ('w', file); break;
5880 case SImode:
5881 case SFmode: putc ('l', file); break;
5882 case DImode:
5883 case DFmode: putc ('q', file); break;
5884 default: abort ();
5885 }
5886 putc ('.', file);
5887 }
5888#endif
5889 return;
1853aadd 5890 case 'C':
e075ae69 5891 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 5892 return;
fe25fea3 5893 case 'F':
048b1c95
JJ
5894#ifdef CMOV_SUN_AS_SYNTAX
5895 if (ASSEMBLER_DIALECT == ASM_ATT)
5896 putc ('.', file);
5897#endif
e075ae69 5898 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
5899 return;
5900
e9a25f70 5901 /* Like above, but reverse condition */
e075ae69 5902 case 'c':
c1d5afc4
CR
5903 /* Check to see if argument to %c is really a constant
5904 and not a condition code which needs to be reversed. */
5905 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
5906 {
5907 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
5908 return;
5909 }
e075ae69
RH
5910 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5911 return;
fe25fea3 5912 case 'f':
048b1c95
JJ
5913#ifdef CMOV_SUN_AS_SYNTAX
5914 if (ASSEMBLER_DIALECT == ASM_ATT)
5915 putc ('.', file);
5916#endif
e075ae69 5917 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 5918 return;
ef6257cd
JH
5919 case '+':
5920 {
5921 rtx x;
e5cb57e8 5922
ef6257cd
JH
5923 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5924 return;
a4f31c00 5925
ef6257cd
JH
5926 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5927 if (x)
5928 {
5929 int pred_val = INTVAL (XEXP (x, 0));
5930
5931 if (pred_val < REG_BR_PROB_BASE * 45 / 100
5932 || pred_val > REG_BR_PROB_BASE * 55 / 100)
5933 {
5934 int taken = pred_val > REG_BR_PROB_BASE / 2;
5935 int cputaken = final_forward_branch_p (current_output_insn) == 0;
5936
5937 /* Emit hints only in the case default branch prediction
5938 heruistics would fail. */
5939 if (taken != cputaken)
5940 {
5941 /* We use 3e (DS) prefix for taken branches and
5942 2e (CS) prefix for not taken branches. */
5943 if (taken)
5944 fputs ("ds ; ", file);
5945 else
5946 fputs ("cs ; ", file);
5947 }
5948 }
5949 }
5950 return;
5951 }
4af3895e 5952 default:
a52453cc 5953 output_operand_lossage ("invalid operand code `%c'", code);
2a2ab3f9
JVA
5954 }
5955 }
e9a25f70 5956
2a2ab3f9
JVA
5957 if (GET_CODE (x) == REG)
5958 {
5959 PRINT_REG (x, code, file);
5960 }
e9a25f70 5961
2a2ab3f9
JVA
5962 else if (GET_CODE (x) == MEM)
5963 {
e075ae69 5964 /* No `byte ptr' prefix for call instructions. */
80f33d06 5965 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
2a2ab3f9 5966 {
69ddee61 5967 const char * size;
e075ae69
RH
5968 switch (GET_MODE_SIZE (GET_MODE (x)))
5969 {
5970 case 1: size = "BYTE"; break;
5971 case 2: size = "WORD"; break;
5972 case 4: size = "DWORD"; break;
5973 case 8: size = "QWORD"; break;
5974 case 12: size = "XWORD"; break;
a7180f70 5975 case 16: size = "XMMWORD"; break;
e075ae69 5976 default:
564d80f4 5977 abort ();
e075ae69 5978 }
fb204271
DN
5979
5980 /* Check for explicit size override (codes 'b', 'w' and 'k') */
5981 if (code == 'b')
5982 size = "BYTE";
5983 else if (code == 'w')
5984 size = "WORD";
5985 else if (code == 'k')
5986 size = "DWORD";
5987
e075ae69
RH
5988 fputs (size, file);
5989 fputs (" PTR ", file);
2a2ab3f9 5990 }
e075ae69
RH
5991
5992 x = XEXP (x, 0);
5993 if (flag_pic && CONSTANT_ADDRESS_P (x))
5994 output_pic_addr_const (file, x, code);
0d7d98ee
JH
5995 /* Avoid (%rip) for call operands. */
5996 else if (CONSTANT_ADDRESS_P (x) && code =='P'
5997 && GET_CODE (x) != CONST_INT)
5998 output_addr_const (file, x);
c8b94768
RH
5999 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6000 output_operand_lossage ("invalid constraints for operand");
2a2ab3f9 6001 else
e075ae69 6002 output_address (x);
2a2ab3f9 6003 }
e9a25f70 6004
2a2ab3f9
JVA
6005 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6006 {
e9a25f70
JL
6007 REAL_VALUE_TYPE r;
6008 long l;
6009
5f1ec3e6
JVA
6010 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6011 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 6012
80f33d06 6013 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6014 putc ('$', file);
52267fcb 6015 fprintf (file, "0x%lx", l);
5f1ec3e6 6016 }
e9a25f70 6017
0f290768 6018 /* These float cases don't actually occur as immediate operands. */
5f1ec3e6
JVA
6019 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6020 {
e9a25f70
JL
6021 REAL_VALUE_TYPE r;
6022 char dstr[30];
6023
5f1ec3e6
JVA
6024 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6025 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6026 fprintf (file, "%s", dstr);
2a2ab3f9 6027 }
e9a25f70 6028
2b589241
JH
6029 else if (GET_CODE (x) == CONST_DOUBLE
6030 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
2a2ab3f9 6031 {
e9a25f70
JL
6032 REAL_VALUE_TYPE r;
6033 char dstr[30];
6034
5f1ec3e6
JVA
6035 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6036 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6037 fprintf (file, "%s", dstr);
2a2ab3f9 6038 }
79325812 6039 else
2a2ab3f9 6040 {
4af3895e 6041 if (code != 'P')
2a2ab3f9 6042 {
695dac07 6043 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69 6044 {
80f33d06 6045 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
6046 putc ('$', file);
6047 }
2a2ab3f9
JVA
6048 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6049 || GET_CODE (x) == LABEL_REF)
e075ae69 6050 {
80f33d06 6051 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
6052 putc ('$', file);
6053 else
6054 fputs ("OFFSET FLAT:", file);
6055 }
2a2ab3f9 6056 }
e075ae69
RH
6057 if (GET_CODE (x) == CONST_INT)
6058 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6059 else if (flag_pic)
2a2ab3f9
JVA
6060 output_pic_addr_const (file, x, code);
6061 else
6062 output_addr_const (file, x);
6063 }
6064}
6065\f
6066/* Print a memory operand whose address is ADDR. */
6067
6068void
6069print_operand_address (file, addr)
6070 FILE *file;
6071 register rtx addr;
6072{
e075ae69
RH
6073 struct ix86_address parts;
6074 rtx base, index, disp;
6075 int scale;
e9a25f70 6076
e075ae69
RH
6077 if (! ix86_decompose_address (addr, &parts))
6078 abort ();
e9a25f70 6079
e075ae69
RH
6080 base = parts.base;
6081 index = parts.index;
6082 disp = parts.disp;
6083 scale = parts.scale;
e9a25f70 6084
e075ae69
RH
6085 if (!base && !index)
6086 {
6087 /* Displacement only requires special attention. */
e9a25f70 6088
e075ae69 6089 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 6090 {
80f33d06 6091 if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
6092 {
6093 if (USER_LABEL_PREFIX[0] == 0)
6094 putc ('%', file);
6095 fputs ("ds:", file);
6096 }
e075ae69 6097 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
2a2ab3f9 6098 }
e075ae69
RH
6099 else if (flag_pic)
6100 output_pic_addr_const (file, addr, 0);
6101 else
6102 output_addr_const (file, addr);
0d7d98ee
JH
6103
6104 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6105 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
6106 fputs ("(%rip)", file);
e075ae69
RH
6107 }
6108 else
6109 {
80f33d06 6110 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9 6111 {
e075ae69 6112 if (disp)
2a2ab3f9 6113 {
c399861d 6114 if (flag_pic)
e075ae69
RH
6115 output_pic_addr_const (file, disp, 0);
6116 else if (GET_CODE (disp) == LABEL_REF)
6117 output_asm_label (disp);
2a2ab3f9 6118 else
e075ae69 6119 output_addr_const (file, disp);
2a2ab3f9
JVA
6120 }
6121
e075ae69
RH
6122 putc ('(', file);
6123 if (base)
6124 PRINT_REG (base, 0, file);
6125 if (index)
2a2ab3f9 6126 {
e075ae69
RH
6127 putc (',', file);
6128 PRINT_REG (index, 0, file);
6129 if (scale != 1)
6130 fprintf (file, ",%d", scale);
2a2ab3f9 6131 }
e075ae69 6132 putc (')', file);
2a2ab3f9 6133 }
2a2ab3f9
JVA
6134 else
6135 {
e075ae69 6136 rtx offset = NULL_RTX;
e9a25f70 6137
e075ae69
RH
6138 if (disp)
6139 {
6140 /* Pull out the offset of a symbol; print any symbol itself. */
6141 if (GET_CODE (disp) == CONST
6142 && GET_CODE (XEXP (disp, 0)) == PLUS
6143 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6144 {
6145 offset = XEXP (XEXP (disp, 0), 1);
6146 disp = gen_rtx_CONST (VOIDmode,
6147 XEXP (XEXP (disp, 0), 0));
6148 }
ce193852 6149
e075ae69
RH
6150 if (flag_pic)
6151 output_pic_addr_const (file, disp, 0);
6152 else if (GET_CODE (disp) == LABEL_REF)
6153 output_asm_label (disp);
6154 else if (GET_CODE (disp) == CONST_INT)
6155 offset = disp;
6156 else
6157 output_addr_const (file, disp);
6158 }
e9a25f70 6159
e075ae69
RH
6160 putc ('[', file);
6161 if (base)
a8620236 6162 {
e075ae69
RH
6163 PRINT_REG (base, 0, file);
6164 if (offset)
6165 {
6166 if (INTVAL (offset) >= 0)
6167 putc ('+', file);
6168 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6169 }
a8620236 6170 }
e075ae69
RH
6171 else if (offset)
6172 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 6173 else
e075ae69 6174 putc ('0', file);
e9a25f70 6175
e075ae69
RH
6176 if (index)
6177 {
6178 putc ('+', file);
6179 PRINT_REG (index, 0, file);
6180 if (scale != 1)
6181 fprintf (file, "*%d", scale);
6182 }
6183 putc (']', file);
6184 }
2a2ab3f9
JVA
6185 }
6186}
6187\f
6188/* Split one or more DImode RTL references into pairs of SImode
6189 references. The RTL can be REG, offsettable MEM, integer constant, or
6190 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6191 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 6192 that parallel "operands". */
2a2ab3f9
JVA
6193
6194void
6195split_di (operands, num, lo_half, hi_half)
6196 rtx operands[];
6197 int num;
6198 rtx lo_half[], hi_half[];
6199{
6200 while (num--)
6201 {
57dbca5e 6202 rtx op = operands[num];
b932f770
JH
6203
6204 /* simplify_subreg refuse to split volatile memory addresses,
6205 but we still have to handle it. */
6206 if (GET_CODE (op) == MEM)
2a2ab3f9 6207 {
f4ef873c 6208 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 6209 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
6210 }
6211 else
b932f770 6212 {
38ca929b
JH
6213 lo_half[num] = simplify_gen_subreg (SImode, op,
6214 GET_MODE (op) == VOIDmode
6215 ? DImode : GET_MODE (op), 0);
6216 hi_half[num] = simplify_gen_subreg (SImode, op,
6217 GET_MODE (op) == VOIDmode
6218 ? DImode : GET_MODE (op), 4);
b932f770 6219 }
2a2ab3f9
JVA
6220 }
6221}
44cf5b6a
JH
6222/* Split one or more TImode RTL references into pairs of SImode
6223 references. The RTL can be REG, offsettable MEM, integer constant, or
6224 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6225 split and "num" is its length. lo_half and hi_half are output arrays
6226 that parallel "operands". */
6227
6228void
6229split_ti (operands, num, lo_half, hi_half)
6230 rtx operands[];
6231 int num;
6232 rtx lo_half[], hi_half[];
6233{
6234 while (num--)
6235 {
6236 rtx op = operands[num];
b932f770
JH
6237
6238 /* simplify_subreg refuse to split volatile memory addresses, but we
6239 still have to handle it. */
6240 if (GET_CODE (op) == MEM)
44cf5b6a
JH
6241 {
6242 lo_half[num] = adjust_address (op, DImode, 0);
6243 hi_half[num] = adjust_address (op, DImode, 8);
6244 }
6245 else
b932f770
JH
6246 {
6247 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6248 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6249 }
44cf5b6a
JH
6250 }
6251}
2a2ab3f9 6252\f
2a2ab3f9
JVA
6253/* Output code to perform a 387 binary operation in INSN, one of PLUS,
6254 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6255 is the expression of the binary operation. The output may either be
6256 emitted here, or returned to the caller, like all output_* functions.
6257
6258 There is no guarantee that the operands are the same mode, as they
0f290768 6259 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 6260
e3c2afab
AM
6261#ifndef SYSV386_COMPAT
6262/* Set to 1 for compatibility with brain-damaged assemblers. No-one
6263 wants to fix the assemblers because that causes incompatibility
6264 with gcc. No-one wants to fix gcc because that causes
6265 incompatibility with assemblers... You can use the option of
6266 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6267#define SYSV386_COMPAT 1
6268#endif
6269
69ddee61 6270const char *
2a2ab3f9
JVA
6271output_387_binary_op (insn, operands)
6272 rtx insn;
6273 rtx *operands;
6274{
e3c2afab 6275 static char buf[30];
69ddee61 6276 const char *p;
1deaa899
JH
6277 const char *ssep;
6278 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
2a2ab3f9 6279
e3c2afab
AM
6280#ifdef ENABLE_CHECKING
6281 /* Even if we do not want to check the inputs, this documents input
6282 constraints. Which helps in understanding the following code. */
6283 if (STACK_REG_P (operands[0])
6284 && ((REG_P (operands[1])
6285 && REGNO (operands[0]) == REGNO (operands[1])
6286 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6287 || (REG_P (operands[2])
6288 && REGNO (operands[0]) == REGNO (operands[2])
6289 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6290 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6291 ; /* ok */
1deaa899 6292 else if (!is_sse)
e3c2afab
AM
6293 abort ();
6294#endif
6295
2a2ab3f9
JVA
6296 switch (GET_CODE (operands[3]))
6297 {
6298 case PLUS:
e075ae69
RH
6299 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6300 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6301 p = "fiadd";
6302 else
6303 p = "fadd";
1deaa899 6304 ssep = "add";
2a2ab3f9
JVA
6305 break;
6306
6307 case MINUS:
e075ae69
RH
6308 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6309 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6310 p = "fisub";
6311 else
6312 p = "fsub";
1deaa899 6313 ssep = "sub";
2a2ab3f9
JVA
6314 break;
6315
6316 case MULT:
e075ae69
RH
6317 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6318 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6319 p = "fimul";
6320 else
6321 p = "fmul";
1deaa899 6322 ssep = "mul";
2a2ab3f9
JVA
6323 break;
6324
6325 case DIV:
e075ae69
RH
6326 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6327 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6328 p = "fidiv";
6329 else
6330 p = "fdiv";
1deaa899 6331 ssep = "div";
2a2ab3f9
JVA
6332 break;
6333
6334 default:
6335 abort ();
6336 }
6337
1deaa899
JH
6338 if (is_sse)
6339 {
6340 strcpy (buf, ssep);
6341 if (GET_MODE (operands[0]) == SFmode)
6342 strcat (buf, "ss\t{%2, %0|%0, %2}");
6343 else
6344 strcat (buf, "sd\t{%2, %0|%0, %2}");
6345 return buf;
6346 }
e075ae69 6347 strcpy (buf, p);
2a2ab3f9
JVA
6348
6349 switch (GET_CODE (operands[3]))
6350 {
6351 case MULT:
6352 case PLUS:
6353 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6354 {
e3c2afab 6355 rtx temp = operands[2];
2a2ab3f9
JVA
6356 operands[2] = operands[1];
6357 operands[1] = temp;
6358 }
6359
e3c2afab
AM
6360 /* know operands[0] == operands[1]. */
6361
2a2ab3f9 6362 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
6363 {
6364 p = "%z2\t%2";
6365 break;
6366 }
2a2ab3f9
JVA
6367
6368 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
6369 {
6370 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
6371 /* How is it that we are storing to a dead operand[2]?
6372 Well, presumably operands[1] is dead too. We can't
6373 store the result to st(0) as st(0) gets popped on this
6374 instruction. Instead store to operands[2] (which I
6375 think has to be st(1)). st(1) will be popped later.
6376 gcc <= 2.8.1 didn't have this check and generated
6377 assembly code that the Unixware assembler rejected. */
6378 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 6379 else
e3c2afab 6380 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 6381 break;
6b28fd63 6382 }
2a2ab3f9
JVA
6383
6384 if (STACK_TOP_P (operands[0]))
e3c2afab 6385 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 6386 else
e3c2afab 6387 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 6388 break;
2a2ab3f9
JVA
6389
6390 case MINUS:
6391 case DIV:
6392 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
6393 {
6394 p = "r%z1\t%1";
6395 break;
6396 }
2a2ab3f9
JVA
6397
6398 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
6399 {
6400 p = "%z2\t%2";
6401 break;
6402 }
2a2ab3f9 6403
2a2ab3f9 6404 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 6405 {
e3c2afab
AM
6406#if SYSV386_COMPAT
6407 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6408 derived assemblers, confusingly reverse the direction of
6409 the operation for fsub{r} and fdiv{r} when the
6410 destination register is not st(0). The Intel assembler
6411 doesn't have this brain damage. Read !SYSV386_COMPAT to
6412 figure out what the hardware really does. */
6413 if (STACK_TOP_P (operands[0]))
6414 p = "{p\t%0, %2|rp\t%2, %0}";
6415 else
6416 p = "{rp\t%2, %0|p\t%0, %2}";
6417#else
6b28fd63 6418 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
6419 /* As above for fmul/fadd, we can't store to st(0). */
6420 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 6421 else
e3c2afab
AM
6422 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6423#endif
e075ae69 6424 break;
6b28fd63 6425 }
2a2ab3f9
JVA
6426
6427 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 6428 {
e3c2afab 6429#if SYSV386_COMPAT
6b28fd63 6430 if (STACK_TOP_P (operands[0]))
e3c2afab 6431 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 6432 else
e3c2afab
AM
6433 p = "{p\t%1, %0|rp\t%0, %1}";
6434#else
6435 if (STACK_TOP_P (operands[0]))
6436 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6437 else
6438 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6439#endif
e075ae69 6440 break;
6b28fd63 6441 }
2a2ab3f9
JVA
6442
6443 if (STACK_TOP_P (operands[0]))
6444 {
6445 if (STACK_TOP_P (operands[1]))
e3c2afab 6446 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 6447 else
e3c2afab 6448 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 6449 break;
2a2ab3f9
JVA
6450 }
6451 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
6452 {
6453#if SYSV386_COMPAT
6454 p = "{\t%1, %0|r\t%0, %1}";
6455#else
6456 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
6457#endif
6458 }
2a2ab3f9 6459 else
e3c2afab
AM
6460 {
6461#if SYSV386_COMPAT
6462 p = "{r\t%2, %0|\t%0, %2}";
6463#else
6464 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6465#endif
6466 }
e075ae69 6467 break;
2a2ab3f9
JVA
6468
6469 default:
6470 abort ();
6471 }
e075ae69
RH
6472
6473 strcat (buf, p);
6474 return buf;
2a2ab3f9 6475}
e075ae69 6476
a4f31c00 6477/* Output code to initialize control word copies used by
7a2e09f4
JH
6478 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
6479 is set to control word rounding downwards. */
6480void
6481emit_i387_cw_initialization (normal, round_down)
6482 rtx normal, round_down;
6483{
6484 rtx reg = gen_reg_rtx (HImode);
6485
6486 emit_insn (gen_x86_fnstcw_1 (normal));
6487 emit_move_insn (reg, normal);
6488 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6489 && !TARGET_64BIT)
6490 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6491 else
6492 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
6493 emit_move_insn (round_down, reg);
6494}
6495
2a2ab3f9 6496/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 6497 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 6498 operand may be [SDX]Fmode. */
2a2ab3f9 6499
69ddee61 6500const char *
2a2ab3f9
JVA
6501output_fix_trunc (insn, operands)
6502 rtx insn;
6503 rtx *operands;
6504{
6505 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 6506 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 6507
e075ae69
RH
6508 /* Jump through a hoop or two for DImode, since the hardware has no
6509 non-popping instruction. We used to do this a different way, but
6510 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
6511 if (dimode_p && !stack_top_dies)
6512 output_asm_insn ("fld\t%y1", operands);
e075ae69 6513
7a2e09f4 6514 if (!STACK_TOP_P (operands[1]))
10195bd8
JW
6515 abort ();
6516
e075ae69 6517 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 6518 abort ();
e9a25f70 6519
7a2e09f4 6520 output_asm_insn ("fldcw\t%3", operands);
e075ae69 6521 if (stack_top_dies || dimode_p)
7a2e09f4 6522 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 6523 else
7a2e09f4 6524 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 6525 output_asm_insn ("fldcw\t%2", operands);
10195bd8 6526
e075ae69 6527 return "";
2a2ab3f9 6528}
cda749b1 6529
e075ae69
RH
6530/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6531 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6532 when fucom should be used. */
6533
69ddee61 6534const char *
e075ae69 6535output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
6536 rtx insn;
6537 rtx *operands;
e075ae69 6538 int eflags_p, unordered_p;
cda749b1 6539{
e075ae69
RH
6540 int stack_top_dies;
6541 rtx cmp_op0 = operands[0];
6542 rtx cmp_op1 = operands[1];
0644b628 6543 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
e075ae69
RH
6544
6545 if (eflags_p == 2)
6546 {
6547 cmp_op0 = cmp_op1;
6548 cmp_op1 = operands[2];
6549 }
0644b628
JH
6550 if (is_sse)
6551 {
6552 if (GET_MODE (operands[0]) == SFmode)
6553 if (unordered_p)
6554 return "ucomiss\t{%1, %0|%0, %1}";
6555 else
6556 return "comiss\t{%1, %0|%0, %y}";
6557 else
6558 if (unordered_p)
6559 return "ucomisd\t{%1, %0|%0, %1}";
6560 else
6561 return "comisd\t{%1, %0|%0, %y}";
6562 }
cda749b1 6563
e075ae69 6564 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
6565 abort ();
6566
e075ae69 6567 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 6568
e075ae69
RH
6569 if (STACK_REG_P (cmp_op1)
6570 && stack_top_dies
6571 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6572 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 6573 {
e075ae69
RH
6574 /* If both the top of the 387 stack dies, and the other operand
6575 is also a stack register that dies, then this must be a
6576 `fcompp' float compare */
6577
6578 if (eflags_p == 1)
6579 {
6580 /* There is no double popping fcomi variant. Fortunately,
6581 eflags is immune from the fstp's cc clobbering. */
6582 if (unordered_p)
6583 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6584 else
6585 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6586 return "fstp\t%y0";
6587 }
6588 else
cda749b1 6589 {
e075ae69
RH
6590 if (eflags_p == 2)
6591 {
6592 if (unordered_p)
6593 return "fucompp\n\tfnstsw\t%0";
6594 else
6595 return "fcompp\n\tfnstsw\t%0";
6596 }
cda749b1
JW
6597 else
6598 {
e075ae69
RH
6599 if (unordered_p)
6600 return "fucompp";
6601 else
6602 return "fcompp";
cda749b1
JW
6603 }
6604 }
cda749b1
JW
6605 }
6606 else
6607 {
e075ae69 6608 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 6609
0f290768 6610 static const char * const alt[24] =
e075ae69
RH
6611 {
6612 "fcom%z1\t%y1",
6613 "fcomp%z1\t%y1",
6614 "fucom%z1\t%y1",
6615 "fucomp%z1\t%y1",
0f290768 6616
e075ae69
RH
6617 "ficom%z1\t%y1",
6618 "ficomp%z1\t%y1",
6619 NULL,
6620 NULL,
6621
6622 "fcomi\t{%y1, %0|%0, %y1}",
6623 "fcomip\t{%y1, %0|%0, %y1}",
6624 "fucomi\t{%y1, %0|%0, %y1}",
6625 "fucomip\t{%y1, %0|%0, %y1}",
6626
6627 NULL,
6628 NULL,
6629 NULL,
6630 NULL,
6631
6632 "fcom%z2\t%y2\n\tfnstsw\t%0",
6633 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6634 "fucom%z2\t%y2\n\tfnstsw\t%0",
6635 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 6636
e075ae69
RH
6637 "ficom%z2\t%y2\n\tfnstsw\t%0",
6638 "ficomp%z2\t%y2\n\tfnstsw\t%0",
6639 NULL,
6640 NULL
6641 };
6642
6643 int mask;
69ddee61 6644 const char *ret;
e075ae69
RH
6645
6646 mask = eflags_p << 3;
6647 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6648 mask |= unordered_p << 1;
6649 mask |= stack_top_dies;
6650
6651 if (mask >= 24)
6652 abort ();
6653 ret = alt[mask];
6654 if (ret == NULL)
6655 abort ();
cda749b1 6656
e075ae69 6657 return ret;
cda749b1
JW
6658 }
6659}
2a2ab3f9 6660
f88c65f7
RH
6661void
6662ix86_output_addr_vec_elt (file, value)
6663 FILE *file;
6664 int value;
6665{
6666 const char *directive = ASM_LONG;
6667
6668 if (TARGET_64BIT)
6669 {
6670#ifdef ASM_QUAD
6671 directive = ASM_QUAD;
6672#else
6673 abort ();
6674#endif
6675 }
6676
6677 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
6678}
6679
6680void
6681ix86_output_addr_diff_elt (file, value, rel)
6682 FILE *file;
6683 int value, rel;
6684{
6685 if (TARGET_64BIT)
74411039 6686 fprintf (file, "%s%s%d-%s%d\n",
f88c65f7
RH
6687 ASM_LONG, LPREFIX, value, LPREFIX, rel);
6688 else if (HAVE_AS_GOTOFF_IN_DATA)
6689 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
6690 else
6691 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
6692 ASM_LONG, LPREFIX, value);
6693}
32b5b1aa 6694\f
a8bac9ab
RH
6695/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
6696 for the target. */
6697
6698void
6699ix86_expand_clear (dest)
6700 rtx dest;
6701{
6702 rtx tmp;
6703
6704 /* We play register width games, which are only valid after reload. */
6705 if (!reload_completed)
6706 abort ();
6707
6708 /* Avoid HImode and its attendant prefix byte. */
6709 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
6710 dest = gen_rtx_REG (SImode, REGNO (dest));
6711
6712 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
6713
6714 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
6715 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
6716 {
6717 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
6718 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6719 }
6720
6721 emit_insn (tmp);
6722}
6723
79325812 6724void
e075ae69
RH
6725ix86_expand_move (mode, operands)
6726 enum machine_mode mode;
6727 rtx operands[];
32b5b1aa 6728{
e075ae69 6729 int strict = (reload_in_progress || reload_completed);
e075ae69 6730 rtx insn;
e9a25f70 6731
e075ae69 6732 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
32b5b1aa 6733 {
e075ae69 6734 /* Emit insns to move operands[1] into operands[0]. */
e9a25f70 6735
e075ae69
RH
6736 if (GET_CODE (operands[0]) == MEM)
6737 operands[1] = force_reg (Pmode, operands[1]);
6738 else
32b5b1aa 6739 {
e075ae69
RH
6740 rtx temp = operands[0];
6741 if (GET_CODE (temp) != REG)
6742 temp = gen_reg_rtx (Pmode);
6743 temp = legitimize_pic_address (operands[1], temp);
6744 if (temp == operands[0])
6745 return;
6746 operands[1] = temp;
32b5b1aa 6747 }
e075ae69
RH
6748 }
6749 else
6750 {
d7a29404 6751 if (GET_CODE (operands[0]) == MEM
44cf5b6a 6752 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
d7a29404
JH
6753 || !push_operand (operands[0], mode))
6754 && GET_CODE (operands[1]) == MEM)
e075ae69 6755 operands[1] = force_reg (mode, operands[1]);
e9a25f70 6756
2c5a510c
RH
6757 if (push_operand (operands[0], mode)
6758 && ! general_no_elim_operand (operands[1], mode))
6759 operands[1] = copy_to_mode_reg (mode, operands[1]);
6760
44cf5b6a
JH
6761 /* Force large constants in 64bit compilation into register
6762 to get them CSEed. */
6763 if (TARGET_64BIT && mode == DImode
6764 && immediate_operand (operands[1], mode)
6765 && !x86_64_zero_extended_value (operands[1])
6766 && !register_operand (operands[0], mode)
6767 && optimize && !reload_completed && !reload_in_progress)
6768 operands[1] = copy_to_mode_reg (mode, operands[1]);
6769
e075ae69 6770 if (FLOAT_MODE_P (mode))
32b5b1aa 6771 {
d7a29404
JH
6772 /* If we are loading a floating point constant to a register,
6773 force the value to memory now, since we'll get better code
6774 out the back end. */
e075ae69
RH
6775
6776 if (strict)
6777 ;
e075ae69 6778 else if (GET_CODE (operands[1]) == CONST_DOUBLE
d7a29404 6779 && register_operand (operands[0], mode))
e075ae69 6780 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
32b5b1aa 6781 }
32b5b1aa 6782 }
e9a25f70 6783
e075ae69 6784 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
e9a25f70 6785
e075ae69
RH
6786 emit_insn (insn);
6787}
e9a25f70 6788
e37af218
RH
6789void
6790ix86_expand_vector_move (mode, operands)
6791 enum machine_mode mode;
6792 rtx operands[];
6793{
6794 /* Force constants other than zero into memory. We do not know how
6795 the instructions used to build constants modify the upper 64 bits
6796 of the register, once we have that information we may be able
6797 to handle some of them more efficiently. */
6798 if ((reload_in_progress | reload_completed) == 0
6799 && register_operand (operands[0], mode)
6800 && CONSTANT_P (operands[1]))
6801 {
6802 rtx addr = gen_reg_rtx (Pmode);
6803 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
6804 operands[1] = gen_rtx_MEM (mode, addr);
6805 }
6806
6807 /* Make operand1 a register if it isn't already. */
6808 if ((reload_in_progress | reload_completed) == 0
6809 && !register_operand (operands[0], mode)
6810 && !register_operand (operands[1], mode)
6811 && operands[1] != CONST0_RTX (mode))
6812 {
59bef189 6813 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
e37af218
RH
6814 emit_move_insn (operands[0], temp);
6815 return;
6816 }
6817
6818 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
6819}

/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators.  */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutative, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE.  */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
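
/* Illustrative sketch, not from the original file: an arithmetic
   define_expand in i386.md would call the helper above roughly as

     "ix86_expand_binary_operator (PLUS, SImode, operands); DONE;"

   after which the named insn pattern only has to match the operand
   shapes that ix86_binary_operator_ok below accepts.  */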

/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[3];
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutative, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
	    || (GET_RTX_CLASS (code) == 'c'
		&& rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutative and source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != 'c'
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;
  return 1;
}
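
/* Worked example, added for illustration: with a memory destination,
   "mem = mem + reg" is accepted (the destination matches source 1),
   and for the commutative PLUS "mem = reg + mem" is accepted as well,
   but "mem1 = mem2 + reg" is rejected -- the insn would need two
   distinct memory references plus the store.  */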

/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When the source operand is memory, the destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE.  */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (code, mode, operands)
     enum rtx_code code ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[2] ATTRIBUTE_UNUSED;
{
  /* If one of the operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}

/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and whether the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (insn, req_mode)
     rtx insn;
     enum machine_mode req_mode;
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
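
/* Worked example, added for illustration: for "x == y" this emits

     (set (reg:CCZ 17) (compare:CCZ (reg:SI x) (reg:SI y)))

   and hands back (eq (reg:CCZ 17) (const_int 0)) for the branch,
   setcc, or cmov pattern to consume.  */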

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (code)
     enum rtx_code code ATTRIBUTE_UNUSED;
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     between all the trapping and nontrapping forms of comparison, we can
     make inequality comparisons trapping again, since that results in better
     code when using FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}

enum machine_mode
ix86_cc_mode (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only the zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing the carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with the sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases the carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with the sign flag when comparing against
	 zero, but for which we miss the jump instruction, so we need
	 to use relational tests against overflow, which thus needs
	 to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* The strcmp pattern does (use flags), and combine may ask us
	 for a proper mode.  */
    case USE:
      return CCmode;
    default:
      abort ();
    }
}
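
/* Worked example, added for illustration: "x == y" only needs ZF and
   gets CCZmode; unsigned "x < y" needs CF and gets CCmode; signed
   "x < 0" can use just the sign flag and gets CCGOCmode, while signed
   "x < y" against a nonzero operand falls back to CCGCmode.  */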

/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (code)
     enum rtx_code code ATTRIBUTE_UNUSED;
{
  enum rtx_code swapped_code = swap_condition (code);
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
	  || (ix86_fp_comparison_cost (swapped_code)
	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
}

/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (code, pop0, pop1)
     enum rtx_code code;
     rtx *pop0, *pop1;
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || op_mode == XFmode
	  || op_mode == TFmode
	  || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}

/* Convert the comparison codes we use to represent FP comparisons to
   the integer codes that will result in a proper branch.  Return
   UNKNOWN if no such code is available.  */
static enum rtx_code
ix86_fp_compare_code_to_integer (code)
     enum rtx_code code;
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
      break;
    case UNEQ:
      return EQ;
      break;
    case UNLT:
      return LTU;
      break;
    case UNLE:
      return LEU;
      break;
    case LTGT:
      return NE;
      break;
    default:
      return UNKNOWN;
    }
}

/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for the branch that
   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
   is not required, its code is set to NIL.
   We never require more than two branches.  */
static void
ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
     enum rtx_code code, *bypass_code, *first_code, *second_code;
{
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF PF CF
     >      0  0  0
     <      0  0  1
     =      1  0  0
     un     1  1  1 */

  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  if (!TARGET_IEEE_FP)
    {
      *second_code = NIL;
      *bypass_code = NIL;
    }
}
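/* Worked example, added for illustration: under TARGET_IEEE_FP an EQ
   test splits into first_code = UNEQ with bypass_code = UNORDERED,
   giving a sequence along the lines of

	fucomip	%st(1), %st
	jp	.Lskip		# bypass: unordered operands
	je	.Ltarget	# UNEQ: ZF=1
     .Lskip:

   whereas NE uses second_code = UNORDERED, so the second branch also
   jumps to the target instead of around it.  */
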
/* Return the cost of a comparison done with fcom + arithmetic operations
   on AX.  All the following functions use the number of instructions as
   a cost metric.  In the future this should be tweaked to compute bytes
   for optimize_size and take into account the performance of various
   instructions on various CPUs.  */
static int
ix86_fp_comparison_arithmetics_cost (code)
     enum rtx_code code;
{
  if (!TARGET_IEEE_FP)
    return 4;
  /* The cost of code output by ix86_expand_fp_compare.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      return 4;
      break;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      return 5;
      break;
    case LE:
    case UNGT:
      return 6;
      break;
    default:
      abort ();
    }
}

/* Return the cost of a comparison done using the fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_fcomi_cost (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not
     supported - this prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != NIL || second_code != NIL) + 2;
}

/* Return the cost of a comparison done using the sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_sahf_cost (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not
     preferred - this keeps gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != NIL || second_code != NIL) + 3;
}

/* Compute the cost of the comparison done using any method.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_cost (code)
     enum rtx_code code;
{
  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
  int min;

  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
  sahf_cost = ix86_fp_comparison_sahf_cost (code);

  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
  if (min > sahf_cost)
    min = sahf_cost;
  if (min > fcomi_cost)
    min = fcomi_cost;
  return min;
}

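/* Worked example, added for illustration: for EQ with TARGET_IEEE_FP,
   the arithmetic (fnstsw-based) sequence costs 5, fcomi costs
   1 + 2 = 3 (one extra branch for the UNORDERED bypass), and sahf is
   priced at 1024 unless TARGET_USE_SAHF or optimize_size, so a CMOV
   target ends up using the fcomi variant.  */
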
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
     enum rtx_code code;
     rtx op0, op1, scratch;
     rtx *second_test;
     rtx *bypass_test;
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do the fcomi/sahf based test when profitable.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
	{
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			     tmp);
	  emit_insn (tmp);
	}
      else
	{
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
	  if (!scratch)
	    scratch = gen_reg_rtx (HImode);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
	  emit_insn (gen_x86_sahf_1 (scratch));
	}

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
      if (second_code != NIL)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	      break;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  abort ();
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}

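/* Worked example, added for illustration: the non-IEEE GE test above
   compiles to something like

	fcomp
	fnstsw	%ax
	testb	$0x05, %ah	# C0|C2 -- "below" or "unordered"
	je	.Ltarget	# taken when neither bit is set

   i.e. the bits of AH holding C0 (0x01), C2 (0x04) and C3 (0x40)
   stand in for the missing FP condition codes.  */
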
rtx
ix86_expand_compare (code, second_test, bypass_test)
     enum rtx_code code;
     rtx *second_test, *bypass_test;
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
				  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

/* Return true if the CODE will result in a nontrivial jump sequence.  */
bool
ix86_fp_jump_nontrivial_p (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != NIL || second_code != NIL;
}

void
ix86_expand_branch (code, label)
     enum rtx_code code;
     rtx label;
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
    simple:
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
    case TFmode:
      {
	rtvec vec;
	int use_fcomi;
	enum rtx_code bypass_code, first_code, second_code;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

	/* Check whether we will use the natural sequence with one jump.  If
	   so, we can expand the jump early.  Otherwise delay expansion by
	   creating a compound insn to not confuse optimizers.  */
	if (bypass_code == NIL && second_code == NIL
	    && TARGET_CMOVE)
	  {
	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx, NULL_RTX);
	  }
	else
	  {
	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
				  ix86_compare_op0, ix86_compare_op1);
	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
					gen_rtx_LABEL_REF (VOIDmode, label),
					pc_rtx);
	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	    use_fcomi = ix86_use_fcomi_compare (code);
	    vec = rtvec_alloc (3 + !use_fcomi);
	    RTVEC_ELT (vec, 0) = tmp;
	    RTVEC_ELT (vec, 1)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	    RTVEC_ELT (vec, 2)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	    if (! use_fcomi)
	      RTVEC_ELT (vec, 3)
		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	  }
	return;
      }

    case DImode:
      if (TARGET_64BIT)
	goto simple;
      /* Expand a DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   and op1 is a constant whose low word is zero, we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = NIL; code2 = NE;  break;
	  case NE:   code2 = NIL; break;

	  default:
	    abort ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != NIL)
	  ix86_expand_branch (code1, label);
	if (code2 != NIL)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != NIL)
	  emit_label (label2);
	return;
      }

    default:
      abort ();
    }
}

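/* Worked example, added for illustration: on a 32-bit target a signed
   DImode "a < b" branch expands along the lines of

	cmpl	hi(b), hi(a)
	jl	.Ltrue
	jg	.Lfalse
	cmpl	lo(b), lo(a)
	jb	.Ltrue
     .Lfalse:

   matching the pseudo code in the comment above: a signed compare on
   the high words, the unsigned code3 on the low words.  */
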
/* Split a branch based on a floating point condition.  */
void
ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
     enum rtx_code code;
     rtx op1, op2, target1, target2, tmp;
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
	 Assume that BYPASS and SECOND always test for UNORDERED.  */
      probability = split_branch_probability;

      /* A value of 1 is low enough to make no need for the probability
	 to be updated.  Later we may run some experiments and see
	 if unordered values are more frequent in practice.  */
      if (bypass)
	bypass_probability = 1;
      if (second)
	second_probability = 1;
    }
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
						 bypass,
						 gen_rtx_LABEL_REF (VOIDmode,
								    label),
						 pc_rtx)));
      if (bypass_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (bypass_probability),
			       REG_NOTES (i));
    }
  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
			   GEN_INT (probability),
			   REG_NOTES (i));
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
						 target2)));
      if (second_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (second_probability),
			       REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}

int
ix86_expand_setcc (code, dest)
     enum rtx_code code;
     rtx dest;
{
  rtx ret, tmp, tmpreg;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == DImode
      && !TARGET_64BIT)
    return 0; /* FAIL */

  if (GET_MODE (dest) != QImode)
    abort ();

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  if (second_test)
	    abort ();
	  test = bypass_test;
	  bypass = 1;
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  return 1; /* DONE */
}
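
/* Illustrative sketch, not from the original file: the setcc expanders
   in i386.md (e.g. "seq") would use this helper roughly as

     "if (ix86_expand_setcc (EQ, operands[0])) DONE; else FAIL;"

   with the DImode-on-32-bit FAIL above falling back to the generic
   store-flag code.  */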

int
ix86_expand_int_movcc (operands)
     rtx operands[];
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* When the compare code is not LTU or GEU, we can not use the sbbl case.
     In case the comparison is done with an immediate, we can convert it
     to LTU or GEU by altering the integer.  */

  if ((code == LEU || code == GTU)
      && GET_CODE (ix86_compare_op1) == CONST_INT
      && mode != HImode
      && (unsigned int) INTVAL (ix86_compare_op1) != 0xffffffff
      /* The operand still must be representable as a sign extended value.  */
      && (!TARGET_64BIT
	  || GET_MODE (ix86_compare_op0) != DImode
	  || (unsigned int) INTVAL (ix86_compare_op1) != 0x7fffffff)
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      if (code == LEU)
	code = LTU;
      else
	code = GEU;
      ix86_compare_op1
	= gen_int_mode (INTVAL (ix86_compare_op1) + 1,
			GET_MODE (ix86_compare_op0));
    }

  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = gen_sequence ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if (mode != HImode
      && (mode != DImode || TARGET_64BIT)
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      if ((compare_code == LTU || compare_code == GEU)
	  && !second_test && !bypass_test)
	{

	  /* Detect overlap between destination and compare sources.  */
	  rtx tmp = out;

	  /* To simplify the rest of the code, restrict to the GEU case.  */
	  if (compare_code == LTU)
	    {
	      int tmp = ct;
	      ct = cf;
	      cf = tmp;
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	  diff = ct - cf;

	  if (reg_overlap_mentioned_p (out, ix86_compare_op0)
	      || reg_overlap_mentioned_p (out, ix86_compare_op1))
	    tmp = gen_reg_rtx (mode);

	  emit_insn (compare_seq);
	  if (mode == DImode)
	    emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
	  else
	    emit_insn (gen_x86_movsicc_0_m1 (tmp));

	  if (diff == 1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [addl dest, ct]
	       *
	       * Size 5 - 8.
	       */
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (ct),
					   tmp, 1, OPTAB_DIRECT);
	    }
	  else if (cf == -1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * orl $ct, dest
	       *
	       * Size 8.
	       */
	      tmp = expand_simple_binop (mode, IOR,
					 tmp, GEN_INT (ct),
					 tmp, 1, OPTAB_DIRECT);
	    }
	  else if (diff == -1 && ct)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * xorl $-1, dest
	       * [addl dest, cf]
	       *
	       * Size 8 - 11.
	       */
	      tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
	      if (cf)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (cf),
					   tmp, 1, OPTAB_DIRECT);
	    }
	  else
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * andl cf - ct, dest
	       * [addl dest, ct]
	       *
	       * Size 8 - 11.
	       */
	      tmp = expand_simple_binop (mode, AND,
					 tmp,
					 gen_int_mode (cf - ct, mode),
					 tmp, 1, OPTAB_DIRECT);
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (ct),
					   tmp, 1, OPTAB_DIRECT);
	    }

	  if (tmp != out)
	    emit_move_insn (out, tmp);

	  return 1; /* DONE */
	}
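
      /* Worked example, added for illustration: for unsigned
	 "x < y ? 5 : 2" the code above restricts to the GEU form
	 (ct = 2, cf = 5, diff = -3) and emits

		cmpl	%ebx, %eax
		sbbl	%ecx, %ecx	# -1 if x < y, else 0
		andl	$3, %ecx	# cf - ct
		addl	$2, %ecx	# ct

	 which yields 5 exactly when the carry was set.  */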

      diff = ct - cf;
      if (diff < 0)
	{
	  HOST_WIDE_INT tmp;
	  tmp = ct, ct = cf, cf = tmp;
	  diff = -diff;
	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
	    {
	      /* We may be reversing an unordered compare to a normal compare;
		 that is not valid in general (we may convert a non-trapping
		 condition to a trapping one), however on i386 we currently
		 emit all comparisons unordered.  */
	      compare_code = reverse_condition_maybe_unordered (compare_code);
	      code = reverse_condition_maybe_unordered (code);
	    }
	  else
	    {
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	}

      compare_code = NIL;
      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
	  && GET_CODE (ix86_compare_op1) == CONST_INT)
	{
	  if (ix86_compare_op1 == const0_rtx
	      && (code == LT || code == GE))
	    compare_code = code;
	  else if (ix86_compare_op1 == constm1_rtx)
	    {
	      if (code == LE)
		compare_code = LT;
	      else if (code == GT)
		compare_code = GE;
	    }
	}

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != NIL
	  && GET_MODE (ix86_compare_op0) == GET_MODE (out)
	  && (cf == -1 || ct == -1))
	{
	  /* If the lea code below could be used, only optimize
	     if it results in a 2 insn sequence.  */

	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
		 || diff == 3 || diff == 5 || diff == 9)
	      || (compare_code == LT && ct == -1)
	      || (compare_code == GE && cf == -1))
	    {
	      /*
	       * notl op1	(if necessary)
	       * sarl $31, op1
	       * orl cf, op1
	       */
	      if (ct != -1)
		{
		  cf = ct;
		  ct = -1;
		  code = reverse_condition (code);
		}

	      out = emit_store_flag (out, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, -1);

	      out = expand_simple_binop (mode, IOR,
					 out, GEN_INT (cf),
					 out, 1, OPTAB_DIRECT);
	      if (out != operands[0])
		emit_move_insn (operands[0], out);

	      return 1; /* DONE */
	    }
	}

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
	   || diff == 3 || diff == 5 || diff == 9)
	  && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
	{
	  /*
	   * xorl dest,dest
	   * cmpl op1,op2
	   * setcc dest
	   * lea cf(dest*(ct-cf)),dest
	   *
	   * Size 14.
	   *
	   * This also catches the degenerate setcc-only case.
	   */

	  rtx tmp;
	  int nops;

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to get the arithmetic done in the proper mode to match.  */
	  if (diff == 1)
	    tmp = out;
	  else
	    {
	      rtx out1;
	      out1 = out;
	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
	      nops++;
	      if (diff & 1)
		{
		  tmp = gen_rtx_PLUS (mode, tmp, out1);
		  nops++;
		}
	    }
	  if (cf != 0)
	    {
	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
	      nops++;
	    }
	  if (tmp != out
	      && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
	    {
	      if (nops == 1)
		{
		  rtx clob;

		  clob = gen_rtx_REG (CCmode, FLAGS_REG);
		  clob = gen_rtx_CLOBBER (VOIDmode, clob);

		  tmp = gen_rtx_SET (VOIDmode, out, tmp);
		  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
		  emit_insn (tmp);
		}
	      else
		emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
	    }
	  if (out != operands[0])
	    emit_move_insn (operands[0], out);

	  return 1; /* DONE */
	}

      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * Size 20.			Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       *
       * %%% Parameterize branch_cost on the tuning architecture, then
       * use that.  The 80386 couldn't care less about mispredicts.
       */

      if (!optimize_size && !TARGET_CMOVE)
	{
	  if (ct == 0)
	    {
	      ct = cf;
	      cf = 0;
	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
		/* We may be reversing an unordered compare to a normal
		   compare; that is not valid in general (we may convert a
		   non-trapping condition to a trapping one), however on
		   i386 we currently emit all comparisons unordered.  */
		code = reverse_condition_maybe_unordered (code);
	      else
		{
		  code = reverse_condition (code);
		  if (compare_code != NIL)
		    compare_code = reverse_condition (compare_code);
		}
	    }

	  if (compare_code != NIL)
	    {
	      /* notl op1	(if needed)
		 sarl $31, op1
		 andl (cf-ct), op1
		 addl ct, op1

		 For x < 0 (resp. x <= -1) there will be no notl,
		 so if possible swap the constants to get rid of the
		 complement.
		 True/false will be -1/0 while the code below (store flag
		 followed by decrement) is 0/-1, so the constants need
		 to be exchanged once more.  */

	      if (compare_code == GE || !cf)
		{
		  code = reverse_condition (code);
		  compare_code = LT;
		}
	      else
		{
		  HOST_WIDE_INT tmp = cf;
		  cf = ct;
		  ct = tmp;
		}

	      out = emit_store_flag (out, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, -1);
	    }
	  else
	    {
	      out = emit_store_flag (out, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, 1);

	      out = expand_simple_binop (mode, PLUS,
					 out, constm1_rtx,
					 out, 1, OPTAB_DIRECT);
	    }

	  out = expand_simple_binop (mode, AND,
				     out,
				     gen_int_mode (cf - ct, mode),
				     out, 1, OPTAB_DIRECT);
	  out = expand_simple_binop (mode, PLUS,
				     out, GEN_INT (ct),
				     out, 1, OPTAB_DIRECT);
	  if (out != operands[0])
	    emit_move_insn (operands[0], out);

	  return 1; /* DONE */
	}
    }

  if (!TARGET_CMOVE)
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (optimize_size)
	return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load a
	 constant with the above and mask it in with a logical operation.  */

      if (GET_CODE (operands[2]) == CONST_INT)
	{
	  var = operands[3];
	  if (INTVAL (operands[2]) == 0)
	    operands[3] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[2]) == -1)
	    operands[3] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else if (GET_CODE (operands[3]) == CONST_INT)
	{
	  var = operands[2];
	  if (INTVAL (operands[3]) == 0)
	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1)
	    operands[2] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else
	return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
	return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
			  OPTAB_WIDEN);
      if (out != orig_out)
	emit_move_insn (orig_out, out);

      return 1; /* DONE */
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }
  if (! register_operand (operands[2], VOIDmode)
      && ! register_operand (operands[3], VOIDmode))
    operands[2] = force_reg (mode, operands[2]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode,
						compare_op, operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode,
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode,
						  second_test,
						  operands[2],
						  operands[0])));

  return 1; /* DONE */
}

int
ix86_expand_fp_movcc (operands)
     rtx operands[];
{
  enum rtx_code code;
  rtx tmp;
  rtx compare_op, second_test, bypass_test;

  /* For SF/DFmode conditional moves based on comparisons
     in the same mode, we may want to use SSE min/max instructions.  */
  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
      && (!TARGET_IEEE_FP
	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
      /* We may be called from the post-reload splitter.  */
      && (!REG_P (operands[0])
	  || SSE_REG_P (operands[0])
	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
    {
      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
      code = GET_CODE (operands[1]);

      /* See if we have a (cross) match between the comparison operands
	 and the conditional move operands.  */
      if (rtx_equal_p (operands[2], op1))
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = reverse_condition_maybe_unordered (code);
	}
      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
	{
	  /* Check for min operation.  */
	  if (code == LT)
	    {
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_minsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_mindf3 (operands[0], op0, op1));
	      return 1;
	    }
	  /* Check for max operation.  */
	  if (code == GT)
	    {
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_maxsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_maxdf3 (operands[0], op0, op1));
	      return 1;
	    }
	}
      /* Arrange for the condition to be an sse_comparison_operator.  In
	 case we are in non-ieee mode, try to canonicalize the destination
	 operand to be first in the comparison - this helps reload to avoid
	 extra moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
	{
	  rtx tmp = ix86_compare_op0;
	  ix86_compare_op0 = ix86_compare_op1;
	  ix86_compare_op1 = tmp;
	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      /* Similarly, try to arrange for the result to be the first operand
	 of the conditional move.  We also don't support the NE comparison
	 on SSE, so try to avoid it.  */
      if ((rtx_equal_p (operands[0], operands[3])
	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
	{
	  rtx tmp = operands[2];
	  operands[2] = operands[3];
	  operands[3] = tmp;
	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
					  (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      if (GET_MODE (operands[0]) == SFmode)
	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      else
	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      if (second_test != NULL || bypass_test != NULL)
	abort ();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						compare_op,
						operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  second_test,
						  operands[2],
						  operands[0])));

  return 1;
}

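/* Worked example, added for illustration: with -msse, the SFmode
   conditional move "a < b ? a : b" matches the cross-check above
   (operands[2] == op0, operands[3] == op1, code == LT) and is emitted
   as a single minss instruction via gen_minsf3 instead of a compare
   plus conditional move.  */
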
2450a057
JH
8610/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
8611 works for floating pointer parameters and nonoffsetable memories.
8612 For pushes, it returns just stack offsets; the values will be saved
8613 in the right order. Maximally three parts are generated. */
8614
2b589241 8615static int
2450a057
JH
8616ix86_split_to_parts (operand, parts, mode)
8617 rtx operand;
8618 rtx *parts;
8619 enum machine_mode mode;
32b5b1aa 8620{
26e5b205
JH
8621 int size;
8622
8623 if (!TARGET_64BIT)
8624 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8625 else
8626 size = (GET_MODE_SIZE (mode) + 4) / 8;
2450a057 8627
a7180f70
BS
8628 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8629 abort ();
2450a057
JH
8630 if (size < 2 || size > 3)
8631 abort ();
8632
d7a29404
JH
8633 /* Optimize constant pool reference to immediates. This is used by fp moves,
8634 that force all constants to memory to allow combining. */
8635
8636 if (GET_CODE (operand) == MEM
8637 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8638 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8639 operand = get_pool_constant (XEXP (operand, 0));
8640
2450a057 8641 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
e075ae69 8642 {
2450a057
JH
8643 /* The only non-offsetable memories we handle are pushes. */
8644 if (! push_operand (operand, VOIDmode))
8645 abort ();
8646
26e5b205
JH
8647 operand = copy_rtx (operand);
8648 PUT_MODE (operand, Pmode);
2450a057
JH
8649 parts[0] = parts[1] = parts[2] = operand;
8650 }
26e5b205 8651 else if (!TARGET_64BIT)
2450a057
JH
8652 {
8653 if (mode == DImode)
8654 split_di (&operand, 1, &parts[0], &parts[1]);
8655 else
e075ae69 8656 {
2450a057
JH
8657 if (REG_P (operand))
8658 {
8659 if (!reload_completed)
8660 abort ();
8661 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8662 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8663 if (size == 3)
8664 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8665 }
8666 else if (offsettable_memref_p (operand))
8667 {
f4ef873c 8668 operand = adjust_address (operand, SImode, 0);
2450a057 8669 parts[0] = operand;
b72f00af 8670 parts[1] = adjust_address (operand, SImode, 4);
2450a057 8671 if (size == 3)
b72f00af 8672 parts[2] = adjust_address (operand, SImode, 8);
2450a057
JH
8673 }
8674 else if (GET_CODE (operand) == CONST_DOUBLE)
8675 {
8676 REAL_VALUE_TYPE r;
2b589241 8677 long l[4];
2450a057
JH
8678
8679 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8680 switch (mode)
8681 {
8682 case XFmode:
2b589241 8683 case TFmode:
2450a057 8684 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
d8bf17f9 8685 parts[2] = gen_int_mode (l[2], SImode);
2450a057
JH
8686 break;
8687 case DFmode:
8688 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8689 break;
8690 default:
8691 abort ();
8692 }
d8bf17f9
LB
8693 parts[1] = gen_int_mode (l[1], SImode);
8694 parts[0] = gen_int_mode (l[0], SImode);
2450a057
JH
8695 }
8696 else
8697 abort ();
e075ae69 8698 }
2450a057 8699 }
26e5b205
JH
8700 else
8701 {
44cf5b6a
JH
8702 if (mode == TImode)
8703 split_ti (&operand, 1, &parts[0], &parts[1]);
26e5b205
JH
8704 if (mode == XFmode || mode == TFmode)
8705 {
8706 if (REG_P (operand))
8707 {
8708 if (!reload_completed)
8709 abort ();
8710 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8711 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8712 }
8713 else if (offsettable_memref_p (operand))
8714 {
b72f00af 8715 operand = adjust_address (operand, DImode, 0);
26e5b205 8716 parts[0] = operand;
b72f00af 8717 parts[1] = adjust_address (operand, SImode, 8);
26e5b205
JH
8718 }
8719 else if (GET_CODE (operand) == CONST_DOUBLE)
8720 {
8721 REAL_VALUE_TYPE r;
8722 long l[3];
8723
8724 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8725 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
 8726	      /* Do not use a shift by 32, to avoid a warning on 32-bit systems: the low mask is built as (2 << 31) - 1 and the high word is shifted by 31 and then by 1.  */
8727 if (HOST_BITS_PER_WIDE_INT >= 64)
523fbd9d 8728 parts[0]
d8bf17f9 8729 = gen_int_mode
44cf5b6a 8730 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
b531087a 8731 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
d8bf17f9 8732 DImode);
26e5b205
JH
8733 else
8734 parts[0] = immed_double_const (l[0], l[1], DImode);
d8bf17f9 8735 parts[1] = gen_int_mode (l[2], SImode);
26e5b205
JH
8736 }
8737 else
8738 abort ();
8739 }
8740 }
2450a057 8741
2b589241 8742 return size;
2450a057
JH
8743}
8744
 8745/* Emit insns to perform a move or push of DI, DF, and XF values.
 8746   All the required insns are emitted here.  Operands 2-4 contain the
 8747   input values in the correct order; operands 5-7 contain the
 8748   output values.  */
8749
26e5b205
JH
8750void
8751ix86_split_long_move (operands)
8752 rtx operands[];
2450a057
JH
8753{
8754 rtx part[2][3];
26e5b205 8755 int nparts;
2450a057
JH
8756 int push = 0;
8757 int collisions = 0;
26e5b205
JH
8758 enum machine_mode mode = GET_MODE (operands[0]);
8759
 8760   /* The DFmode expanders may ask us to move a double.
 8761      For a 64-bit target this is a single move.  By hiding that fact
 8762      here we simplify the i386.md splitters.  */
8763 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8764 {
8cdfa312
RH
 8765      /* Optimize constant pool references to immediates.  This is used by
 8766	 fp moves, which force all constants to memory to allow combining.  */
26e5b205
JH
8767
8768 if (GET_CODE (operands[1]) == MEM
8769 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8770 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8771 operands[1] = get_pool_constant (XEXP (operands[1], 0));
8772 if (push_operand (operands[0], VOIDmode))
b47b4f21
JH
8773 {
8774 operands[0] = copy_rtx (operands[0]);
8775 PUT_MODE (operands[0], Pmode);
8776 }
26e5b205
JH
8777 else
8778 operands[0] = gen_lowpart (DImode, operands[0]);
8779 operands[1] = gen_lowpart (DImode, operands[1]);
8780 emit_move_insn (operands[0], operands[1]);
8781 return;
8782 }
2450a057 8783
2450a057
JH
 8784   /* The only non-offsettable memory we handle is a push.  */
8785 if (push_operand (operands[0], VOIDmode))
8786 push = 1;
8787 else if (GET_CODE (operands[0]) == MEM
8788 && ! offsettable_memref_p (operands[0]))
8789 abort ();
8790
26e5b205
JH
8791 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8792 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
2450a057
JH
8793
 8794   /* When emitting a push, take care of source operands on the stack.  */
8795 if (push && GET_CODE (operands[1]) == MEM
8796 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
8797 {
26e5b205 8798 if (nparts == 3)
886cbb88
JH
8799 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8800 XEXP (part[1][2], 0));
8801 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8802 XEXP (part[1][1], 0));
2450a057
JH
8803 }
8804
0f290768 8805   /* We need to do the copy in the right order in case an address register
2450a057
JH
8806 of the source overlaps the destination. */
8807 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
8808 {
8809 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
8810 collisions++;
8811 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8812 collisions++;
26e5b205 8813 if (nparts == 3
2450a057
JH
8814 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
8815 collisions++;
8816
8817 /* Collision in the middle part can be handled by reordering. */
26e5b205 8818 if (collisions == 1 && nparts == 3
2450a057 8819 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
e075ae69 8820 {
2450a057
JH
8821 rtx tmp;
8822 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
8823 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
8824 }
e075ae69 8825
2450a057
JH
 8826      /* If there are more collisions, we can't handle them by reordering.
 8827	 Do an lea to the last part and use only one colliding move.  */
8828 else if (collisions > 1)
8829 {
8830 collisions = 1;
26e5b205 8831 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
2450a057 8832 XEXP (part[1][0], 0)));
26e5b205
JH
8833 part[1][0] = change_address (part[1][0],
8834 TARGET_64BIT ? DImode : SImode,
8835 part[0][nparts - 1]);
b72f00af 8836 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
26e5b205 8837 if (nparts == 3)
b72f00af 8838 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
2450a057
JH
8839 }
8840 }
8841
8842 if (push)
8843 {
26e5b205 8844 if (!TARGET_64BIT)
2b589241 8845 {
26e5b205
JH
8846 if (nparts == 3)
8847 {
 8848	      /* We use only the first 12 bytes of a TFmode value, but for
 8849		 pushing we must adjust the stack as if we were pushing a
 8850		 real 16-byte value.  */
8851 if (mode == TFmode && !TARGET_64BIT)
8852 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
8853 GEN_INT (-4)));
8854 emit_move_insn (part[0][2], part[1][2]);
8855 }
2b589241 8856 }
26e5b205
JH
8857 else
8858 {
 8859	  /* In 64-bit mode we don't have a 32-bit push available.  If this is
 8860	     a register, that is OK: we just use the larger counterpart.  We
 8861	     also retype memories; these come from the attempt to avoid a REX
 8862	     prefix on moving the second half of a TFmode value.  */
8863 if (GET_MODE (part[1][1]) == SImode)
8864 {
8865 if (GET_CODE (part[1][1]) == MEM)
f4ef873c 8866 part[1][1] = adjust_address (part[1][1], DImode, 0);
26e5b205
JH
8867 else if (REG_P (part[1][1]))
8868 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
8869 else
b531087a 8870 abort ();
886cbb88
JH
8871 if (GET_MODE (part[1][0]) == SImode)
8872 part[1][0] = part[1][1];
26e5b205
JH
8873 }
8874 }
8875 emit_move_insn (part[0][1], part[1][1]);
8876 emit_move_insn (part[0][0], part[1][0]);
8877 return;
2450a057
JH
8878 }
8879
 8880   /* Choose the correct order so we do not overwrite the source before it is copied.  */
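   /* For instance, if the destination's low word is the same register as
      the source's high word, the high word must be moved first, since
      copying the low word first would clobber its own source.  */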
8881 if ((REG_P (part[0][0])
8882 && REG_P (part[1][1])
8883 && (REGNO (part[0][0]) == REGNO (part[1][1])
26e5b205 8884 || (nparts == 3
2450a057
JH
8885 && REGNO (part[0][0]) == REGNO (part[1][2]))))
8886 || (collisions > 0
8887 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
8888 {
26e5b205 8889 if (nparts == 3)
2450a057 8890 {
26e5b205
JH
8891 operands[2] = part[0][2];
8892 operands[3] = part[0][1];
8893 operands[4] = part[0][0];
8894 operands[5] = part[1][2];
8895 operands[6] = part[1][1];
8896 operands[7] = part[1][0];
2450a057
JH
8897 }
8898 else
8899 {
26e5b205
JH
8900 operands[2] = part[0][1];
8901 operands[3] = part[0][0];
8902 operands[5] = part[1][1];
8903 operands[6] = part[1][0];
2450a057
JH
8904 }
8905 }
8906 else
8907 {
26e5b205 8908 if (nparts == 3)
2450a057 8909 {
26e5b205
JH
8910 operands[2] = part[0][0];
8911 operands[3] = part[0][1];
8912 operands[4] = part[0][2];
8913 operands[5] = part[1][0];
8914 operands[6] = part[1][1];
8915 operands[7] = part[1][2];
2450a057
JH
8916 }
8917 else
8918 {
26e5b205
JH
8919 operands[2] = part[0][0];
8920 operands[3] = part[0][1];
8921 operands[5] = part[1][0];
8922 operands[6] = part[1][1];
e075ae69
RH
8923 }
8924 }
26e5b205
JH
8925 emit_move_insn (operands[2], operands[5]);
8926 emit_move_insn (operands[3], operands[6]);
8927 if (nparts == 3)
8928 emit_move_insn (operands[4], operands[7]);
32b5b1aa 8929
26e5b205 8930 return;
32b5b1aa 8931}
32b5b1aa 8932
e075ae69
RH
8933void
8934ix86_split_ashldi (operands, scratch)
8935 rtx *operands, scratch;
32b5b1aa 8936{
e075ae69
RH
8937 rtx low[2], high[2];
8938 int count;
b985a30f 8939
e075ae69
RH
8940 if (GET_CODE (operands[2]) == CONST_INT)
8941 {
8942 split_di (operands, 2, low, high);
8943 count = INTVAL (operands[2]) & 63;
32b5b1aa 8944
e075ae69
RH
8945 if (count >= 32)
8946 {
8947 emit_move_insn (high[0], low[1]);
8948 emit_move_insn (low[0], const0_rtx);
b985a30f 8949
e075ae69
RH
8950 if (count > 32)
8951 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
8952 }
8953 else
8954 {
8955 if (!rtx_equal_p (operands[0], operands[1]))
8956 emit_move_insn (operands[0], operands[1]);
8957 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
8958 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
8959 }
8960 }
8961 else
8962 {
8963 if (!rtx_equal_p (operands[0], operands[1]))
8964 emit_move_insn (operands[0], operands[1]);
b985a30f 8965
e075ae69 8966 split_di (operands, 1, low, high);
b985a30f 8967
e075ae69
RH
8968 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
8969 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 8970
fe577e58 8971 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 8972 {
fe577e58 8973 if (! no_new_pseudos)
e075ae69
RH
8974 scratch = force_reg (SImode, const0_rtx);
8975 else
8976 emit_move_insn (scratch, const0_rtx);
8977
8978 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
8979 scratch));
8980 }
8981 else
8982 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
8983 }
e9a25f70 8984}
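/* For a constant COUNT, what the expansion above computes, as a C sketch
   (illustrative; COUNT is already masked to 0..63, and COUNT == 0 is left
   to the hardware semantics of SHLD):

     if (count >= 32)
       { high = low << (count - 32); low = 0; }
     else
       { high = (high << count) | ((unsigned int) low >> (32 - count));
	 low <<= count; }

   ix86_split_ashrdi and ix86_split_lshrdi below are the mirror image,
   filling with copies of the sign bit or with zeros respectively.  */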
32b5b1aa 8985
e075ae69
RH
8986void
8987ix86_split_ashrdi (operands, scratch)
8988 rtx *operands, scratch;
32b5b1aa 8989{
e075ae69
RH
8990 rtx low[2], high[2];
8991 int count;
32b5b1aa 8992
e075ae69
RH
8993 if (GET_CODE (operands[2]) == CONST_INT)
8994 {
8995 split_di (operands, 2, low, high);
8996 count = INTVAL (operands[2]) & 63;
32b5b1aa 8997
e075ae69
RH
8998 if (count >= 32)
8999 {
9000 emit_move_insn (low[0], high[1]);
32b5b1aa 9001
e075ae69
RH
9002 if (! reload_completed)
9003 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9004 else
9005 {
9006 emit_move_insn (high[0], low[0]);
9007 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9008 }
9009
9010 if (count > 32)
9011 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9012 }
9013 else
9014 {
9015 if (!rtx_equal_p (operands[0], operands[1]))
9016 emit_move_insn (operands[0], operands[1]);
9017 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9018 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9019 }
9020 }
9021 else
32b5b1aa 9022 {
e075ae69
RH
9023 if (!rtx_equal_p (operands[0], operands[1]))
9024 emit_move_insn (operands[0], operands[1]);
9025
9026 split_di (operands, 1, low, high);
9027
9028 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9029 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9030
fe577e58 9031 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 9032 {
fe577e58 9033 if (! no_new_pseudos)
e075ae69
RH
9034 scratch = gen_reg_rtx (SImode);
9035 emit_move_insn (scratch, high[0]);
9036 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9037 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9038 scratch));
9039 }
9040 else
9041 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 9042 }
e075ae69 9043}
32b5b1aa 9044
e075ae69
RH
9045void
9046ix86_split_lshrdi (operands, scratch)
9047 rtx *operands, scratch;
9048{
9049 rtx low[2], high[2];
9050 int count;
32b5b1aa 9051
e075ae69 9052 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 9053 {
e075ae69
RH
9054 split_di (operands, 2, low, high);
9055 count = INTVAL (operands[2]) & 63;
9056
9057 if (count >= 32)
c7271385 9058 {
e075ae69
RH
9059 emit_move_insn (low[0], high[1]);
9060 emit_move_insn (high[0], const0_rtx);
32b5b1aa 9061
e075ae69
RH
9062 if (count > 32)
9063 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9064 }
9065 else
9066 {
9067 if (!rtx_equal_p (operands[0], operands[1]))
9068 emit_move_insn (operands[0], operands[1]);
9069 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9070 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9071 }
32b5b1aa 9072 }
e075ae69
RH
9073 else
9074 {
9075 if (!rtx_equal_p (operands[0], operands[1]))
9076 emit_move_insn (operands[0], operands[1]);
32b5b1aa 9077
e075ae69
RH
9078 split_di (operands, 1, low, high);
9079
9080 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9081 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9082
9083 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 9084 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 9085 {
fe577e58 9086 if (! no_new_pseudos)
e075ae69
RH
9087 scratch = force_reg (SImode, const0_rtx);
9088 else
9089 emit_move_insn (scratch, const0_rtx);
9090
9091 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9092 scratch));
9093 }
9094 else
9095 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9096 }
32b5b1aa 9097}
3f803cd9 9098
0407c02b 9099/* Helper function for the string operations below.  Test whether VARIABLE
0945b39d
JH
 9100   has the bits selected by VALUE clear, i.e. is suitably aligned; if so, jump to the returned label.  */
9101static rtx
9102ix86_expand_aligntest (variable, value)
9103 rtx variable;
9104 int value;
9105{
9106 rtx label = gen_label_rtx ();
9107 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9108 if (GET_MODE (variable) == DImode)
9109 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9110 else
9111 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9112 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
d43e0b7d 9113 1, label);
0945b39d
JH
9114 return label;
9115}
9116
 9117 /* Decrement COUNTREG by VALUE.  */
9118static void
9119ix86_adjust_counter (countreg, value)
9120 rtx countreg;
9121 HOST_WIDE_INT value;
9122{
9123 if (GET_MODE (countreg) == DImode)
9124 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9125 else
9126 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9127}
9128
 9129 /* Zero-extend the possibly-SImode EXP to a Pmode register.  */
d24b3457 9130rtx
0945b39d
JH
9131ix86_zero_extend_to_Pmode (exp)
9132 rtx exp;
9133{
9134 rtx r;
9135 if (GET_MODE (exp) == VOIDmode)
9136 return force_reg (Pmode, exp);
9137 if (GET_MODE (exp) == Pmode)
9138 return copy_to_mode_reg (Pmode, exp);
9139 r = gen_reg_rtx (Pmode);
9140 emit_insn (gen_zero_extendsidi2 (r, exp));
9141 return r;
9142}
9143
9144/* Expand string move (memcpy) operation. Use i386 string operations when
9145 profitable. expand_clrstr contains similar code. */
9146int
9147ix86_expand_movstr (dst, src, count_exp, align_exp)
9148 rtx dst, src, count_exp, align_exp;
9149{
9150 rtx srcreg, destreg, countreg;
9151 enum machine_mode counter_mode;
9152 HOST_WIDE_INT align = 0;
9153 unsigned HOST_WIDE_INT count = 0;
9154 rtx insns;
9155
9156 start_sequence ();
9157
9158 if (GET_CODE (align_exp) == CONST_INT)
9159 align = INTVAL (align_exp);
9160
5519a4f9 9161 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
9162 if (!TARGET_ALIGN_STRINGOPS)
9163 align = 64;
9164
9165 if (GET_CODE (count_exp) == CONST_INT)
9166 count = INTVAL (count_exp);
9167
 9168   /* Figure out the proper mode for the counter: always SImode for 32-bit
 9169      targets; for 64-bit targets, SImode when possible, otherwise DImode.
 9170      Set count to the number of bytes copied when known at compile time.  */
9171 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9172 || x86_64_zero_extended_value (count_exp))
9173 counter_mode = SImode;
9174 else
9175 counter_mode = DImode;
9176
9177 if (counter_mode != SImode && counter_mode != DImode)
9178 abort ();
9179
9180 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9181 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9182
9183 emit_insn (gen_cld ());
9184
9185 /* When optimizing for size emit simple rep ; movsb instruction for
9186 counts not divisible by 4. */
9187
9188 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9189 {
9190 countreg = ix86_zero_extend_to_Pmode (count_exp);
9191 if (TARGET_64BIT)
9192 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9193 destreg, srcreg, countreg));
9194 else
9195 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9196 destreg, srcreg, countreg));
9197 }
9198
9199 /* For constant aligned (or small unaligned) copies use rep movsl
9200 followed by code copying the rest. For PentiumPro ensure 8 byte
9201 alignment to allow rep movsl acceleration. */
9202
9203 else if (count != 0
9204 && (align >= 8
9205 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 9206 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
9207 {
9208 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9209 if (count & ~(size - 1))
9210 {
9211 countreg = copy_to_mode_reg (counter_mode,
9212 GEN_INT ((count >> (size == 4 ? 2 : 3))
9213 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9214 countreg = ix86_zero_extend_to_Pmode (countreg);
9215 if (size == 4)
9216 {
9217 if (TARGET_64BIT)
9218 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9219 destreg, srcreg, countreg));
9220 else
9221 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9222 destreg, srcreg, countreg));
9223 }
9224 else
9225 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9226 destreg, srcreg, countreg));
9227 }
9228 if (size == 8 && (count & 0x04))
9229 emit_insn (gen_strmovsi (destreg, srcreg));
9230 if (count & 0x02)
9231 emit_insn (gen_strmovhi (destreg, srcreg));
9232 if (count & 0x01)
9233 emit_insn (gen_strmovqi (destreg, srcreg));
9234 }
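	  /* A worked example of the constant-count path above: count == 15
	     with size == 4 gives countreg == 3, so rep movsl copies three
	     dwords (12 bytes) and the movsw/movsb tail copies the
	     remaining 2 + 1 bytes.  */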
9235 /* The generic code based on the glibc implementation:
9236 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9237 allowing accelerated copying there)
9238 - copy the data using rep movsl
9239 - copy the rest. */
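   /* The same strategy in C-like pseudo code (an illustrative sketch; the
      expander emits the rep movsl and all of the branch labels directly):

	 while (n && ((unsigned long) dst & 3))
	   copy one byte, n--;                  ... align the destination
	 issue rep movsl for n >> 2 dwords;     ... bulk copy
	 copy the remaining n & 3 bytes singly. ... tail
   */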
9240 else
9241 {
9242 rtx countreg2;
9243 rtx label = NULL;
37ad04a5
JH
9244 int desired_alignment = (TARGET_PENTIUMPRO
9245 && (count == 0 || count >= (unsigned int) 260)
9246 ? 8 : UNITS_PER_WORD);
0945b39d
JH
9247
 9248      /* In case we don't know anything about the alignment, default to the
 9249	  library version, since it is usually equally fast and results in
 9250	  shorter code.  */
9251 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9252 {
9253 end_sequence ();
9254 return 0;
9255 }
9256
9257 if (TARGET_SINGLE_STRINGOP)
9258 emit_insn (gen_cld ());
9259
9260 countreg2 = gen_reg_rtx (Pmode);
9261 countreg = copy_to_mode_reg (counter_mode, count_exp);
9262
 9263      /* We don't use loops to align the destination or to copy parts
 9264	  smaller than 4 bytes, because gcc is able to optimize such code
 9265	  better (when the destination or the count really is aligned, gcc
 9266	  is often able to predict the branches) and also it is friendlier
a4f31c00 9267	  to the hardware branch prediction.
0945b39d
JH
9268
 9269	  Using loops is beneficial for the generic case, because we can
 9270	  handle small counts using the loops.  Many CPUs (such as Athlon)
 9271	  have large REP prefix setup costs.
 9272
 9273	  This is quite costly.  Maybe we can revisit this decision later or
 9274	  add some customizability to this code.  */
9275
37ad04a5 9276 if (count == 0 && align < desired_alignment)
0945b39d
JH
9277 {
9278 label = gen_label_rtx ();
9279 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
d43e0b7d 9280 LEU, 0, counter_mode, 1, label);
0945b39d
JH
9281 }
9282 if (align <= 1)
9283 {
9284 rtx label = ix86_expand_aligntest (destreg, 1);
9285 emit_insn (gen_strmovqi (destreg, srcreg));
9286 ix86_adjust_counter (countreg, 1);
9287 emit_label (label);
9288 LABEL_NUSES (label) = 1;
9289 }
9290 if (align <= 2)
9291 {
9292 rtx label = ix86_expand_aligntest (destreg, 2);
9293 emit_insn (gen_strmovhi (destreg, srcreg));
9294 ix86_adjust_counter (countreg, 2);
9295 emit_label (label);
9296 LABEL_NUSES (label) = 1;
9297 }
37ad04a5 9298 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
9299 {
9300 rtx label = ix86_expand_aligntest (destreg, 4);
9301 emit_insn (gen_strmovsi (destreg, srcreg));
9302 ix86_adjust_counter (countreg, 4);
9303 emit_label (label);
9304 LABEL_NUSES (label) = 1;
9305 }
9306
37ad04a5
JH
9307 if (label && desired_alignment > 4 && !TARGET_64BIT)
9308 {
9309 emit_label (label);
9310 LABEL_NUSES (label) = 1;
9311 label = NULL_RTX;
9312 }
0945b39d
JH
9313 if (!TARGET_SINGLE_STRINGOP)
9314 emit_insn (gen_cld ());
9315 if (TARGET_64BIT)
9316 {
9317 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9318 GEN_INT (3)));
9319 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9320 destreg, srcreg, countreg2));
9321 }
9322 else
9323 {
9324 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9325 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9326 destreg, srcreg, countreg2));
9327 }
9328
9329 if (label)
9330 {
9331 emit_label (label);
9332 LABEL_NUSES (label) = 1;
9333 }
9334 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9335 emit_insn (gen_strmovsi (destreg, srcreg));
9336 if ((align <= 4 || count == 0) && TARGET_64BIT)
9337 {
9338 rtx label = ix86_expand_aligntest (countreg, 4);
9339 emit_insn (gen_strmovsi (destreg, srcreg));
9340 emit_label (label);
9341 LABEL_NUSES (label) = 1;
9342 }
9343 if (align > 2 && count != 0 && (count & 2))
9344 emit_insn (gen_strmovhi (destreg, srcreg));
9345 if (align <= 2 || count == 0)
9346 {
9347 rtx label = ix86_expand_aligntest (countreg, 2);
9348 emit_insn (gen_strmovhi (destreg, srcreg));
9349 emit_label (label);
9350 LABEL_NUSES (label) = 1;
9351 }
9352 if (align > 1 && count != 0 && (count & 1))
9353 emit_insn (gen_strmovqi (destreg, srcreg));
9354 if (align <= 1 || count == 0)
9355 {
9356 rtx label = ix86_expand_aligntest (countreg, 1);
9357 emit_insn (gen_strmovqi (destreg, srcreg));
9358 emit_label (label);
9359 LABEL_NUSES (label) = 1;
9360 }
9361 }
9362
9363 insns = get_insns ();
9364 end_sequence ();
9365
9366 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9367 emit_insns (insns);
9368 return 1;
9369}
9370
9371/* Expand string clear operation (bzero). Use i386 string operations when
9372 profitable. expand_movstr contains similar code. */
9373int
9374ix86_expand_clrstr (src, count_exp, align_exp)
9375 rtx src, count_exp, align_exp;
9376{
9377 rtx destreg, zeroreg, countreg;
9378 enum machine_mode counter_mode;
9379 HOST_WIDE_INT align = 0;
9380 unsigned HOST_WIDE_INT count = 0;
9381
9382 if (GET_CODE (align_exp) == CONST_INT)
9383 align = INTVAL (align_exp);
9384
5519a4f9 9385 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
9386 if (!TARGET_ALIGN_STRINGOPS)
9387 align = 32;
9388
9389 if (GET_CODE (count_exp) == CONST_INT)
9390 count = INTVAL (count_exp);
 9391   /* Figure out the proper mode for the counter: always SImode for 32-bit
 9392      targets; for 64-bit targets, SImode when possible, otherwise DImode.
 9393      Set count to the number of bytes copied when known at compile time.  */
9394 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9395 || x86_64_zero_extended_value (count_exp))
9396 counter_mode = SImode;
9397 else
9398 counter_mode = DImode;
9399
9400 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9401
9402 emit_insn (gen_cld ());
9403
9404 /* When optimizing for size emit simple rep ; movsb instruction for
9405 counts not divisible by 4. */
9406
9407 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9408 {
9409 countreg = ix86_zero_extend_to_Pmode (count_exp);
9410 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9411 if (TARGET_64BIT)
9412 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9413 destreg, countreg));
9414 else
9415 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9416 destreg, countreg));
9417 }
9418 else if (count != 0
9419 && (align >= 8
9420 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 9421 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
9422 {
9423 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9424 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9425 if (count & ~(size - 1))
9426 {
9427 countreg = copy_to_mode_reg (counter_mode,
9428 GEN_INT ((count >> (size == 4 ? 2 : 3))
9429 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9430 countreg = ix86_zero_extend_to_Pmode (countreg);
9431 if (size == 4)
9432 {
9433 if (TARGET_64BIT)
9434 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9435 destreg, countreg));
9436 else
9437 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9438 destreg, countreg));
9439 }
9440 else
9441 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9442 destreg, countreg));
9443 }
9444 if (size == 8 && (count & 0x04))
9445 emit_insn (gen_strsetsi (destreg,
9446 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9447 if (count & 0x02)
9448 emit_insn (gen_strsethi (destreg,
9449 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9450 if (count & 0x01)
9451 emit_insn (gen_strsetqi (destreg,
9452 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9453 }
9454 else
9455 {
9456 rtx countreg2;
9457 rtx label = NULL;
37ad04a5
JH
9458 /* Compute desired alignment of the string operation. */
9459 int desired_alignment = (TARGET_PENTIUMPRO
9460 && (count == 0 || count >= (unsigned int) 260)
9461 ? 8 : UNITS_PER_WORD);
0945b39d
JH
9462
 9463      /* In case we don't know anything about the alignment, default to the
 9464	  library version, since it is usually equally fast and results in
 9465	  shorter code.  */
9466 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9467 return 0;
9468
9469 if (TARGET_SINGLE_STRINGOP)
9470 emit_insn (gen_cld ());
9471
9472 countreg2 = gen_reg_rtx (Pmode);
9473 countreg = copy_to_mode_reg (counter_mode, count_exp);
9474 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9475
37ad04a5 9476 if (count == 0 && align < desired_alignment)
0945b39d
JH
9477 {
9478 label = gen_label_rtx ();
37ad04a5 9479 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 9480 LEU, 0, counter_mode, 1, label);
0945b39d
JH
9481 }
9482 if (align <= 1)
9483 {
9484 rtx label = ix86_expand_aligntest (destreg, 1);
9485 emit_insn (gen_strsetqi (destreg,
9486 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9487 ix86_adjust_counter (countreg, 1);
9488 emit_label (label);
9489 LABEL_NUSES (label) = 1;
9490 }
9491 if (align <= 2)
9492 {
9493 rtx label = ix86_expand_aligntest (destreg, 2);
9494 emit_insn (gen_strsethi (destreg,
9495 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9496 ix86_adjust_counter (countreg, 2);
9497 emit_label (label);
9498 LABEL_NUSES (label) = 1;
9499 }
37ad04a5 9500 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
9501 {
9502 rtx label = ix86_expand_aligntest (destreg, 4);
9503 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9504 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9505 : zeroreg)));
9506 ix86_adjust_counter (countreg, 4);
9507 emit_label (label);
9508 LABEL_NUSES (label) = 1;
9509 }
9510
37ad04a5
JH
9511 if (label && desired_alignment > 4 && !TARGET_64BIT)
9512 {
9513 emit_label (label);
9514 LABEL_NUSES (label) = 1;
9515 label = NULL_RTX;
9516 }
9517
0945b39d
JH
9518 if (!TARGET_SINGLE_STRINGOP)
9519 emit_insn (gen_cld ());
9520 if (TARGET_64BIT)
9521 {
9522 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9523 GEN_INT (3)));
9524 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9525 destreg, countreg2));
9526 }
9527 else
9528 {
9529 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9530 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9531 destreg, countreg2));
9532 }
0945b39d
JH
9533 if (label)
9534 {
9535 emit_label (label);
9536 LABEL_NUSES (label) = 1;
9537 }
37ad04a5 9538
0945b39d
JH
9539 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9540 emit_insn (gen_strsetsi (destreg,
9541 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9542 if (TARGET_64BIT && (align <= 4 || count == 0))
9543 {
74411039 9544	  rtx label = ix86_expand_aligntest (countreg, 4); /* test bit 2 of the count before the SImode store, as in ix86_expand_movstr */
0945b39d
JH
9545 emit_insn (gen_strsetsi (destreg,
9546 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9547 emit_label (label);
9548 LABEL_NUSES (label) = 1;
9549 }
9550 if (align > 2 && count != 0 && (count & 2))
9551 emit_insn (gen_strsethi (destreg,
9552 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9553 if (align <= 2 || count == 0)
9554 {
74411039 9555 rtx label = ix86_expand_aligntest (countreg, 2);
0945b39d
JH
9556 emit_insn (gen_strsethi (destreg,
9557 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9558 emit_label (label);
9559 LABEL_NUSES (label) = 1;
9560 }
9561 if (align > 1 && count != 0 && (count & 1))
9562 emit_insn (gen_strsetqi (destreg,
9563 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9564 if (align <= 1 || count == 0)
9565 {
74411039 9566 rtx label = ix86_expand_aligntest (countreg, 1);
0945b39d
JH
9567 emit_insn (gen_strsetqi (destreg,
9568 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9569 emit_label (label);
9570 LABEL_NUSES (label) = 1;
9571 }
9572 }
9573 return 1;
9574}
9575/* Expand strlen. */
9576int
9577ix86_expand_strlen (out, src, eoschar, align)
9578 rtx out, src, eoschar, align;
9579{
9580 rtx addr, scratch1, scratch2, scratch3, scratch4;
9581
 9582   /* The generic case of the strlen expander is long.  Avoid expanding it
 9583      unless TARGET_INLINE_ALL_STRINGOPS.  */
9584
9585 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9586 && !TARGET_INLINE_ALL_STRINGOPS
9587 && !optimize_size
9588 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
9589 return 0;
9590
9591 addr = force_reg (Pmode, XEXP (src, 0));
9592 scratch1 = gen_reg_rtx (Pmode);
9593
9594 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9595 && !optimize_size)
9596 {
9597 /* Well it seems that some optimizer does not combine a call like
9598 foo(strlen(bar), strlen(bar));
 9599	 when the move and the subtraction are done here.  It calculates
 9600	 the length just once when these instructions are done inside of
9601 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
9602 often used and I use one fewer register for the lifetime of
9603 output_strlen_unroll() this is better. */
9604
9605 emit_move_insn (out, addr);
9606
9607 ix86_expand_strlensi_unroll_1 (out, align);
9608
9609 /* strlensi_unroll_1 returns the address of the zero at the end of
9610 the string, like memchr(), so compute the length by subtracting
9611 the start address. */
9612 if (TARGET_64BIT)
9613 emit_insn (gen_subdi3 (out, out, addr));
9614 else
9615 emit_insn (gen_subsi3 (out, out, addr));
9616 }
9617 else
9618 {
9619 scratch2 = gen_reg_rtx (Pmode);
9620 scratch3 = gen_reg_rtx (Pmode);
9621 scratch4 = force_reg (Pmode, constm1_rtx);
9622
9623 emit_move_insn (scratch3, addr);
9624 eoschar = force_reg (QImode, eoschar);
9625
9626 emit_insn (gen_cld ());
9627 if (TARGET_64BIT)
9628 {
9629 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
9630 align, scratch4, scratch3));
9631 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
9632 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
9633 }
9634 else
9635 {
9636 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
9637 align, scratch4, scratch3));
9638 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
9639 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
9640 }
9641 }
9642 return 1;
9643}
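/* A note on the repnz scasb arithmetic above (illustrative): the count
   register starts at -1 and is decremented once per byte scanned,
   including the terminator, so for src = "ab" it ends at -4 and
   out = ~(-4) + (-1) = 3 - 1 = 2, the string length.  */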
9644
e075ae69
RH
9645/* Expand the appropriate insns for doing strlen if not just doing
9646 repnz; scasb
9647
9648 out = result, initialized with the start address
9649 align_rtx = alignment of the address.
 9650   scratch = scratch register, initialized with the start address when
77ebd435 9651 not aligned, otherwise undefined
3f803cd9
SC
9652
 9653   This is just the body.  It needs the initialisations mentioned above and
 9654   some address computation at the end.  These things are done in i386.md.  */
9655
0945b39d
JH
9656static void
9657ix86_expand_strlensi_unroll_1 (out, align_rtx)
9658 rtx out, align_rtx;
3f803cd9 9659{
e075ae69
RH
9660 int align;
9661 rtx tmp;
9662 rtx align_2_label = NULL_RTX;
9663 rtx align_3_label = NULL_RTX;
9664 rtx align_4_label = gen_label_rtx ();
9665 rtx end_0_label = gen_label_rtx ();
e075ae69 9666 rtx mem;
e2e52e1b 9667 rtx tmpreg = gen_reg_rtx (SImode);
0945b39d 9668 rtx scratch = gen_reg_rtx (SImode);
e075ae69
RH
9669
9670 align = 0;
9671 if (GET_CODE (align_rtx) == CONST_INT)
9672 align = INTVAL (align_rtx);
3f803cd9 9673
e9a25f70 9674 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 9675
e9a25f70 9676 /* Is there a known alignment and is it less than 4? */
e075ae69 9677 if (align < 4)
3f803cd9 9678 {
0945b39d
JH
9679 rtx scratch1 = gen_reg_rtx (Pmode);
9680 emit_move_insn (scratch1, out);
e9a25f70 9681 /* Is there a known alignment and is it not 2? */
e075ae69 9682 if (align != 2)
3f803cd9 9683 {
e075ae69
RH
9684 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
9685 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
9686
9687 /* Leave just the 3 lower bits. */
0945b39d 9688 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
e075ae69
RH
9689 NULL_RTX, 0, OPTAB_WIDEN);
9690
9076b9c1 9691 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 9692 Pmode, 1, align_4_label);
9076b9c1 9693 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
d43e0b7d 9694 Pmode, 1, align_2_label);
9076b9c1 9695 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
d43e0b7d 9696 Pmode, 1, align_3_label);
3f803cd9
SC
9697 }
9698 else
9699 {
e9a25f70
JL
 9700	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
 9701	     check whether it is aligned to a 4-byte boundary.  */
e9a25f70 9702
0945b39d 9703 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
e075ae69
RH
9704 NULL_RTX, 0, OPTAB_WIDEN);
9705
9076b9c1 9706 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 9707 Pmode, 1, align_4_label);
3f803cd9
SC
9708 }
9709
e075ae69 9710 mem = gen_rtx_MEM (QImode, out);
e9a25f70 9711
e075ae69 9712 /* Now compare the bytes. */
e9a25f70 9713
0f290768 9714       /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
9076b9c1 9715 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
d43e0b7d 9716 QImode, 1, end_0_label);
3f803cd9 9717
0f290768 9718 /* Increment the address. */
0945b39d
JH
9719 if (TARGET_64BIT)
9720 emit_insn (gen_adddi3 (out, out, const1_rtx));
9721 else
9722 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 9723
e075ae69
RH
9724 /* Not needed with an alignment of 2 */
9725 if (align != 2)
9726 {
9727 emit_label (align_2_label);
3f803cd9 9728
d43e0b7d
RK
9729 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9730 end_0_label);
e075ae69 9731
0945b39d
JH
9732 if (TARGET_64BIT)
9733 emit_insn (gen_adddi3 (out, out, const1_rtx));
9734 else
9735 emit_insn (gen_addsi3 (out, out, const1_rtx));
e075ae69
RH
9736
9737 emit_label (align_3_label);
9738 }
9739
d43e0b7d
RK
9740 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9741 end_0_label);
e075ae69 9742
0945b39d
JH
9743 if (TARGET_64BIT)
9744 emit_insn (gen_adddi3 (out, out, const1_rtx));
9745 else
9746 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
9747 }
9748
e075ae69
RH
 9749   /* Generate a loop to check 4 bytes at a time.  It is not a good idea
 9750      to align this loop: that only makes programs bigger and does not
 9751      help to speed them up.  */
9752 emit_label (align_4_label);
3f803cd9 9753
e075ae69
RH
9754 mem = gen_rtx_MEM (SImode, out);
9755 emit_move_insn (scratch, mem);
0945b39d
JH
9756 if (TARGET_64BIT)
9757 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
9758 else
9759 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 9760
e2e52e1b
JH
9761 /* This formula yields a nonzero result iff one of the bytes is zero.
 9762      This saves three branches inside the loop and many cycles.  */
9763
9764 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
9765 emit_insn (gen_one_cmplsi2 (scratch, scratch));
9766 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
7471a1f0 9767 emit_insn (gen_andsi3 (tmpreg, tmpreg,
d8bf17f9 9768 gen_int_mode (0x80808080, SImode)));
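	  /* In C terms the insns above compute
	       tmp = (v - 0x01010101) & ~v & 0x80808080.
	     A worked example: v = 0x11002233 has a zero byte in bits 16-23,
	     and (0x11002233 - 0x01010101) & ~0x11002233 & 0x80808080
	       = 0x0fff2132 & 0xeeffddcc & 0x80808080 = 0x00800000,
	     nonzero, with the set bit marking the zero byte; a word with
	     no zero byte always yields 0.  */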
d43e0b7d
RK
9769 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
9770 align_4_label);
e2e52e1b
JH
9771
9772 if (TARGET_CMOVE)
9773 {
9774 rtx reg = gen_reg_rtx (SImode);
0945b39d 9775 rtx reg2 = gen_reg_rtx (Pmode);
e2e52e1b
JH
9776 emit_move_insn (reg, tmpreg);
9777 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
9778
0f290768 9779 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 9780 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
9781 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9782 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9783 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
9784 gen_rtx_IF_THEN_ELSE (SImode, tmp,
77ebd435
AJ
9785 reg,
9786 tmpreg)));
e2e52e1b 9787       /* Emit the lea manually to avoid clobbering the flags.  */
0945b39d
JH
9788 emit_insn (gen_rtx_SET (SImode, reg2,
9789 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
e2e52e1b
JH
9790
9791 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9792 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9793 emit_insn (gen_rtx_SET (VOIDmode, out,
0945b39d 9794 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
a4f31c00
AJ
9795 reg2,
9796 out)));
e2e52e1b
JH
9797
9798 }
9799 else
9800 {
9801 rtx end_2_label = gen_label_rtx ();
9802 /* Is zero in the first two bytes? */
9803
16189740 9804 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
9805 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9806 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
9807 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9808 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
9809 pc_rtx);
9810 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9811 JUMP_LABEL (tmp) = end_2_label;
9812
0f290768 9813 /* Not in the first two. Move two bytes forward. */
e2e52e1b 9814 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
0945b39d
JH
9815 if (TARGET_64BIT)
9816 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
9817 else
9818 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
e2e52e1b
JH
9819
9820 emit_label (end_2_label);
9821
9822 }
9823
0f290768 9824   /* Avoid a branch when fixing up the final byte position.  */
e2e52e1b 9825 tmpreg = gen_lowpart (QImode, tmpreg);
7e08e190 9826 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
0945b39d
JH
9827 if (TARGET_64BIT)
9828 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
9829 else
9830 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
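  /* How this works (illustrative): the doubled QImode add shifts bit 7 of
     TMPREG -- the "zero byte here" flag for the first byte of the
     remaining pair -- into the carry flag, so the subtract-with-borrow
     yields either OUT - 4 or OUT - 3.  Since OUT points 4 bytes past the
     first byte of that pair, either result lands exactly on the zero
     byte.  */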
e075ae69
RH
9831
9832 emit_label (end_0_label);
9833}
9834\f
e075ae69
RH
9835/* Clear stack slot assignments remembered from previous functions.
9836 This is called from INIT_EXPANDERS once before RTL is emitted for each
9837 function. */
9838
36edd3cc
BS
9839static void
9840ix86_init_machine_status (p)
1526a060 9841 struct function *p;
e075ae69 9842{
37b15744
RH
9843 p->machine = (struct machine_function *)
9844 xcalloc (1, sizeof (struct machine_function));
e075ae69
RH
9845}
9846
1526a060
BS
9847/* Mark machine specific bits of P for GC. */
9848static void
9849ix86_mark_machine_status (p)
9850 struct function *p;
9851{
37b15744 9852 struct machine_function *machine = p->machine;
1526a060
BS
9853 enum machine_mode mode;
9854 int n;
9855
37b15744
RH
9856 if (! machine)
9857 return;
9858
1526a060
BS
9859 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
9860 mode = (enum machine_mode) ((int) mode + 1))
9861 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
37b15744
RH
9862 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
9863}
9864
9865static void
9866ix86_free_machine_status (p)
9867 struct function *p;
9868{
9869 free (p->machine);
9870 p->machine = NULL;
1526a060
BS
9871}
9872
e075ae69
RH
9873/* Return a MEM corresponding to a stack slot with mode MODE.
9874 Allocate a new slot if necessary.
9875
9876 The RTL for a function can have several slots available: N is
9877 which slot to use. */
9878
9879rtx
9880assign_386_stack_local (mode, n)
9881 enum machine_mode mode;
9882 int n;
9883{
9884 if (n < 0 || n >= MAX_386_STACK_LOCALS)
9885 abort ();
9886
9887 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9888 ix86_stack_locals[(int) mode][n]
9889 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9890
9891 return ix86_stack_locals[(int) mode][n];
9892}
9893\f
9894/* Calculate the length of the memory address in the instruction
9895 encoding. Does not include the one-byte modrm, opcode, or prefix. */
9896
9897static int
9898memory_address_length (addr)
9899 rtx addr;
9900{
9901 struct ix86_address parts;
9902 rtx base, index, disp;
9903 int len;
9904
9905 if (GET_CODE (addr) == PRE_DEC
0d7d98ee
JH
9906 || GET_CODE (addr) == POST_INC
9907 || GET_CODE (addr) == PRE_MODIFY
9908 || GET_CODE (addr) == POST_MODIFY)
e075ae69 9909 return 0;
3f803cd9 9910
e075ae69
RH
9911 if (! ix86_decompose_address (addr, &parts))
9912 abort ();
3f803cd9 9913
e075ae69
RH
9914 base = parts.base;
9915 index = parts.index;
9916 disp = parts.disp;
9917 len = 0;
3f803cd9 9918
e075ae69
RH
9919 /* Register Indirect. */
9920 if (base && !index && !disp)
9921 {
9922 /* Special cases: ebp and esp need the two-byte modrm form. */
9923 if (addr == stack_pointer_rtx
9924 || addr == arg_pointer_rtx
564d80f4
JH
9925 || addr == frame_pointer_rtx
9926 || addr == hard_frame_pointer_rtx)
e075ae69 9927 len = 1;
3f803cd9 9928 }
e9a25f70 9929
e075ae69
RH
9930 /* Direct Addressing. */
9931 else if (disp && !base && !index)
9932 len = 4;
9933
3f803cd9
SC
9934 else
9935 {
e075ae69
RH
9936 /* Find the length of the displacement constant. */
9937 if (disp)
9938 {
9939 if (GET_CODE (disp) == CONST_INT
9940 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
9941 len = 1;
9942 else
9943 len = 4;
9944 }
3f803cd9 9945
e075ae69
RH
9946 /* An index requires the two-byte modrm form. */
9947 if (index)
9948 len += 1;
3f803cd9
SC
9949 }
9950
e075ae69
RH
9951 return len;
9952}
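/* Worked examples for the computation above (32-bit addresses,
   illustrative): (%eax) adds 0 bytes beyond the modrm; (%esp) and (%ebp)
   need the two-byte form, so 1; a bare symbol costs a 4-byte
   displacement; 8(%eax) costs 1 for the disp8; and 8(%eax,%ecx,4) costs
   2, the disp8 plus the extra byte implied by the index.  */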
79325812 9953
6ef67412
JH
 9954/* Compute the default value for the "length_immediate" attribute.  When
 9955   SHORTFORM is set, expect that the insn has an 8-bit immediate alternative.  */
e075ae69 9956int
6ef67412 9957ix86_attr_length_immediate_default (insn, shortform)
e075ae69 9958 rtx insn;
6ef67412 9959 int shortform;
e075ae69 9960{
6ef67412
JH
9961 int len = 0;
9962 int i;
6c698a6d 9963 extract_insn_cached (insn);
6ef67412
JH
9964 for (i = recog_data.n_operands - 1; i >= 0; --i)
9965 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 9966 {
6ef67412 9967 if (len)
3071fab5 9968 abort ();
6ef67412
JH
9969 if (shortform
9970 && GET_CODE (recog_data.operand[i]) == CONST_INT
9971 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
9972 len = 1;
9973 else
9974 {
9975 switch (get_attr_mode (insn))
9976 {
9977 case MODE_QI:
9978 len+=1;
9979 break;
9980 case MODE_HI:
9981 len+=2;
9982 break;
9983 case MODE_SI:
9984 len+=4;
9985 break;
14f73b5a
JH
 9986	      /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
9987 case MODE_DI:
9988 len+=4;
9989 break;
6ef67412 9990 default:
c725bd79 9991 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
9992 }
9993 }
3071fab5 9994 }
6ef67412
JH
9995 return len;
9996}
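/* For example (illustrative), "addl $100, %eax" satisfies the 'K'
   constraint and, with SHORTFORM set, counts a one-byte immediate, while
   "addl $100000, %eax" counts the full four bytes.  */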
 9997/* Compute the default value for the "length_address" attribute.  */
9998int
9999ix86_attr_length_address_default (insn)
10000 rtx insn;
10001{
10002 int i;
6c698a6d 10003 extract_insn_cached (insn);
1ccbefce
RH
10004 for (i = recog_data.n_operands - 1; i >= 0; --i)
10005 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 10006 {
6ef67412 10007 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
10008 break;
10009 }
6ef67412 10010 return 0;
3f803cd9 10011}
e075ae69
RH
10012\f
10013/* Return the maximum number of instructions a cpu can issue. */
b657fc39 10014
c237e94a 10015static int
e075ae69 10016ix86_issue_rate ()
b657fc39 10017{
e075ae69 10018 switch (ix86_cpu)
b657fc39 10019 {
e075ae69
RH
10020 case PROCESSOR_PENTIUM:
10021 case PROCESSOR_K6:
10022 return 2;
79325812 10023
e075ae69 10024 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
10025 case PROCESSOR_PENTIUM4:
10026 case PROCESSOR_ATHLON:
e075ae69 10027 return 3;
b657fc39 10028
b657fc39 10029 default:
e075ae69 10030 return 1;
b657fc39 10031 }
b657fc39
L
10032}
10033
e075ae69
RH
 10034/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
 10035   set by DEP_INSN and reads nothing else that DEP_INSN sets.  */
b657fc39 10036
e075ae69
RH
10037static int
10038ix86_flags_dependant (insn, dep_insn, insn_type)
10039 rtx insn, dep_insn;
10040 enum attr_type insn_type;
10041{
10042 rtx set, set2;
b657fc39 10043
e075ae69
RH
10044 /* Simplify the test for uninteresting insns. */
10045 if (insn_type != TYPE_SETCC
10046 && insn_type != TYPE_ICMOV
10047 && insn_type != TYPE_FCMOV
10048 && insn_type != TYPE_IBR)
10049 return 0;
b657fc39 10050
e075ae69
RH
10051 if ((set = single_set (dep_insn)) != 0)
10052 {
10053 set = SET_DEST (set);
10054 set2 = NULL_RTX;
10055 }
10056 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10057 && XVECLEN (PATTERN (dep_insn), 0) == 2
10058 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10059 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10060 {
10061 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
 10062      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
10063 }
78a0d70c
ZW
10064 else
10065 return 0;
b657fc39 10066
78a0d70c
ZW
10067 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10068 return 0;
b657fc39 10069
f5143c46 10070 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
10071 not any other potentially set register. */
10072 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10073 return 0;
10074
10075 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10076 return 0;
10077
10078 return 1;
e075ae69 10079}
b657fc39 10080
e075ae69
RH
10081/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10082 address with operands set by DEP_INSN. */
10083
10084static int
10085ix86_agi_dependant (insn, dep_insn, insn_type)
10086 rtx insn, dep_insn;
10087 enum attr_type insn_type;
10088{
10089 rtx addr;
10090
6ad48e84
JH
10091 if (insn_type == TYPE_LEA
10092 && TARGET_PENTIUM)
5fbdde42
RH
10093 {
10094 addr = PATTERN (insn);
10095 if (GET_CODE (addr) == SET)
10096 ;
10097 else if (GET_CODE (addr) == PARALLEL
10098 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10099 addr = XVECEXP (addr, 0, 0);
10100 else
10101 abort ();
10102 addr = SET_SRC (addr);
10103 }
e075ae69
RH
10104 else
10105 {
10106 int i;
6c698a6d 10107 extract_insn_cached (insn);
1ccbefce
RH
10108 for (i = recog_data.n_operands - 1; i >= 0; --i)
10109 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 10110 {
1ccbefce 10111 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
10112 goto found;
10113 }
10114 return 0;
10115 found:;
b657fc39
L
10116 }
10117
e075ae69 10118 return modified_in_p (addr, dep_insn);
b657fc39 10119}
a269a03c 10120
c237e94a 10121static int
e075ae69 10122ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
10123 rtx insn, link, dep_insn;
10124 int cost;
10125{
e075ae69 10126 enum attr_type insn_type, dep_insn_type;
6ad48e84 10127 enum attr_memory memory, dep_memory;
e075ae69 10128 rtx set, set2;
9b00189f 10129 int dep_insn_code_number;
a269a03c 10130
309ada50 10131   /* Anti and output dependencies have zero cost on all CPUs.  */
e075ae69 10132 if (REG_NOTE_KIND (link) != 0)
309ada50 10133 return 0;
a269a03c 10134
9b00189f
JH
10135 dep_insn_code_number = recog_memoized (dep_insn);
10136
e075ae69 10137 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 10138 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 10139 return cost;
a269a03c 10140
1c71e60e
JH
10141 insn_type = get_attr_type (insn);
10142 dep_insn_type = get_attr_type (dep_insn);
9b00189f 10143
a269a03c
JC
10144 switch (ix86_cpu)
10145 {
10146 case PROCESSOR_PENTIUM:
e075ae69
RH
10147 /* Address Generation Interlock adds a cycle of latency. */
10148 if (ix86_agi_dependant (insn, dep_insn, insn_type))
10149 cost += 1;
10150
10151 /* ??? Compares pair with jump/setcc. */
10152 if (ix86_flags_dependant (insn, dep_insn, insn_type))
10153 cost = 0;
10154
 10155      /* Floating point stores require the value to be ready one cycle earlier.  */
0f290768 10156 if (insn_type == TYPE_FMOV
e075ae69
RH
10157 && get_attr_memory (insn) == MEMORY_STORE
10158 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10159 cost += 1;
10160 break;
a269a03c 10161
e075ae69 10162 case PROCESSOR_PENTIUMPRO:
6ad48e84
JH
10163 memory = get_attr_memory (insn);
10164 dep_memory = get_attr_memory (dep_insn);
10165
0f290768 10166 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
10167 increase the cost here for non-imov insns. */
10168 if (dep_insn_type != TYPE_IMOV
6ad48e84
JH
10169 && dep_insn_type != TYPE_FMOV
10170 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
e075ae69
RH
10171 cost += 1;
10172
10173 /* INT->FP conversion is expensive. */
10174 if (get_attr_fp_int_src (dep_insn))
10175 cost += 5;
10176
10177 /* There is one cycle extra latency between an FP op and a store. */
10178 if (insn_type == TYPE_FMOV
10179 && (set = single_set (dep_insn)) != NULL_RTX
10180 && (set2 = single_set (insn)) != NULL_RTX
10181 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10182 && GET_CODE (SET_DEST (set2)) == MEM)
10183 cost += 1;
6ad48e84
JH
10184
 10185      /* Show the ability of the reorder buffer to hide the latency of a
 10186	  load by executing it in parallel with the previous instruction,
 10187	  when the previous instruction is not needed to compute the address.  */
10188 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10189 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10190 {
 10191	  /* Claim that moves take one cycle, as the core can issue one load
 10192	     at a time and the next load can start a cycle later.  */
10193 if (dep_insn_type == TYPE_IMOV
10194 || dep_insn_type == TYPE_FMOV)
10195 cost = 1;
10196 else if (cost > 1)
10197 cost--;
10198 }
e075ae69 10199 break;
a269a03c 10200
e075ae69 10201 case PROCESSOR_K6:
6ad48e84
JH
10202 memory = get_attr_memory (insn);
10203 dep_memory = get_attr_memory (dep_insn);
e075ae69
RH
10204 /* The esp dependency is resolved before the instruction is really
10205 finished. */
10206 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10207 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10208 return 1;
a269a03c 10209
0f290768 10210 /* Since we can't represent delayed latencies of load+operation,
e075ae69 10211 increase the cost here for non-imov insns. */
6ad48e84 10212 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
e075ae69
RH
10213 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10214
10215 /* INT->FP conversion is expensive. */
10216 if (get_attr_fp_int_src (dep_insn))
10217 cost += 5;
6ad48e84
JH
10218
 10219      /* Show the ability of the reorder buffer to hide the latency of a
 10220	  load by executing it in parallel with the previous instruction,
 10221	  when the previous instruction is not needed to compute the address.  */
10222 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10223 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10224 {
 10225	  /* Claim that moves take one cycle, as the core can issue one load
 10226	     at a time and the next load can start a cycle later.  */
10227 if (dep_insn_type == TYPE_IMOV
10228 || dep_insn_type == TYPE_FMOV)
10229 cost = 1;
10230 else if (cost > 2)
10231 cost -= 2;
10232 else
10233 cost = 1;
10234 }
a14003ee 10235 break;
e075ae69 10236
309ada50 10237 case PROCESSOR_ATHLON:
6ad48e84
JH
10238 memory = get_attr_memory (insn);
10239 dep_memory = get_attr_memory (dep_insn);
10240
10241 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
0b5107cf
JH
10242 {
10243 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10244 cost += 2;
10245 else
10246 cost += 3;
10247 }
6ad48e84
JH
 10248      /* Show the ability of the reorder buffer to hide the latency of a
 10249	  load by executing it in parallel with the previous instruction,
 10250	  when the previous instruction is not needed to compute the address.  */
10251 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10252 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10253 {
 10254	  /* Claim that moves take one cycle, as the core can issue one load
 10255	     at a time and the next load can start a cycle later.  */
10256 if (dep_insn_type == TYPE_IMOV
10257 || dep_insn_type == TYPE_FMOV)
10258 cost = 0;
10259 else if (cost >= 3)
10260 cost -= 3;
10261 else
10262 cost = 0;
10263 }
309ada50 10264
a269a03c 10265 default:
a269a03c
JC
10266 break;
10267 }
10268
10269 return cost;
10270}
0a726ef1 10271
e075ae69
RH
10272static union
10273{
10274 struct ppro_sched_data
10275 {
10276 rtx decode[3];
10277 int issued_this_cycle;
10278 } ppro;
10279} ix86_sched_data;
0a726ef1 10280
e075ae69
RH
10281static enum attr_ppro_uops
10282ix86_safe_ppro_uops (insn)
10283 rtx insn;
10284{
10285 if (recog_memoized (insn) >= 0)
10286 return get_attr_ppro_uops (insn);
10287 else
10288 return PPRO_UOPS_MANY;
10289}
0a726ef1 10290
e075ae69
RH
10291static void
10292ix86_dump_ppro_packet (dump)
10293 FILE *dump;
0a726ef1 10294{
e075ae69 10295 if (ix86_sched_data.ppro.decode[0])
0a726ef1 10296 {
e075ae69
RH
10297 fprintf (dump, "PPRO packet: %d",
10298 INSN_UID (ix86_sched_data.ppro.decode[0]));
10299 if (ix86_sched_data.ppro.decode[1])
10300 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10301 if (ix86_sched_data.ppro.decode[2])
10302 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10303 fputc ('\n', dump);
10304 }
10305}
0a726ef1 10306
e075ae69 10307/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 10308
c237e94a
ZW
10309static void
10310ix86_sched_init (dump, sched_verbose, veclen)
e075ae69
RH
10311 FILE *dump ATTRIBUTE_UNUSED;
10312 int sched_verbose ATTRIBUTE_UNUSED;
c237e94a 10313 int veclen ATTRIBUTE_UNUSED;
e075ae69
RH
10314{
10315 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10316}
10317
10318/* Shift INSN to SLOT, and shift everything else down. */
10319
10320static void
10321ix86_reorder_insn (insnp, slot)
10322 rtx *insnp, *slot;
10323{
10324 if (insnp != slot)
10325 {
10326 rtx insn = *insnp;
0f290768 10327 do
e075ae69
RH
10328 insnp[0] = insnp[1];
10329 while (++insnp != slot);
10330 *insnp = insn;
0a726ef1 10331 }
e075ae69
RH
10332}
10333
c6991660 10334static void
78a0d70c
ZW
10335ix86_sched_reorder_ppro (ready, e_ready)
10336 rtx *ready;
10337 rtx *e_ready;
10338{
10339 rtx decode[3];
10340 enum attr_ppro_uops cur_uops;
10341 int issued_this_cycle;
10342 rtx *insnp;
10343 int i;
e075ae69 10344
0f290768 10345 /* At this point .ppro.decode contains the state of the three
78a0d70c 10346 decoders from last "cycle". That is, those insns that were
0f290768 10347 actually independent. But here we're scheduling for the
78a0d70c
ZW
10348 decoder, and we may find things that are decodable in the
10349 same cycle. */
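      /* A sketch of the decode model assumed here: the PPro decodes up to
	 three insns per cycle in a 4-1-1 template -- decoder 0 accepts a
	 multi-uop insn (PPRO_UOPS_FEW, or PPRO_UOPS_MANY via microcode),
	 while decoders 1 and 2 accept only single-uop insns
	 (PPRO_UOPS_ONE).  Hence we look for one wide insn plus two simple
	 ones each cycle.  */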
e075ae69 10350
0f290768 10351 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
78a0d70c 10352 issued_this_cycle = 0;
e075ae69 10353
78a0d70c
ZW
10354 insnp = e_ready;
10355 cur_uops = ix86_safe_ppro_uops (*insnp);
0a726ef1 10356
78a0d70c
ZW
10357 /* If the decoders are empty, and we've a complex insn at the
10358 head of the priority queue, let it issue without complaint. */
10359 if (decode[0] == NULL)
10360 {
10361 if (cur_uops == PPRO_UOPS_MANY)
10362 {
10363 decode[0] = *insnp;
10364 goto ppro_done;
10365 }
10366
 10367	  /* Otherwise, search for a 2-4 uop insn to issue.  */
10368 while (cur_uops != PPRO_UOPS_FEW)
10369 {
10370 if (insnp == ready)
10371 break;
10372 cur_uops = ix86_safe_ppro_uops (*--insnp);
10373 }
10374
10375 /* If so, move it to the head of the line. */
10376 if (cur_uops == PPRO_UOPS_FEW)
10377 ix86_reorder_insn (insnp, e_ready);
0a726ef1 10378
78a0d70c
ZW
10379 /* Issue the head of the queue. */
10380 issued_this_cycle = 1;
10381 decode[0] = *e_ready--;
10382 }
fb693d44 10383
78a0d70c
ZW
10384 /* Look for simple insns to fill in the other two slots. */
10385 for (i = 1; i < 3; ++i)
10386 if (decode[i] == NULL)
10387 {
10388 if (ready >= e_ready)
10389 goto ppro_done;
fb693d44 10390
e075ae69
RH
10391 insnp = e_ready;
10392 cur_uops = ix86_safe_ppro_uops (*insnp);
78a0d70c
ZW
10393 while (cur_uops != PPRO_UOPS_ONE)
10394 {
10395 if (insnp == ready)
10396 break;
10397 cur_uops = ix86_safe_ppro_uops (*--insnp);
10398 }
fb693d44 10399
78a0d70c
ZW
10400 /* Found one. Move it to the head of the queue and issue it. */
10401 if (cur_uops == PPRO_UOPS_ONE)
e075ae69 10402 {
78a0d70c
ZW
10403 ix86_reorder_insn (insnp, e_ready);
10404 decode[i] = *e_ready--;
10405 issued_this_cycle++;
10406 continue;
10407 }
fb693d44 10408
78a0d70c
ZW
10409 /* ??? Didn't find one. Ideally, here we would do a lazy split
10410 of 2-uop insns, issue one and queue the other. */
10411 }
fb693d44 10412
78a0d70c
ZW
10413 ppro_done:
10414 if (issued_this_cycle == 0)
10415 issued_this_cycle = 1;
10416 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
10417}
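
/* Note (added commentary, not in the original source): this mirrors the
   PentiumPro/PII "4-1-1" decode template -- only decoder 0 accepts insns
   that expand to more than one uop (PPRO_UOPS_FEW for 2-4 uops,
   PPRO_UOPS_MANY for microcoded insns), while decoders 1 and 2 accept
   only single-uop insns (PPRO_UOPS_ONE).  */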

/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

static int
ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *n_readyp;
     int clock_var ATTRIBUTE_UNUSED;
{
  int n_ready = *n_readyp;
  rtx *e_ready = ready + n_ready - 1;

  if (n_ready < 2)
    goto out;

  switch (ix86_cpu)
    {
    default:
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

 out:
  return ix86_issue_rate ();
}

/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}

static int
ia32_use_dfa_pipeline_interface ()
{
  if (ix86_cpu == PROCESSOR_PENTIUM)
    return 1;
  return 0;
}

/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead ()
{
  if (ix86_cpu == PROCESSOR_PENTIUM)
    return 2;
  else
    return 0;
}

\f
/* Walk through INSNS and look for MEM references whose address is DSTREG or
   SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
   appropriate.  */

void
ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
     rtx insns;
     rtx dstref, srcref, dstreg, srcreg;
{
  rtx insn;

  for (insn = insns; insn != 0; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
				 dstreg, srcreg);
}

/* Subroutine of above to actually do the updating by recursively walking
   the rtx.  */

static void
ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx x;
     rtx dstref, srcref, dstreg, srcreg;
{
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);
  int i, j;

  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
    {
      if (*format_ptr == 'e')
	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
				   dstreg, srcreg);
      else if (*format_ptr == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
				     dstreg, srcreg);
    }
}
\f
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (exp, align)
     tree exp;
     int align;
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
	   && align < 256)
    return 256;

  return align;
}
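
/* Example (added commentary, not in the original source): a `double'
   constant, whose mode is DFmode, is bumped from the default 32-bit
   alignment to 64 bits, presumably so FP loads never cross a word
   boundary; a string constant of 31 or more bytes is aligned to 256 bits,
   which lets the string/block-move expanders use wide aligned accesses.  */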

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (type, align)
     tree type;
     int align;
{
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}

/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (type, align)
     tree type;
     int align;
{
  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
\f
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */

void
x86_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load the address using the shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but the kernel does
	 not use trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load the static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump through r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }
}
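
/* Byte layout emitted above, for reference (added commentary, not in the
   original source; recall that the HImode stores are little-endian, so
   e.g. 0xbb41 lands in memory as the bytes 41 bb):

     32-bit:  b9 <cxt:4>		movl  $cxt, %ecx
	      e9 <disp:4>		jmp   fnaddr	       (10 bytes)

     64-bit:  41 bb <fnaddr:4>		movl  $fnaddr, %r11d   (short form)
	 or:  49 bb <fnaddr:8>		movabsq $fnaddr, %r11
	      49 ba <cxt:8>		movabsq $cxt, %r10
	      49 ff e3			jmp   *%r11  */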
\f
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags)					\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
} while (0)
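
/* For example (commentary added for clarity; the call below appears
   verbatim later in this file), the MMX `emms' intrinsic is registered
   only when -mmmx is in effect:

     def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void,
		  IX86_BUILTIN_EMMS);  */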

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};

/* Used for builtins that are enabled both by -msse and -msse2.  */
#define MASK_SSE1 (MASK_SSE | MASK_SSE2)

static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
};
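
/* Note (added commentary, not in the original source): the comi/ucomi
   patterns only provide EQ/LT/LE/NE tests, so the GT and GE entries above
   reuse LT and LE with the `flag' column set to 1, which tells the
   expander to swap the two operands before emitting the comparison.  */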

static const struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  */
  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },

  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },

  /* SSE2 */
  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },

  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },

  /* SSE2 MMX */
  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
};
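
/* Note (added commentary, not in the original source): entries whose name
   field is 0 are deliberately skipped by the registration loop in
   ix86_init_mmx_sse_builtins (it does `if (d->name == 0) continue;'); they
   are given hand-written prototypes by explicit def_builtin calls further
   down, because their argument types do not follow the simple
   two-operands-of-one-vector-mode pattern the loop assumes.  */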

static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
};

void
ix86_init_builtins ()
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */

static void
ix86_init_mmx_sse_builtins ()
{
  const struct builtin_description * d;
  size_t i;
  tree endlink = void_list_node;

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE,
						 V4SF_type_node,
						 endlink)));
  tree v4si_ftype_v4sf_v4sf
    = build_function_type (V4SI_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE,
						 V4SF_type_node,
						 endlink)));
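  /* Commentary added for clarity (not in the original source): each
     prototype is built as a NULL_TREE-tagged tree_cons chain terminated
     by `endlink' (void_list_node, which marks the list as non-varargs).
     The first argument to build_function_type is the return type and the
     chain lists the parameters, so int_ftype_v4sf_v4sf above describes
     `int f (__v4sf, __v4sf)'.  */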
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      endlink));
  tree int_ftype_v8qi
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));
  tree v4sf_ftype_v4sf_int
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v4sf_ftype_v4sf_v2si
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree int_ftype_v4hi_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_int_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v4hi_ftype_v2si_v2si
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v2si_ftype_v4hi_v4hi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_di
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree v2si_ftype_v2si_di
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree void_ftype_void
    = build_function_type (void_type_node, endlink);
  tree void_ftype_unsigned
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, unsigned_type_node,
				      endlink));
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, endlink);
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree v4sf_ftype_void
    = build_function_type (V4SF_type_node, endlink);
  tree v2si_ftype_v4sf
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      endlink));
  /* Loads/stores.  */
  tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
				  tree_cons (NULL_TREE, V8QI_type_node,
					     tree_cons (NULL_TREE,
							pchar_type_node,
							endlink)));
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type (void_type_node, maskmovq_args);
  tree v4sf_ftype_pfloat
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, pfloat_type_node,
				      endlink));
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, pv2si_type_node,
						 endlink)));
  tree void_ftype_pv2si_v4sf
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pv2si_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 endlink)));
  tree void_ftype_pfloat_v4sf
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pfloat_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 endlink)));
  tree void_ftype_pdi_di
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pdi_type_node,
				      tree_cons (NULL_TREE,
						 long_long_unsigned_type_node,
						 endlink)));
  tree void_ftype_pv2di_v2di
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pv2di_type_node,
				      tree_cons (NULL_TREE,
						 V2DI_type_node,
						 endlink)));
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      endlink));

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 endlink)));
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
				      tree_cons (NULL_TREE,
						 long_long_unsigned_type_node,
						 endlink)));

  tree v2si_ftype_v2sf
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SF_type_node,
				      endlink));
  tree v2sf_ftype_v2si
    = build_function_type (V2SF_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree v2si_ftype_v2si
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree v2sf_ftype_v2sf
    = build_function_type (V2SF_type_node,
			   tree_cons (NULL_TREE, V2SF_type_node,
				      endlink));
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type (V2SF_type_node,
			   tree_cons (NULL_TREE, V2SF_type_node,
				      tree_cons (NULL_TREE,
						 V2SF_type_node,
						 endlink)));
  tree v2si_ftype_v2sf_v2sf
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SF_type_node,
				      tree_cons (NULL_TREE,
						 V2SF_type_node,
						 endlink)));
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree int_ftype_v2df_v2df
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node,
				      tree_cons (NULL_TREE, V2DF_type_node, endlink)));

  tree ti_ftype_void
    = build_function_type (intTI_type_node, endlink);
  tree ti_ftype_ti_ti
    = build_function_type (intTI_type_node,
			   tree_cons (NULL_TREE, intTI_type_node,
				      tree_cons (NULL_TREE, intTI_type_node,
						 endlink)));
  tree void_ftype_pvoid
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, ptr_type_node, endlink));
  tree v2di_ftype_di
    = build_function_type (V2DI_type_node,
			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
				      endlink));
  tree v4sf_ftype_v4si
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SI_type_node, endlink));
  tree v4si_ftype_v4sf
    = build_function_type (V4SI_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node, endlink));
  tree v2df_ftype_v4si
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, V4SI_type_node, endlink));
  tree v4si_ftype_v2df
    = build_function_type (V4SI_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node, endlink));
  tree v2si_ftype_v2df
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node, endlink));
  tree v4sf_ftype_v2df
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node, endlink));
  tree v2df_ftype_v2si
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node, endlink));
  tree v2df_ftype_v4sf
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node, endlink));
  tree int_ftype_v2df
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node, endlink));
  tree v2df_ftype_v2df_int
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v4sf_ftype_v4sf_v2df
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, V2DF_type_node,
						 endlink)));
  tree v2df_ftype_v2df_v4sf
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 endlink)));
  tree v2df_ftype_v2df_v2df_int
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node,
				      tree_cons (NULL_TREE, V2DF_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v2df_ftype_v2df_pv2si
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node,
				      tree_cons (NULL_TREE, pv2si_type_node,
						 endlink)));
  tree void_ftype_pv2si_v2df
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pv2si_type_node,
				      tree_cons (NULL_TREE, V2DF_type_node,
						 endlink)));
  tree void_ftype_pdouble_v2df
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pdouble_type_node,
				      tree_cons (NULL_TREE, V2DF_type_node,
						 endlink)));
  tree void_ftype_pint_int
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pint_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree maskmovdqu_args = tree_cons (NULL_TREE, V16QI_type_node,
				    tree_cons (NULL_TREE, V16QI_type_node,
					       tree_cons (NULL_TREE,
							  pchar_type_node,
							  endlink)));
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type (void_type_node, maskmovdqu_args);
  tree v2df_ftype_pdouble
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, pdouble_type_node,
				      endlink));
  tree v2df_ftype_v2df_v2df
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node,
				      tree_cons (NULL_TREE, V2DF_type_node,
						 endlink)));
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type (V16QI_type_node,
			   tree_cons (NULL_TREE, V16QI_type_node,
				      tree_cons (NULL_TREE, V16QI_type_node,
						 endlink)));
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type (V8HI_type_node,
			   tree_cons (NULL_TREE, V8HI_type_node,
				      tree_cons (NULL_TREE, V8HI_type_node,
						 endlink)));
  tree v4si_ftype_v4si_v4si
    = build_function_type (V4SI_type_node,
			   tree_cons (NULL_TREE, V4SI_type_node,
				      tree_cons (NULL_TREE, V4SI_type_node,
						 endlink)));
  tree v2di_ftype_v2di_v2di
    = build_function_type (V2DI_type_node,
			   tree_cons (NULL_TREE, V2DI_type_node,
				      tree_cons (NULL_TREE, V2DI_type_node,
						 endlink)));
  tree v2di_ftype_v2df_v2df
    = build_function_type (V2DI_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node,
				      tree_cons (NULL_TREE, V2DF_type_node,
						 endlink)));
  tree v2df_ftype_v2df
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node,
				      endlink));
  tree v2df_ftype_double
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, double_type_node,
				      endlink));
  tree v2df_ftype_double_double
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, double_type_node,
				      tree_cons (NULL_TREE, double_type_node,
						 endlink)));
  tree int_ftype_v8hi_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V8HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v8hi_ftype_v8hi_int_int
    = build_function_type (V8HI_type_node,
			   tree_cons (NULL_TREE, V8HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v2di_ftype_v2di_int
    = build_function_type (V2DI_type_node,
			   tree_cons (NULL_TREE, V2DI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v4si_ftype_v4si_int
    = build_function_type (V4SI_type_node,
			   tree_cons (NULL_TREE, V4SI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v8hi_ftype_v8hi_int
    = build_function_type (V8HI_type_node,
			   tree_cons (NULL_TREE, V8HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v8hi_ftype_v8hi_v2di
    = build_function_type (V8HI_type_node,
			   tree_cons (NULL_TREE, V8HI_type_node,
				      tree_cons (NULL_TREE, V2DI_type_node,
						 endlink)));
  tree v4si_ftype_v4si_v2di
    = build_function_type (V4SI_type_node,
			   tree_cons (NULL_TREE, V4SI_type_node,
				      tree_cons (NULL_TREE, V2DI_type_node,
						 endlink)));
  tree v4si_ftype_v8hi_v8hi
    = build_function_type (V4SI_type_node,
			   tree_cons (NULL_TREE, V8HI_type_node,
				      tree_cons (NULL_TREE, V8HI_type_node,
						 endlink)));
  tree di_ftype_v8qi_v8qi
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v2di_ftype_v16qi_v16qi
    = build_function_type (V2DI_type_node,
			   tree_cons (NULL_TREE, V16QI_type_node,
				      tree_cons (NULL_TREE, V16QI_type_node,
						 endlink)));
  tree int_ftype_v16qi
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V16QI_type_node, endlink));

  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si_v4si;
	  break;
	case V2DImode:
	  type = v2di_ftype_v2di_v2di;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df_v2df;
	  break;
	case TImode:
	  type = ti_ftype_ti_ti;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  abort ();
	}

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
	  || d->icode == CODE_FOR_maskncmpv4sf3
	  || d->icode == CODE_FOR_vmmaskcmpv4sf3
	  || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_maskcmpv2df3
	  || d->icode == CODE_FOR_maskncmpv2df3
	  || d->icode == CODE_FOR_vmmaskcmpv2df3
	  || d->icode == CODE_FOR_vmmaskncmpv2df3)
	type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }

  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
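
  /* Illustrative: note that the MMX shift builtins just defined take their
     count as a DImode value, mirroring the hardware, where the count lives
     in an MMX register.  A hedged usage sketch (type spelling assumed, as
     above):

	typedef short v4hi __attribute__ ((vector_size (8)));

	v4hi
	shift_left_words (v4hi a, long long count)
	{
	  return __builtin_ia32_psllw (a, count);
	}
  */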
0f290768 11645
bd793c65 11646 /* comi/ucomi insns. */
ca7558fc 11647 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
fbe5eb6d
BS
11648 if (d->mask == MASK_SSE2)
11649 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
11650 else
11651 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 11652
1255c85c
BS
11653 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
11654 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
11655 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 11656
fbe5eb6d
BS
11657 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
11658 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
11659 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
11660 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
11661 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
11662 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
bd793c65 11663
fbe5eb6d
BS
11664 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
11665 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
11666 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
11667 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
e37af218 11668
fbe5eb6d
BS
11669 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
11670 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
bd793c65 11671
fbe5eb6d 11672 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
bd793c65 11673
fbe5eb6d
BS
11674 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
11675 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
11676 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
11677 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
11678 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
11679 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
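
  /* Illustrative: the load/store builtins above underlie the xmmintrin.h
     intrinsics; for example _mm_loadu_ps reduces to
     __builtin_ia32_loadups.  A minimal sketch, assuming the vector_size
     spelling of the SSE type:

	typedef float v4sf __attribute__ ((vector_size (16)));

	v4sf
	load_unaligned (float *p)
	{
	  return __builtin_ia32_loadups (p);	/* movups */
	}
  */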

  def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);

  /* SSE2 */
  def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (x, mode)
     rtx x;
     enum machine_mode mode;
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
			      : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
				: gen_rtx_SUBREG (V4SFmode, x, 0)));
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* In type_for_mode we restrict the ability to create TImode types
   to hosts with 64-bit H_W_I.  So we've defined the SSE logicals
   to have a V4SFmode signature.  Convert them in-place to TImode.  */

static rtx
ix86_expand_timode_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);

  op0 = gen_lowpart (TImode, op0);
  op1 = gen_lowpart (TImode, op1);
  target = gen_reg_rtx (TImode);

  if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
    op0 = copy_to_mode_reg (TImode, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
    op1 = copy_to_mode_reg (TImode, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (TImode, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);

  return gen_lowpart (V4SFmode, target);
}
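
/* Illustrative: the TImode detour above is invisible at the source level;
   the SSE logical builtins keep their V4SFmode signature.  A minimal
   sketch of a caller (the vector_size type spelling is assumed, not
   defined here):

	typedef float v4sf __attribute__ ((vector_size (16)));

	v4sf
	and_ps (v4sf a, v4sf b)
	{
	  return __builtin_ia32_andps (a, b);
	}
*/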

/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (icode, arglist)
     enum insn_code icode;
     tree arglist;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));

  if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (icode, arglist, target, do_load)
     enum insn_code icode;
     tree arglist;
     rtx target;
     int do_load;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
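
/* Illustrative: the three insns handled above (sqrtss, rsqrtss, rcpss)
   replace only the low element and copy the remaining elements from the
   second operand, which is why op1 is set to op0 before emitting.  From
   user code (type spelling assumed):

	typedef float v4sf __attribute__ ((vector_size (16)));

	v4sf
	sqrt_low (v4sf a)
	{
	  return __builtin_ia32_sqrtss (a);	/* low element only */
	}
*/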

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
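
/* Illustrative: d->flag marks comparisons the hardware lacks.  SSE's CMPPS
   only encodes eq/lt/le (and their negations) directly, so, assuming the
   cmpgtps entry in bdesc_2arg is flagged for swapping, a "greater than"
   builtin is expanded by the swap above as the reversed "less than"
   (type spellings assumed):

	typedef float v4sf __attribute__ ((vector_size (16)));
	typedef int v4si __attribute__ ((vector_size (16)));

	v4si
	greater_mask (v4sf a, v4sf b)
	{
	  return __builtin_ia32_cmpgtps (a, b);	/* emitted as cmpltps, swapped */
	}
*/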

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  gen_rtx_REG (CCmode, FLAGS_REG),
					  const0_rtx)));

  return SUBREG_REG (target);
}
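
/* Illustrative: the comi builtins expanded above materialize one flag bit
   as an int.  Assuming the usual __builtin_ia32_comieq entry in bdesc_comi
   and the type spelling below, the scalar "equal" test looks like:

	typedef float v4sf __attribute__ ((vector_size (16)));

	int
	low_equal (v4sf a, v4sf b)
	{
	  return __builtin_ia32_comieq (a, b);	/* 1 iff low elements equal */
	}
*/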

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_PEXTRW:
    case IX86_BUILTIN_PEXTRW128:
      icode = (fcode == IX86_BUILTIN_PEXTRW
	       ? CODE_FOR_mmx_pextrw
	       : CODE_FOR_sse2_pextrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
    case IX86_BUILTIN_PINSRW128:
      icode = (fcode == IX86_BUILTIN_PINSRW
	       ? CODE_FOR_mmx_pinsrw
	       : CODE_FOR_sse2_pinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_ANDPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
					       arglist, target);
    case IX86_BUILTIN_ANDNPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
					       arglist, target);
    case IX86_BUILTIN_ORPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
					       arglist, target);
    case IX86_BUILTIN_XORPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
					       arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
    case IX86_BUILTIN_STOREHPD:
    case IX86_BUILTIN_STORELPD:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADAPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);

    case IX86_BUILTIN_STOREAPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_LOADSD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);

    case IX86_BUILTIN_STORESD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);

    case IX86_BUILTIN_SETPD1:
      target = assign_386_stack_local (DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, DFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
      emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
      return op0;

    case IX86_BUILTIN_SETPD:
      target = assign_386_stack_local (V2DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      emit_move_insn (adjust_address (target, DFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, DFmode, 8),
		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_movapd (op0, target));
      return op0;

    case IX86_BUILTIN_LOADRPD:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
					 gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
      return target;

    case IX86_BUILTIN_LOADPD1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
					 gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPD1:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STORERPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      icode = CODE_FOR_sse2_clflush;
      mode0 = insn_data[icode].operand[0].mode;
      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3
	    || d->icode == CODE_FOR_maskcmpv2df3
	    || d->icode == CODE_FOR_vmmaskcmpv2df3
	    || d->icode == CODE_FOR_maskncmpv2df3
	    || d->icode == CODE_FOR_vmmaskncmpv2df3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}

/* Store OPERAND to memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (DImode,
				      gen_rtx_PRE_DEC (DImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[1]));
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (GET_MODE (operand),
				      gen_rtx_PRE_DEC (SImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
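
/* Illustrative: on !TARGET_64BIT the SImode path above emits the
   equivalent of

	pushl %eax		(mem:SI (pre_dec (reg:SI esp)))

   and hands back (mem:SI (reg:SI esp)); ix86_free_from_memory below then
   releases the slot with the equivalent of

	leal 4(%esp), %esp

   which peephole2 may later convert to a pop or add when a register is
   free.  */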

/* Free the operand from memory.  */
void
ix86_free_from_memory (mode)
     enum machine_mode mode;
{
  if (!TARGET_64BIT || !TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
	size = 2;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to a pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (x, class)
     rtx x;
     enum reg_class class;
{
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
	return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
	{
	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
	  if (MAYBE_SSE_CLASS_P (class))
	    return (reg_class_subset_p (class, GENERAL_REGS)
		    ? GENERAL_REGS : FLOAT_REGS);
	  else
	    return class;
	}
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
	return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}
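
/* Illustrative worked cases for a DFmode CONST_DOUBLE: an SSE class yields
   NO_REGS (SSE can't load constants directly); FLOAT_REGS keeps the
   constant only when the 387 can materialize it (0.0 or 1.0, per
   standard_80387_constant_p); for any other value FLOAT_REGS yields
   NO_REGS, so the constant ends up in the constant pool.  */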

/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We avoid
   this by never combining those units in a single alternative in the
   machine description.  Ensure that this constraint holds to avoid
   unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */
int
ix86_secondary_memory_needed (class1, class2, mode, strict)
     enum reg_class class1, class2;
     enum machine_mode mode;
     int strict;
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
	abort ();
      else
	return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
	  || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
	      && (mode) != SImode)
	  || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
	      && (mode) != SImode));
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */
int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute the cost of the store
     followed by the load.  When copying from a general purpose register we
     may emit multiple stores followed by a single load, causing a
     memory-size-mismatch stall.  Count this as an arbitrarily high cost
     of 20.  */
  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int add_cost = 0;
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	add_cost = 20;
      return (MEMORY_MOVE_COST (mode, class1, 0)
	      + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
    }
  /* Moves between SSE/MMX and the integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
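
/* Illustrative worked case: moving DFmode from GENERAL_REGS to FLOAT_REGS
   needs secondary memory, so the cost is MEMORY_MOVE_COST (DFmode,
   GENERAL_REGS, 0) + MEMORY_MOVE_COST (DFmode, FLOAT_REGS, 1), plus the
   extra 20 because CLASS_MAX_NREGS (GENERAL_REGS, DFmode) == 2 exceeds the
   single FP register on the load side - the size-mismatch stall described
   above.  */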

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* Flags, and only flags, can hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return VALID_SSE_REG_MODE (mode);
  if (MMX_REGNO_P (regno))
    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
  /* We handle both integer and float modes in the general purpose
     registers.  In future we should be able to handle vector modes as
     well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care with QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}
fa79946e
JH
12902
12903/* Return the cost of moving data of mode M between a
12904 register and memory. A value of 2 is the default; this cost is
12905 relative to those in `REGISTER_MOVE_COST'.
12906
12907 If moving between registers and memory is more expensive than
12908 between two registers, you should define this macro to express the
a4f31c00
AJ
12909 relative cost.
12910
fa79946e
JH
12911 Model also increased moving costs of QImode registers in non
12912 Q_REGS classes.
12913 */
12914int
12915ix86_memory_move_cost (mode, class, in)
12916 enum machine_mode mode;
12917 enum reg_class class;
12918 int in;
12919{
12920 if (FLOAT_CLASS_P (class))
12921 {
12922 int index;
12923 switch (mode)
12924 {
12925 case SFmode:
12926 index = 0;
12927 break;
12928 case DFmode:
12929 index = 1;
12930 break;
12931 case XFmode:
12932 case TFmode:
12933 index = 2;
12934 break;
12935 default:
12936 return 100;
12937 }
12938 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
12939 }
12940 if (SSE_CLASS_P (class))
12941 {
12942 int index;
12943 switch (GET_MODE_SIZE (mode))
12944 {
12945 case 4:
12946 index = 0;
12947 break;
12948 case 8:
12949 index = 1;
12950 break;
12951 case 16:
12952 index = 2;
12953 break;
12954 default:
12955 return 100;
12956 }
12957 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
12958 }
12959 if (MMX_CLASS_P (class))
12960 {
12961 int index;
12962 switch (GET_MODE_SIZE (mode))
12963 {
12964 case 4:
12965 index = 0;
12966 break;
12967 case 8:
12968 index = 1;
12969 break;
12970 default:
12971 return 100;
12972 }
12973 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
12974 }
12975 switch (GET_MODE_SIZE (mode))
12976 {
12977 case 1:
12978 if (in)
12979 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
12980 : ix86_cost->movzbl_load);
12981 else
12982 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
12983 : ix86_cost->int_store[0] + 4);
12984 break;
12985 case 2:
12986 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
12987 default:
12988 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
12989 if (mode == TFmode)
12990 mode = XFmode;
3bb7e126 12991 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
fa79946e
JH
12992 * (int) GET_MODE_SIZE (mode) / 4);
12993 }
12994}
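/* Illustrative sketch, added here and not part of the original file:
   two lookups through the tables above.  Guarded out of compilation.  */
#if 0
static int
example_memory_move_costs (void)
{
  /* DFmode load into FLOAT_REGS: index 1, so ix86_cost->fp_load[1].  */
  int fp_load = ix86_memory_move_cost (DFmode, FLOAT_REGS, 1);

  /* DImode load into GENERAL_REGS on ia32: size 8 falls into the
     default case, i.e. two 32-bit loads, int_load[2] * 8 / 4.  */
  int di_load = ix86_memory_move_cost (DImode, GENERAL_REGS, 1);

  return fp_load + di_load;
}
#endif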
0ecf09f9 12995
2cc07db4
RH
12996#ifdef DO_GLOBAL_CTORS_BODY
12997static void
12998ix86_svr3_asm_out_constructor (symbol, priority)
12999 rtx symbol;
13000 int priority ATTRIBUTE_UNUSED;
13001{
13002 init_section ();
13003 fputs ("\tpushl $", asm_out_file);
13004 assemble_name (asm_out_file, XSTR (symbol, 0));
13005 fputc ('\n', asm_out_file);
13006}
13007#endif
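/* For reference, added here and not part of the original file: for a
   constructor `ctor', the hook above switches to the init section and
   emits roughly

	pushl $ctor

   preceded by whatever section directive init_section uses on the
   target, so the startup code can pop the address and call it.  */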
162f023b
JH
13008
13009/* Order the registers for register allocator. */
13010
13011void
13012x86_order_regs_for_local_alloc ()
13013{
13014 int pos = 0;
13015 int i;
13016
13017 /* First allocate the call-used general purpose registers. */
13018 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13019 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13020 reg_alloc_order [pos++] = i;
13021
13022 /* Then the call-saved general purpose registers. */
13023 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13024 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13025 reg_alloc_order [pos++] = i;
13026
13027 /* x87 registers come first in case we are doing FP math
13028 using them. */
13029 if (!TARGET_SSE_MATH)
13030 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13031 reg_alloc_order [pos++] = i;
13032
13033 /* SSE registers. */
13034 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13035 reg_alloc_order [pos++] = i;
13036 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13037 reg_alloc_order [pos++] = i;
13038
13039 /* x87 registers. */
13040 if (TARGET_SSE_MATH)
13041 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13042 reg_alloc_order [pos++] = i;
13043
13044 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13045 reg_alloc_order [pos++] = i;
13046
13047 /* Initialize the rest of the array, since we do not allocate some
13048 registers at all. */
13049 while (pos < FIRST_PSEUDO_REGISTER)
13050 reg_alloc_order [pos++] = 0;
13051}
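/* Illustrative sketch, added here and not part of the original file:
   dump the order built above.  Without -mfpmath=sse the groups appear
   as call-used GPRs, call-saved GPRs, x87, SSE, MMX; with it, the SSE
   registers precede the x87 stack.  Guarded out of compilation.  */
#if 0
static void
example_dump_alloc_order (void)
{
  int i;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    fprintf (stderr, "%d: %s\n", i, reg_names[reg_alloc_order[i]]);
}
#endif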
194734e9
JH
13052
13053void
13054x86_output_mi_thunk (file, delta, function)
13055 FILE *file;
13056 int delta;
13057 tree function;
13058{
13059 tree parm;
13060 rtx xops[3];
13061
13062 if (ix86_regparm > 0)
13063 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13064 else
13065 parm = NULL_TREE;
13066 for (; parm; parm = TREE_CHAIN (parm))
13067 if (TREE_VALUE (parm) == void_type_node)
13068 break;
13069
13070 xops[0] = GEN_INT (delta);
13071 if (TARGET_64BIT)
13072 {
13073 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13074 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13075 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
13076 if (flag_pic)
13077 {
13078 fprintf (file, "\tjmp *");
13079 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13080 fprintf (file, "@GOTPCREL(%%rip)\n");
13081 }
13082 else
13083 {
13084 fprintf (file, "\tjmp ");
13085 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13086 fprintf (file, "\n");
13087 }
13088 }
13089 else
13090 {
13091 if (parm)
13092 xops[1] = gen_rtx_REG (SImode, 0);
13093 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13094 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13095 else
13096 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13097 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
13098
13099 if (flag_pic)
13100 {
13101 xops[0] = pic_offset_table_rtx;
13102 xops[1] = gen_label_rtx ();
13103 xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13104
13105 if (ix86_regparm > 2)
13106 abort ();
13107 output_asm_insn ("push{l}\t%0", xops);
13108 output_asm_insn ("call\t%P1", xops);
13109 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13110 output_asm_insn ("pop{l}\t%0", xops);
13111 output_asm_insn
13112 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13113 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13114 output_asm_insn
13115 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13116 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13117 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
13118 }
13119 else
13120 {
13121 fprintf (file, "\tjmp ");
13122 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13123 fprintf (file, "\n");
13124 }
13125 }
13126}
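/* For reference, added here and not part of the original file: on ia32
   without PIC, with no register parameters and a non-aggregate return,
   a thunk with DELTA == 8 for a method `target' comes out roughly as

	addl $8, 4(%esp)
	jmp target

   i.e. the `this' pointer on the stack is adjusted in place and
   control transfers to the real method.  */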