]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
i386.c (output_pic_addr_const): Lowercase rip.
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
e075ae69 1/* Subroutines used for code generation on IA-32.
07933f72
GS
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
2a2ab3f9
JVA
4
5This file is part of GNU CC.
6
7GNU CC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU CC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU CC; see the file COPYING. If not, write to
97aadbb9 19the Free Software Foundation, 59 Temple Place - Suite 330,
0f290768 20Boston, MA 02111-1307, USA. */
2a2ab3f9 21
2a2ab3f9 22#include "config.h"
bb5177ac 23#include "system.h"
2a2ab3f9 24#include "rtl.h"
6baf1cc8
BS
25#include "tree.h"
26#include "tm_p.h"
2a2ab3f9
JVA
27#include "regs.h"
28#include "hard-reg-set.h"
29#include "real.h"
30#include "insn-config.h"
31#include "conditions.h"
2a2ab3f9
JVA
32#include "output.h"
33#include "insn-attr.h"
2a2ab3f9 34#include "flags.h"
a8ffcc81 35#include "except.h"
ecbc4695 36#include "function.h"
00c79232 37#include "recog.h"
ced8dd8c 38#include "expr.h"
e78d8e51 39#include "optabs.h"
f103890b 40#include "toplev.h"
e075ae69 41#include "basic-block.h"
1526a060 42#include "ggc.h"
672a6f42
NB
43#include "target.h"
44#include "target-def.h"
f1e639b1 45#include "langhooks.h"
2a2ab3f9 46
8dfe5673 47#ifndef CHECK_STACK_LIMIT
07933f72 48#define CHECK_STACK_LIMIT (-1)
8dfe5673
RK
49#endif
50
2ab0437e 51/* Processor costs (relative to an add) */
8b60264b 52static const
2ab0437e
JH
53struct processor_costs size_cost = { /* costs for tunning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
44cf5b6a
JH
61 3, /* cost of movsx */
62 3, /* cost of movzx */
2ab0437e
JH
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of loading integer registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
f4365627
JH
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
2ab0437e 87};
32b5b1aa 88/* Processor costs (relative to an add) */
8b60264b 89static const
32b5b1aa 90struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 91 1, /* cost of an add instruction */
32b5b1aa
SC
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
e075ae69 97 23, /* cost of a divide/mod */
44cf5b6a
JH
98 3, /* cost of movsx */
99 2, /* cost of movzx */
96e7ae40 100 15, /* "large" insn */
e2e52e1b 101 3, /* MOVE_RATIO */
7c6b971d 102 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
0f290768 105 Relative to reg-reg move (2). */
96e7ae40
JH
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
fa79946e
JH
110 {8, 8, 8}, /* cost of loading integer registers */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
f4365627
JH
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
32b5b1aa
SC
124};
125
8b60264b 126static const
32b5b1aa
SC
127struct processor_costs i486_cost = { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
e075ae69 134 40, /* cost of a divide/mod */
44cf5b6a
JH
135 3, /* cost of movsx */
136 2, /* cost of movzx */
96e7ae40 137 15, /* "large" insn */
e2e52e1b 138 3, /* MOVE_RATIO */
7c6b971d 139 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
0f290768 142 Relative to reg-reg move (2). */
96e7ae40
JH
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
fa79946e
JH
147 {8, 8, 8}, /* cost of loading integer registers */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
f4365627
JH
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
32b5b1aa
SC
161};
162
8b60264b 163static const
e5cb57e8 164struct processor_costs pentium_cost = {
32b5b1aa
SC
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
856b07a1 167 4, /* variable shift costs */
e5cb57e8 168 1, /* constant shift costs */
856b07a1
SC
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
e075ae69 171 25, /* cost of a divide/mod */
44cf5b6a
JH
172 3, /* cost of movsx */
173 2, /* cost of movzx */
96e7ae40 174 8, /* "large" insn */
e2e52e1b 175 6, /* MOVE_RATIO */
7c6b971d 176 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
0f290768 179 Relative to reg-reg move (2). */
96e7ae40
JH
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
fa79946e
JH
184 {4, 4, 6}, /* cost of loading integer registers */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
f4365627
JH
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
32b5b1aa
SC
198};
199
8b60264b 200static const
856b07a1
SC
201struct processor_costs pentiumpro_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
e075ae69 204 1, /* variable shift costs */
856b07a1 205 1, /* constant shift costs */
369e59b1 206 4, /* cost of starting a multiply */
856b07a1 207 0, /* cost of multiply per each bit set */
e075ae69 208 17, /* cost of a divide/mod */
44cf5b6a
JH
209 1, /* cost of movsx */
210 1, /* cost of movzx */
96e7ae40 211 8, /* "large" insn */
e2e52e1b 212 6, /* MOVE_RATIO */
7c6b971d 213 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
0f290768 216 Relative to reg-reg move (2). */
96e7ae40
JH
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
fa79946e
JH
221 {4, 4, 6}, /* cost of loading integer registers */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
f4365627
JH
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
856b07a1
SC
235};
236
8b60264b 237static const
a269a03c
JC
238struct processor_costs k6_cost = {
239 1, /* cost of an add instruction */
e075ae69 240 2, /* cost of a lea instruction */
a269a03c
JC
241 1, /* variable shift costs */
242 1, /* constant shift costs */
73fe76e4 243 3, /* cost of starting a multiply */
a269a03c 244 0, /* cost of multiply per each bit set */
e075ae69 245 18, /* cost of a divide/mod */
44cf5b6a
JH
246 2, /* cost of movsx */
247 2, /* cost of movzx */
96e7ae40 248 8, /* "large" insn */
e2e52e1b 249 4, /* MOVE_RATIO */
7c6b971d 250 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
0f290768 253 Relative to reg-reg move (2). */
96e7ae40
JH
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
fa79946e
JH
258 {4, 4, 4}, /* cost of loading integer registers */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
f4365627
JH
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
a269a03c
JC
272};
273
8b60264b 274static const
309ada50
JH
275struct processor_costs athlon_cost = {
276 1, /* cost of an add instruction */
0b5107cf 277 2, /* cost of a lea instruction */
309ada50
JH
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
0b5107cf 282 42, /* cost of a divide/mod */
44cf5b6a
JH
283 1, /* cost of movsx */
284 1, /* cost of movzx */
309ada50 285 8, /* "large" insn */
e2e52e1b 286 9, /* MOVE_RATIO */
309ada50
JH
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
0f290768 290 Relative to reg-reg move (2). */
309ada50
JH
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
0b5107cf 293 {6, 6, 20}, /* cost of loading fp registers
309ada50 294 in SFmode, DFmode and XFmode */
fa79946e
JH
295 {4, 4, 16}, /* cost of loading integer registers */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
f4365627
JH
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
309ada50
JH
309};
310
8b60264b 311static const
b4e89e2d
JH
312struct processor_costs pentium4_cost = {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
44cf5b6a
JH
320 1, /* cost of movsx */
321 1, /* cost of movzx */
b4e89e2d
JH
322 16, /* "large" insn */
323 6, /* MOVE_RATIO */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of loading integer registers */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
f4365627
JH
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
b4e89e2d
JH
346};
347
8b60264b 348const struct processor_costs *ix86_cost = &pentium_cost;
32b5b1aa 349
a269a03c
JC
350/* Processor feature/optimization bitmasks. */
351#define m_386 (1<<PROCESSOR_I386)
352#define m_486 (1<<PROCESSOR_I486)
353#define m_PENT (1<<PROCESSOR_PENTIUM)
354#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355#define m_K6 (1<<PROCESSOR_K6)
309ada50 356#define m_ATHLON (1<<PROCESSOR_ATHLON)
b4e89e2d 357#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
a269a03c 358
309ada50 359const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
b4e89e2d 360const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
a269a03c 361const int x86_zero_extend_with_and = m_486 | m_PENT;
b4e89e2d 362const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
e075ae69 363const int x86_double_with_add = ~m_386;
a269a03c 364const int x86_use_bit_test = m_386;
e2e52e1b 365const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
b4e89e2d 366const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
47f339cf 367const int x86_3dnow_a = m_ATHLON;
b4e89e2d 368const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
ef6257cd 369const int x86_branch_hints = m_PENT4;
b4e89e2d 370const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
e075ae69
RH
371const int x86_partial_reg_stall = m_PPRO;
372const int x86_use_loop = m_K6;
309ada50 373const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
e075ae69
RH
374const int x86_use_mov0 = m_K6;
375const int x86_use_cltd = ~(m_PENT | m_K6);
376const int x86_read_modify_write = ~m_PENT;
377const int x86_read_modify = ~(m_PENT | m_PPRO);
378const int x86_split_long_moves = m_PPRO;
e9e80858 379const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
b4e89e2d 380const int x86_single_stringop = m_386 | m_PENT4;
d9f32422
JH
381const int x86_qimode_math = ~(0);
382const int x86_promote_qi_regs = 0;
383const int x86_himode_math = ~(m_PPRO);
384const int x86_promote_hi_regs = m_PPRO;
b4e89e2d
JH
385const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
386const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
387const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
388const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
389const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
390const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
391const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
c6036a37
JH
392const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
393const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
394const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
b972dd02 395const int x86_decompose_lea = m_PENT4;
30c99a84 396const int x86_arch_always_fancy_math_387 = m_PENT|m_PPRO|m_ATHLON|m_PENT4;
a269a03c 397
6ab16dd9
JH
398/* In case the avreage insn count for single function invocation is
399 lower than this constant, emit fast (but longer) prologue and
400 epilogue code. */
401#define FAST_PROLOGUE_INSN_COUNT 30
5bf0ebab 402
6ab16dd9
JH
403/* Set by prologue expander and used by epilogue expander to determine
404 the style used. */
405static int use_fast_prologue_epilogue;
406
07933f72 407#define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))
2a2ab3f9 408
5bf0ebab
RH
409/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
410static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
411static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
412static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
4c0d89b5
RS
413
414/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 415 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 416
e075ae69 417enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
418{
419 /* ax, dx, cx, bx */
ab408a86 420 AREG, DREG, CREG, BREG,
4c0d89b5 421 /* si, di, bp, sp */
e075ae69 422 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
423 /* FP registers */
424 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 425 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 426 /* arg pointer */
83774849 427 NON_Q_REGS,
564d80f4 428 /* flags, fpsr, dirflag, frame */
a7180f70
BS
429 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
430 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
431 SSE_REGS, SSE_REGS,
432 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
3d117b30
JH
433 MMX_REGS, MMX_REGS,
434 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
435 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
436 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
437 SSE_REGS, SSE_REGS,
4c0d89b5 438};
c572e5ba 439
3d117b30 440/* The "default" register map used in 32bit mode. */
83774849 441
0f290768 442int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
443{
444 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
445 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
3d117b30 446 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
447 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
448 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
3d117b30
JH
449 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
450 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
451};
452
5bf0ebab
RH
453static int const x86_64_int_parameter_registers[6] =
454{
455 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
456 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
457};
458
459static int const x86_64_int_return_registers[4] =
460{
461 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
462};
53c17031 463
0f7fa3d0
JH
464/* The "default" register map used in 64bit mode. */
465int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
466{
467 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
3de72741 468 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
0f7fa3d0
JH
469 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
470 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
471 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
472 8,9,10,11,12,13,14,15, /* extended integer registers */
473 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
474};
475
83774849
RH
476/* Define the register numbers to be used in Dwarf debugging information.
477 The SVR4 reference port C compiler uses the following register numbers
478 in its Dwarf output code:
479 0 for %eax (gcc regno = 0)
480 1 for %ecx (gcc regno = 2)
481 2 for %edx (gcc regno = 1)
482 3 for %ebx (gcc regno = 3)
483 4 for %esp (gcc regno = 7)
484 5 for %ebp (gcc regno = 6)
485 6 for %esi (gcc regno = 4)
486 7 for %edi (gcc regno = 5)
487 The following three DWARF register numbers are never generated by
488 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
489 believes these numbers have these meanings.
490 8 for %eip (no gcc equivalent)
491 9 for %eflags (gcc regno = 17)
492 10 for %trapno (no gcc equivalent)
493 It is not at all clear how we should number the FP stack registers
494 for the x86 architecture. If the version of SDB on x86/svr4 were
495 a bit less brain dead with respect to floating-point then we would
496 have a precedent to follow with respect to DWARF register numbers
497 for x86 FP registers, but the SDB on x86/svr4 is so completely
498 broken with respect to FP registers that it is hardly worth thinking
499 of it as something to strive for compatibility with.
500 The version of x86/svr4 SDB I have at the moment does (partially)
501 seem to believe that DWARF register number 11 is associated with
502 the x86 register %st(0), but that's about all. Higher DWARF
503 register numbers don't seem to be associated with anything in
504 particular, and even for DWARF regno 11, SDB only seems to under-
505 stand that it should say that a variable lives in %st(0) (when
506 asked via an `=' command) if we said it was in DWARF regno 11,
507 but SDB still prints garbage when asked for the value of the
508 variable in question (via a `/' command).
509 (Also note that the labels SDB prints for various FP stack regs
510 when doing an `x' command are all wrong.)
511 Note that these problems generally don't affect the native SVR4
512 C compiler because it doesn't allow the use of -O with -g and
513 because when it is *not* optimizing, it allocates a memory
514 location for each floating-point variable, and the memory
515 location is what gets described in the DWARF AT_location
516 attribute for the variable in question.
517 Regardless of the severe mental illness of the x86/svr4 SDB, we
518 do something sensible here and we use the following DWARF
519 register numbers. Note that these are all stack-top-relative
520 numbers.
521 11 for %st(0) (gcc regno = 8)
522 12 for %st(1) (gcc regno = 9)
523 13 for %st(2) (gcc regno = 10)
524 14 for %st(3) (gcc regno = 11)
525 15 for %st(4) (gcc regno = 12)
526 16 for %st(5) (gcc regno = 13)
527 17 for %st(6) (gcc regno = 14)
528 18 for %st(7) (gcc regno = 15)
529*/
0f290768 530int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
531{
532 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
533 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
3f3f2124 534 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
535 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
536 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
3f3f2124
JH
537 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
538 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
83774849
RH
539};
540
c572e5ba
JVA
541/* Test and compare insns in i386.md store the information needed to
542 generate branch and scc insns here. */
543
07933f72
GS
544rtx ix86_compare_op0 = NULL_RTX;
545rtx ix86_compare_op1 = NULL_RTX;
f5316dfe 546
f996902d
RH
547/* The encoding characters for the four TLS models present in ELF. */
548
755ac5d4 549static char const tls_model_chars[] = " GLil";
f996902d 550
7a2e09f4 551#define MAX_386_STACK_LOCALS 3
8362f420
JH
552/* Size of the register save area. */
553#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
36edd3cc
BS
554
555/* Define the structure for the machine field in struct function. */
556struct machine_function
557{
558 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
f996902d 559 const char *some_ld_name;
8362f420 560 int save_varrargs_registers;
6fca22eb 561 int accesses_prev_frame;
36edd3cc
BS
562};
563
01d939e8 564#define ix86_stack_locals (cfun->machine->stack_locals)
8362f420 565#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
36edd3cc 566
4dd2ac2c
JH
567/* Structure describing stack frame layout.
568 Stack grows downward:
569
570 [arguments]
571 <- ARG_POINTER
572 saved pc
573
574 saved frame pointer if frame_pointer_needed
575 <- HARD_FRAME_POINTER
576 [saved regs]
577
578 [padding1] \
579 )
580 [va_arg registers] (
581 > to_allocate <- FRAME_POINTER
582 [frame] (
583 )
584 [padding2] /
585 */
586struct ix86_frame
587{
588 int nregs;
589 int padding1;
8362f420 590 int va_arg_size;
4dd2ac2c
JH
591 HOST_WIDE_INT frame;
592 int padding2;
593 int outgoing_arguments_size;
8362f420 594 int red_zone_size;
4dd2ac2c
JH
595
596 HOST_WIDE_INT to_allocate;
597 /* The offsets relative to ARG_POINTER. */
598 HOST_WIDE_INT frame_pointer_offset;
599 HOST_WIDE_INT hard_frame_pointer_offset;
600 HOST_WIDE_INT stack_pointer_offset;
601};
602
c93e80a5
JH
603/* Used to enable/disable debugging features. */
604const char *ix86_debug_arg_string, *ix86_debug_addr_string;
6189a572
JH
605/* Code model option as passed by user. */
606const char *ix86_cmodel_string;
607/* Parsed value. */
608enum cmodel ix86_cmodel;
80f33d06
GS
609/* Asm dialect. */
610const char *ix86_asm_string;
611enum asm_dialect ix86_asm_dialect = ASM_ATT;
f996902d
RH
612/* TLS dialext. */
613const char *ix86_tls_dialect_string;
614enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
6189a572 615
5bf0ebab 616/* Which unit we are generating floating point math for. */
965f5423
JH
617enum fpmath_unit ix86_fpmath;
618
5bf0ebab
RH
619/* Which cpu are we scheduling for. */
620enum processor_type ix86_cpu;
621/* Which instruction set architecture to use. */
622enum processor_type ix86_arch;
c8c5cb99
SC
623
624/* Strings to hold which cpu and instruction set architecture to use. */
9c23aa47
ZW
625const char *ix86_cpu_string; /* for -mcpu=<xxx> */
626const char *ix86_arch_string; /* for -march=<xxx> */
965f5423 627const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
c8c5cb99 628
0f290768 629/* # of registers to use to pass arguments. */
e075ae69 630const char *ix86_regparm_string;
e9a25f70 631
f4365627
JH
632/* true if sse prefetch instruction is not NOOP. */
633int x86_prefetch_sse;
634
e075ae69
RH
635/* ix86_regparm_string as a number */
636int ix86_regparm;
e9a25f70
JL
637
638/* Alignment to use for loops and jumps: */
639
0f290768 640/* Power of two alignment for loops. */
e075ae69 641const char *ix86_align_loops_string;
e9a25f70 642
0f290768 643/* Power of two alignment for non-loop jumps. */
e075ae69 644const char *ix86_align_jumps_string;
e9a25f70 645
3af4bd89 646/* Power of two alignment for stack boundary in bytes. */
e075ae69 647const char *ix86_preferred_stack_boundary_string;
3af4bd89
JH
648
649/* Preferred alignment for stack boundary in bits. */
e075ae69 650int ix86_preferred_stack_boundary;
3af4bd89 651
e9a25f70 652/* Values 1-5: see jump.c */
e075ae69
RH
653int ix86_branch_cost;
654const char *ix86_branch_cost_string;
e9a25f70 655
0f290768 656/* Power of two alignment for functions. */
e075ae69 657const char *ix86_align_funcs_string;
623fe810
RH
658
659/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
660static char internal_label_prefix[16];
661static int internal_label_prefix_len;
e075ae69 662\f
623fe810 663static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
f996902d 664static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
f6da8bc3
KG
665static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
666static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
e075ae69 667 int, int, FILE *));
f996902d
RH
668static const char *get_some_local_dynamic_name PARAMS ((void));
669static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
670static rtx maybe_get_pool_constant PARAMS ((rtx));
f6da8bc3 671static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
3a3677ff
RH
672static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
673 rtx *, rtx *));
f996902d 674static rtx get_thread_pointer PARAMS ((void));
f6da8bc3
KG
675static rtx gen_push PARAMS ((rtx));
676static int memory_address_length PARAMS ((rtx addr));
677static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
678static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
f6da8bc3
KG
679static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
680static void ix86_dump_ppro_packet PARAMS ((FILE *));
681static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
f6da8bc3
KG
682static void ix86_init_machine_status PARAMS ((struct function *));
683static void ix86_mark_machine_status PARAMS ((struct function *));
37b15744 684static void ix86_free_machine_status PARAMS ((struct function *));
2b589241 685static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
b531087a
KH
686static int ix86_nsaved_regs PARAMS ((void));
687static void ix86_emit_save_regs PARAMS ((void));
c6036a37 688static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
37a58036 689static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
0e4970d7 690static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
b531087a 691static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
55efb413 692static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
0945b39d 693static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
0945b39d
JH
694static rtx ix86_expand_aligntest PARAMS ((rtx, int));
695static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
c237e94a
ZW
696static int ix86_issue_rate PARAMS ((void));
697static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
698static void ix86_sched_init PARAMS ((FILE *, int, int));
699static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
700static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
9b690711
RH
701static int ia32_use_dfa_pipeline_interface PARAMS ((void));
702static int ia32_multipass_dfa_lookahead PARAMS ((void));
e37af218 703static void ix86_init_mmx_sse_builtins PARAMS ((void));
e075ae69
RH
704
705struct ix86_address
706{
707 rtx base, index, disp;
708 HOST_WIDE_INT scale;
709};
b08de47e 710
e075ae69 711static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
bd793c65 712
f996902d
RH
713static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
714static const char *ix86_strip_name_encoding PARAMS ((const char *))
715 ATTRIBUTE_UNUSED;
fb49053f 716
bd793c65 717struct builtin_description;
8b60264b
KG
718static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
719 tree, rtx));
720static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
721 tree, rtx));
bd793c65
BS
722static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
723static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
724static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
e37af218
RH
725static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
726 tree, rtx));
727static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
bd793c65 728static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
c0c102a9
JH
729static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
730static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
731 enum rtx_code *,
732 enum rtx_code *,
733 enum rtx_code *));
9e7adcb3
JH
734static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
735 rtx *, rtx *));
736static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
737static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
738static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
739static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
9b690711 740static int ix86_save_reg PARAMS ((unsigned int, int));
4dd2ac2c 741static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
8d8e52be 742static int ix86_comp_type_attributes PARAMS ((tree, tree));
91d231cb
JM
743const struct attribute_spec ix86_attribute_table[];
744static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
745static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
7c262518 746
2cc07db4
RH
747#ifdef DO_GLOBAL_CTORS_BODY
748static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
749#endif
e56feed6 750
53c17031
JH
751/* Register class used for passing given 64bit part of the argument.
752 These represent classes as documented by the PS ABI, with the exception
753 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
754 use SF or DFmode move instead of DImode to avoid reformating penalties.
755
756 Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves
757 whenever possible (upper half does contain padding).
758 */
759enum x86_64_reg_class
760 {
761 X86_64_NO_CLASS,
762 X86_64_INTEGER_CLASS,
763 X86_64_INTEGERSI_CLASS,
764 X86_64_SSE_CLASS,
765 X86_64_SSESF_CLASS,
766 X86_64_SSEDF_CLASS,
767 X86_64_SSEUP_CLASS,
768 X86_64_X87_CLASS,
769 X86_64_X87UP_CLASS,
770 X86_64_MEMORY_CLASS
771 };
0b5826ac 772static const char * const x86_64_reg_class_name[] =
53c17031
JH
773 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
774
775#define MAX_CLASSES 4
776static int classify_argument PARAMS ((enum machine_mode, tree,
777 enum x86_64_reg_class [MAX_CLASSES],
778 int));
779static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
780 int *));
781static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
07933f72 782 const int *, int));
53c17031
JH
783static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
784 enum x86_64_reg_class));
672a6f42
NB
785\f
786/* Initialize the GCC target structure. */
91d231cb
JM
787#undef TARGET_ATTRIBUTE_TABLE
788#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
672a6f42 789#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
f5f4be42
NB
790# undef TARGET_MERGE_DECL_ATTRIBUTES
791# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
672a6f42
NB
792#endif
793
8d8e52be
JM
794#undef TARGET_COMP_TYPE_ATTRIBUTES
795#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
796
f6155fda
SS
797#undef TARGET_INIT_BUILTINS
798#define TARGET_INIT_BUILTINS ix86_init_builtins
799
800#undef TARGET_EXPAND_BUILTIN
801#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
802
08c148a8
NB
803#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
804 static void ix86_osf_output_function_prologue PARAMS ((FILE *,
805 HOST_WIDE_INT));
806# undef TARGET_ASM_FUNCTION_PROLOGUE
807# define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
808#endif
809
17b53c33
NB
810#undef TARGET_ASM_OPEN_PAREN
811#define TARGET_ASM_OPEN_PAREN ""
812#undef TARGET_ASM_CLOSE_PAREN
813#define TARGET_ASM_CLOSE_PAREN ""
814
301d03af
RS
815#undef TARGET_ASM_ALIGNED_HI_OP
816#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
817#undef TARGET_ASM_ALIGNED_SI_OP
818#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
819#ifdef ASM_QUAD
820#undef TARGET_ASM_ALIGNED_DI_OP
821#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
822#endif
823
824#undef TARGET_ASM_UNALIGNED_HI_OP
825#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
826#undef TARGET_ASM_UNALIGNED_SI_OP
827#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
828#undef TARGET_ASM_UNALIGNED_DI_OP
829#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
830
c237e94a
ZW
831#undef TARGET_SCHED_ADJUST_COST
832#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
833#undef TARGET_SCHED_ISSUE_RATE
834#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
835#undef TARGET_SCHED_VARIABLE_ISSUE
836#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
837#undef TARGET_SCHED_INIT
838#define TARGET_SCHED_INIT ix86_sched_init
839#undef TARGET_SCHED_REORDER
840#define TARGET_SCHED_REORDER ix86_sched_reorder
9b690711
RH
841#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
842#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
843 ia32_use_dfa_pipeline_interface
844#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
845#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
846 ia32_multipass_dfa_lookahead
c237e94a 847
f996902d
RH
848#ifdef HAVE_AS_TLS
849#undef TARGET_HAVE_TLS
850#define TARGET_HAVE_TLS true
851#endif
852
f6897b10 853struct gcc_target targetm = TARGET_INITIALIZER;
e075ae69 854\f
f5316dfe
MM
855/* Sometimes certain combinations of command options do not make
856 sense on a particular target machine. You can define a macro
857 `OVERRIDE_OPTIONS' to take account of this. This macro, if
858 defined, is executed once just after all the command options have
859 been parsed.
860
861 Don't use this macro to turn on various extra optimizations for
862 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
863
864void
865override_options ()
866{
400500c4 867 int i;
e075ae69
RH
868 /* Comes from final.c -- no real reason to change it. */
869#define MAX_CODE_ALIGN 16
f5316dfe 870
c8c5cb99
SC
871 static struct ptt
872 {
8b60264b
KG
873 const struct processor_costs *cost; /* Processor costs */
874 const int target_enable; /* Target flags to enable. */
875 const int target_disable; /* Target flags to disable. */
876 const int align_loop; /* Default alignments. */
2cca7283 877 const int align_loop_max_skip;
8b60264b 878 const int align_jump;
2cca7283 879 const int align_jump_max_skip;
8b60264b
KG
880 const int align_func;
881 const int branch_cost;
e075ae69 882 }
0f290768 883 const processor_target_table[PROCESSOR_max] =
e075ae69 884 {
2cca7283
JH
885 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
886 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
887 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
888 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
889 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
890 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
891 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
e075ae69
RH
892 };
893
f4365627 894 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
e075ae69
RH
895 static struct pta
896 {
8b60264b
KG
897 const char *const name; /* processor name or nickname. */
898 const enum processor_type processor;
0dd0e980
JH
899 const enum pta_flags
900 {
901 PTA_SSE = 1,
902 PTA_SSE2 = 2,
903 PTA_MMX = 4,
f4365627 904 PTA_PREFETCH_SSE = 8,
0dd0e980
JH
905 PTA_3DNOW = 16,
906 PTA_3DNOW_A = 64
907 } flags;
e075ae69 908 }
0f290768 909 const processor_alias_table[] =
e075ae69 910 {
0dd0e980
JH
911 {"i386", PROCESSOR_I386, 0},
912 {"i486", PROCESSOR_I486, 0},
913 {"i586", PROCESSOR_PENTIUM, 0},
914 {"pentium", PROCESSOR_PENTIUM, 0},
915 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
916 {"i686", PROCESSOR_PENTIUMPRO, 0},
917 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
918 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
f4365627 919 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
0dd0e980 920 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
f4365627 921 PTA_MMX | PTA_PREFETCH_SSE},
0dd0e980
JH
922 {"k6", PROCESSOR_K6, PTA_MMX},
923 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
924 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
f4365627 925 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 926 | PTA_3DNOW_A},
f4365627 927 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
0dd0e980 928 | PTA_3DNOW | PTA_3DNOW_A},
f4365627 929 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 930 | PTA_3DNOW_A | PTA_SSE},
f4365627 931 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 932 | PTA_3DNOW_A | PTA_SSE},
f4365627 933 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 934 | PTA_3DNOW_A | PTA_SSE},
3af4bd89 935 };
c8c5cb99 936
ca7558fc 937 int const pta_size = ARRAY_SIZE (processor_alias_table);
c8c5cb99 938
f5316dfe
MM
939#ifdef SUBTARGET_OVERRIDE_OPTIONS
940 SUBTARGET_OVERRIDE_OPTIONS;
941#endif
942
f4365627
JH
943 if (!ix86_cpu_string && ix86_arch_string)
944 ix86_cpu_string = ix86_arch_string;
945 if (!ix86_cpu_string)
946 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
947 if (!ix86_arch_string)
948 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
e075ae69 949
6189a572
JH
950 if (ix86_cmodel_string != 0)
951 {
952 if (!strcmp (ix86_cmodel_string, "small"))
953 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
954 else if (flag_pic)
c725bd79 955 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
6189a572
JH
956 else if (!strcmp (ix86_cmodel_string, "32"))
957 ix86_cmodel = CM_32;
958 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
959 ix86_cmodel = CM_KERNEL;
960 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
961 ix86_cmodel = CM_MEDIUM;
962 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
963 ix86_cmodel = CM_LARGE;
964 else
965 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
966 }
967 else
968 {
969 ix86_cmodel = CM_32;
970 if (TARGET_64BIT)
971 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
972 }
c93e80a5
JH
973 if (ix86_asm_string != 0)
974 {
975 if (!strcmp (ix86_asm_string, "intel"))
976 ix86_asm_dialect = ASM_INTEL;
977 else if (!strcmp (ix86_asm_string, "att"))
978 ix86_asm_dialect = ASM_ATT;
979 else
980 error ("bad value (%s) for -masm= switch", ix86_asm_string);
981 }
6189a572 982 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
c725bd79 983 error ("code model `%s' not supported in the %s bit mode",
6189a572
JH
984 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
985 if (ix86_cmodel == CM_LARGE)
c725bd79 986 sorry ("code model `large' not supported yet");
0c2dc519 987 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
c725bd79 988 sorry ("%i-bit mode not compiled in",
0c2dc519 989 (target_flags & MASK_64BIT) ? 64 : 32);
6189a572 990
f4365627
JH
991 for (i = 0; i < pta_size; i++)
992 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
993 {
994 ix86_arch = processor_alias_table[i].processor;
995 /* Default cpu tuning to the architecture. */
996 ix86_cpu = ix86_arch;
997 if (processor_alias_table[i].flags & PTA_MMX
998 && !(target_flags & MASK_MMX_SET))
999 target_flags |= MASK_MMX;
1000 if (processor_alias_table[i].flags & PTA_3DNOW
1001 && !(target_flags & MASK_3DNOW_SET))
1002 target_flags |= MASK_3DNOW;
1003 if (processor_alias_table[i].flags & PTA_3DNOW_A
1004 && !(target_flags & MASK_3DNOW_A_SET))
1005 target_flags |= MASK_3DNOW_A;
1006 if (processor_alias_table[i].flags & PTA_SSE
1007 && !(target_flags & MASK_SSE_SET))
1008 target_flags |= MASK_SSE;
1009 if (processor_alias_table[i].flags & PTA_SSE2
1010 && !(target_flags & MASK_SSE2_SET))
1011 target_flags |= MASK_SSE2;
1012 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1013 x86_prefetch_sse = true;
1014 break;
1015 }
400500c4 1016
f4365627
JH
1017 if (i == pta_size)
1018 error ("bad value (%s) for -march= switch", ix86_arch_string);
e075ae69 1019
f4365627
JH
1020 for (i = 0; i < pta_size; i++)
1021 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1022 {
1023 ix86_cpu = processor_alias_table[i].processor;
1024 break;
1025 }
1026 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1027 x86_prefetch_sse = true;
1028 if (i == pta_size)
1029 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
e075ae69 1030
2ab0437e
JH
1031 if (optimize_size)
1032 ix86_cost = &size_cost;
1033 else
1034 ix86_cost = processor_target_table[ix86_cpu].cost;
e075ae69
RH
1035 target_flags |= processor_target_table[ix86_cpu].target_enable;
1036 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1037
36edd3cc
BS
1038 /* Arrange to set up i386_stack_locals for all functions. */
1039 init_machine_status = ix86_init_machine_status;
1526a060 1040 mark_machine_status = ix86_mark_machine_status;
37b15744 1041 free_machine_status = ix86_free_machine_status;
36edd3cc 1042
0f290768 1043 /* Validate -mregparm= value. */
e075ae69 1044 if (ix86_regparm_string)
b08de47e 1045 {
400500c4
RK
1046 i = atoi (ix86_regparm_string);
1047 if (i < 0 || i > REGPARM_MAX)
1048 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1049 else
1050 ix86_regparm = i;
b08de47e 1051 }
0d7d98ee
JH
1052 else
1053 if (TARGET_64BIT)
1054 ix86_regparm = REGPARM_MAX;
b08de47e 1055
3e18fdf6 1056 /* If the user has provided any of the -malign-* options,
a4f31c00 1057 warn and use that value only if -falign-* is not set.
3e18fdf6 1058 Remove this code in GCC 3.2 or later. */
e075ae69 1059 if (ix86_align_loops_string)
b08de47e 1060 {
3e18fdf6
GK
1061 warning ("-malign-loops is obsolete, use -falign-loops");
1062 if (align_loops == 0)
1063 {
1064 i = atoi (ix86_align_loops_string);
1065 if (i < 0 || i > MAX_CODE_ALIGN)
1066 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1067 else
1068 align_loops = 1 << i;
1069 }
b08de47e 1070 }
3af4bd89 1071
e075ae69 1072 if (ix86_align_jumps_string)
b08de47e 1073 {
3e18fdf6
GK
1074 warning ("-malign-jumps is obsolete, use -falign-jumps");
1075 if (align_jumps == 0)
1076 {
1077 i = atoi (ix86_align_jumps_string);
1078 if (i < 0 || i > MAX_CODE_ALIGN)
1079 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1080 else
1081 align_jumps = 1 << i;
1082 }
b08de47e 1083 }
b08de47e 1084
e075ae69 1085 if (ix86_align_funcs_string)
b08de47e 1086 {
3e18fdf6
GK
1087 warning ("-malign-functions is obsolete, use -falign-functions");
1088 if (align_functions == 0)
1089 {
1090 i = atoi (ix86_align_funcs_string);
1091 if (i < 0 || i > MAX_CODE_ALIGN)
1092 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1093 else
1094 align_functions = 1 << i;
1095 }
b08de47e 1096 }
3af4bd89 1097
3e18fdf6 1098 /* Default align_* from the processor table. */
3e18fdf6 1099 if (align_loops == 0)
2cca7283
JH
1100 {
1101 align_loops = processor_target_table[ix86_cpu].align_loop;
1102 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1103 }
3e18fdf6 1104 if (align_jumps == 0)
2cca7283
JH
1105 {
1106 align_jumps = processor_target_table[ix86_cpu].align_jump;
1107 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1108 }
3e18fdf6 1109 if (align_functions == 0)
2cca7283
JH
1110 {
1111 align_functions = processor_target_table[ix86_cpu].align_func;
1112 }
3e18fdf6 1113
e4c0478d 1114 /* Validate -mpreferred-stack-boundary= value, or provide default.
fbb83b43
AO
1115 The default of 128 bits is for Pentium III's SSE __m128, but we
1116 don't want additional code to keep the stack aligned when
1117 optimizing for code size. */
1118 ix86_preferred_stack_boundary = (optimize_size
1119 ? TARGET_64BIT ? 64 : 32
1120 : 128);
e075ae69 1121 if (ix86_preferred_stack_boundary_string)
3af4bd89 1122 {
400500c4 1123 i = atoi (ix86_preferred_stack_boundary_string);
c6257c5d
AO
1124 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1125 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
0d7d98ee 1126 TARGET_64BIT ? 3 : 2);
400500c4
RK
1127 else
1128 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 1129 }
77a989d1 1130
0f290768 1131 /* Validate -mbranch-cost= value, or provide default. */
e075ae69
RH
1132 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1133 if (ix86_branch_cost_string)
804a8ee0 1134 {
400500c4
RK
1135 i = atoi (ix86_branch_cost_string);
1136 if (i < 0 || i > 5)
1137 error ("-mbranch-cost=%d is not between 0 and 5", i);
1138 else
1139 ix86_branch_cost = i;
804a8ee0 1140 }
804a8ee0 1141
f996902d
RH
1142 if (ix86_tls_dialect_string)
1143 {
1144 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1145 ix86_tls_dialect = TLS_DIALECT_GNU;
1146 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1147 ix86_tls_dialect = TLS_DIALECT_SUN;
1148 else
1149 error ("bad value (%s) for -mtls-dialect= switch",
1150 ix86_tls_dialect_string);
1151 }
1152
e9a25f70
JL
1153 /* Keep nonleaf frame pointers. */
1154 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1155 flag_omit_frame_pointer = 1;
e075ae69
RH
1156
1157 /* If we're doing fast math, we don't care about comparison order
1158 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1159 if (flag_unsafe_math_optimizations)
e075ae69
RH
1160 target_flags &= ~MASK_IEEE_FP;
1161
30c99a84
RH
1162 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1163 since the insns won't need emulation. */
1164 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1165 target_flags &= ~MASK_NO_FANCY_MATH_387;
1166
14f73b5a
JH
1167 if (TARGET_64BIT)
1168 {
1169 if (TARGET_ALIGN_DOUBLE)
c725bd79 1170 error ("-malign-double makes no sense in the 64bit mode");
14f73b5a 1171 if (TARGET_RTD)
c725bd79 1172 error ("-mrtd calling convention not supported in the 64bit mode");
14f73b5a 1173 /* Enable by default the SSE and MMX builtins. */
965f5423
JH
1174 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1175 ix86_fpmath = FPMATH_SSE;
14f73b5a 1176 }
965f5423
JH
1177 else
1178 ix86_fpmath = FPMATH_387;
1179
1180 if (ix86_fpmath_string != 0)
1181 {
1182 if (! strcmp (ix86_fpmath_string, "387"))
1183 ix86_fpmath = FPMATH_387;
1184 else if (! strcmp (ix86_fpmath_string, "sse"))
1185 {
1186 if (!TARGET_SSE)
1187 {
1188 warning ("SSE instruction set disabled, using 387 arithmetics");
1189 ix86_fpmath = FPMATH_387;
1190 }
1191 else
1192 ix86_fpmath = FPMATH_SSE;
1193 }
1194 else if (! strcmp (ix86_fpmath_string, "387,sse")
1195 || ! strcmp (ix86_fpmath_string, "sse,387"))
1196 {
1197 if (!TARGET_SSE)
1198 {
1199 warning ("SSE instruction set disabled, using 387 arithmetics");
1200 ix86_fpmath = FPMATH_387;
1201 }
1202 else if (!TARGET_80387)
1203 {
1204 warning ("387 instruction set disabled, using SSE arithmetics");
1205 ix86_fpmath = FPMATH_SSE;
1206 }
1207 else
1208 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1209 }
1210 else
1211 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1212 }
14f73b5a 1213
a7180f70
BS
1214 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1215 on by -msse. */
1216 if (TARGET_SSE)
e37af218
RH
1217 {
1218 target_flags |= MASK_MMX;
1219 x86_prefetch_sse = true;
1220 }
c6036a37 1221
47f339cf
BS
1222 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1223 if (TARGET_3DNOW)
1224 {
1225 target_flags |= MASK_MMX;
1226 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
1227 extensions it adds. */
1228 if (x86_3dnow_a & (1 << ix86_arch))
1229 target_flags |= MASK_3DNOW_A;
1230 }
c6036a37 1231 if ((x86_accumulate_outgoing_args & CPUMASK)
0dd0e980 1232 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
c6036a37
JH
1233 && !optimize_size)
1234 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810
RH
1235
1236 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1237 {
1238 char *p;
1239 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1240 p = strchr (internal_label_prefix, 'X');
1241 internal_label_prefix_len = p - internal_label_prefix;
1242 *p = '\0';
1243 }
f5316dfe
MM
1244}
1245\f
32b5b1aa 1246void
c6aded7c 1247optimization_options (level, size)
32b5b1aa 1248 int level;
bb5177ac 1249 int size ATTRIBUTE_UNUSED;
32b5b1aa 1250{
e9a25f70
JL
1251 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1252 make the problem with not enough registers even worse. */
32b5b1aa
SC
1253#ifdef INSN_SCHEDULING
1254 if (level > 1)
1255 flag_schedule_insns = 0;
1256#endif
53c17031
JH
1257 if (TARGET_64BIT && optimize >= 1)
1258 flag_omit_frame_pointer = 1;
1259 if (TARGET_64BIT)
b932f770
JH
1260 {
1261 flag_pcc_struct_return = 0;
1262 flag_asynchronous_unwind_tables = 1;
1263 }
32b5b1aa 1264}
b08de47e 1265\f
91d231cb
JM
1266/* Table of valid machine attributes. */
1267const struct attribute_spec ix86_attribute_table[] =
b08de47e 1268{
91d231cb 1269 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
b08de47e
MM
1270 /* Stdcall attribute says callee is responsible for popping arguments
1271 if they are not variable. */
91d231cb
JM
1272 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1273 /* Cdecl attribute says the callee is a normal C declaration */
1274 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
b08de47e 1275 /* Regparm attribute specifies how many integer arguments are to be
0f290768 1276 passed in registers. */
91d231cb
JM
1277 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1278#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
3da1eb0b
DS
1279 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1280 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1281 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
91d231cb
JM
1282#endif
1283 { NULL, 0, 0, false, false, false, NULL }
1284};
1285
1286/* Handle a "cdecl" or "stdcall" attribute;
1287 arguments as in struct attribute_spec.handler. */
1288static tree
1289ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1290 tree *node;
1291 tree name;
1292 tree args ATTRIBUTE_UNUSED;
1293 int flags ATTRIBUTE_UNUSED;
1294 bool *no_add_attrs;
1295{
1296 if (TREE_CODE (*node) != FUNCTION_TYPE
1297 && TREE_CODE (*node) != METHOD_TYPE
1298 && TREE_CODE (*node) != FIELD_DECL
1299 && TREE_CODE (*node) != TYPE_DECL)
b08de47e 1300 {
91d231cb
JM
1301 warning ("`%s' attribute only applies to functions",
1302 IDENTIFIER_POINTER (name));
1303 *no_add_attrs = true;
1304 }
b08de47e 1305
91d231cb
JM
1306 if (TARGET_64BIT)
1307 {
1308 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1309 *no_add_attrs = true;
1310 }
b08de47e 1311
91d231cb
JM
1312 return NULL_TREE;
1313}
b08de47e 1314
91d231cb
JM
1315/* Handle a "regparm" attribute;
1316 arguments as in struct attribute_spec.handler. */
1317static tree
1318ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1319 tree *node;
1320 tree name;
1321 tree args;
1322 int flags ATTRIBUTE_UNUSED;
1323 bool *no_add_attrs;
1324{
1325 if (TREE_CODE (*node) != FUNCTION_TYPE
1326 && TREE_CODE (*node) != METHOD_TYPE
1327 && TREE_CODE (*node) != FIELD_DECL
1328 && TREE_CODE (*node) != TYPE_DECL)
1329 {
1330 warning ("`%s' attribute only applies to functions",
1331 IDENTIFIER_POINTER (name));
1332 *no_add_attrs = true;
1333 }
1334 else
1335 {
1336 tree cst;
b08de47e 1337
91d231cb
JM
1338 cst = TREE_VALUE (args);
1339 if (TREE_CODE (cst) != INTEGER_CST)
1340 {
1341 warning ("`%s' attribute requires an integer constant argument",
1342 IDENTIFIER_POINTER (name));
1343 *no_add_attrs = true;
1344 }
1345 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1346 {
1347 warning ("argument to `%s' attribute larger than %d",
1348 IDENTIFIER_POINTER (name), REGPARM_MAX);
1349 *no_add_attrs = true;
1350 }
b08de47e
MM
1351 }
1352
91d231cb 1353 return NULL_TREE;
b08de47e
MM
1354}
1355
08c148a8
NB
1356#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
1357
1358/* Generate the assembly code for function entry. FILE is a stdio
1359 stream to output the code to. SIZE is an int: how many units of
1360 temporary storage to allocate.
1361
1362 Refer to the array `regs_ever_live' to determine which registers to
1363 save; `regs_ever_live[I]' is nonzero if register number I is ever
1364 used in the function. This function is responsible for knowing
1365 which registers should not be saved even if used.
1366
1367 We override it here to allow for the new profiling code to go before
1368 the prologue and the old mcount code to go after the prologue (and
1369 after %ebx has been set up for ELF shared library support). */
1370
1371static void
1372ix86_osf_output_function_prologue (file, size)
1373 FILE *file;
1374 HOST_WIDE_INT size;
1375{
5f37d07c
KG
1376 const char *prefix = "";
1377 const char *const lprefix = LPREFIX;
f6f315fe 1378 int labelno = current_function_profile_label_no;
08c148a8
NB
1379
1380#ifdef OSF_OS
1381
1382 if (TARGET_UNDERSCORES)
1383 prefix = "_";
1384
70f4f91c 1385 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
08c148a8
NB
1386 {
1387 if (!flag_pic && !HALF_PIC_P ())
1388 {
1389 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1390 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1391 }
1392
1393 else if (HALF_PIC_P ())
1394 {
1395 rtx symref;
1396
1397 HALF_PIC_EXTERNAL ("_mcount_ptr");
1398 symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
1399 "_mcount_ptr"));
1400
1401 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1402 fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
1403 XSTR (symref, 0));
1404 fprintf (file, "\tcall *(%%eax)\n");
1405 }
1406
1407 else
1408 {
1409 static int call_no = 0;
1410
1411 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1412 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1413 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1414 lprefix, call_no++);
1415 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1416 lprefix, labelno);
1417 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1418 prefix);
1419 fprintf (file, "\tcall *(%%eax)\n");
1420 }
1421 }
1422
1423#else /* !OSF_OS */
1424
70f4f91c 1425 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
08c148a8
NB
1426 {
1427 if (!flag_pic)
1428 {
1429 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1430 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1431 }
1432
1433 else
1434 {
1435 static int call_no = 0;
1436
1437 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1438 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1439 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1440 lprefix, call_no++);
1441 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1442 lprefix, labelno);
1443 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1444 prefix);
1445 fprintf (file, "\tcall *(%%eax)\n");
1446 }
1447 }
1448#endif /* !OSF_OS */
1449
1450 function_prologue (file, size);
1451}
1452
1453#endif /* OSF_OS || TARGET_OSF1ELF */
1454
b08de47e
MM
1455/* Return 0 if the attributes for two types are incompatible, 1 if they
1456 are compatible, and 2 if they are nearly compatible (which causes a
1457 warning to be generated). */
1458
8d8e52be 1459static int
e075ae69 1460ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
1461 tree type1;
1462 tree type2;
b08de47e 1463{
0f290768 1464 /* Check for mismatch of non-default calling convention. */
27c38fbe 1465 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1466
1467 if (TREE_CODE (type1) != FUNCTION_TYPE)
1468 return 1;
1469
1470 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1471 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1472 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 1473 return 0;
b08de47e
MM
1474 return 1;
1475}
b08de47e
MM
1476\f
1477/* Value is the number of bytes of arguments automatically
1478 popped when returning from a subroutine call.
1479 FUNDECL is the declaration node of the function (as a tree),
1480 FUNTYPE is the data type of the function (as a tree),
1481 or for a library call it is an identifier node for the subroutine name.
1482 SIZE is the number of bytes of arguments passed on the stack.
1483
1484 On the 80386, the RTD insn may be used to pop them if the number
1485 of args is fixed, but if the number is variable then the caller
1486 must pop them all. RTD can't be used for library calls now
1487 because the library is compiled with the Unix compiler.
1488 Use of RTD is a selectable option, since it is incompatible with
1489 standard Unix calling sequences. If the option is not selected,
1490 the caller must always pop the args.
1491
1492 The attribute stdcall is equivalent to RTD on a per module basis. */
1493
1494int
e075ae69 1495ix86_return_pops_args (fundecl, funtype, size)
b08de47e
MM
1496 tree fundecl;
1497 tree funtype;
1498 int size;
79325812 1499{
3345ee7d 1500 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1501
0f290768 1502 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1503 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1504
0f290768 1505 /* Stdcall functions will pop the stack if not variable args. */
698cdd84
SC
1506 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1507 rtd = 1;
79325812 1508
698cdd84
SC
1509 if (rtd
1510 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
1511 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1512 == void_type_node)))
698cdd84
SC
1513 return size;
1514 }
79325812 1515
232b8f52 1516 /* Lose any fake structure return argument if it is passed on the stack. */
0d7d98ee
JH
1517 if (aggregate_value_p (TREE_TYPE (funtype))
1518 && !TARGET_64BIT)
232b8f52
JJ
1519 {
1520 int nregs = ix86_regparm;
79325812 1521
232b8f52
JJ
1522 if (funtype)
1523 {
1524 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1525
1526 if (attr)
1527 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1528 }
1529
1530 if (!nregs)
1531 return GET_MODE_SIZE (Pmode);
1532 }
1533
1534 return 0;
b08de47e 1535}
b08de47e
MM
1536\f
1537/* Argument support functions. */
1538
53c17031
JH
1539/* Return true when register may be used to pass function parameters. */
1540bool
1541ix86_function_arg_regno_p (regno)
1542 int regno;
1543{
1544 int i;
1545 if (!TARGET_64BIT)
0333394e
JJ
1546 return (regno < REGPARM_MAX
1547 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
53c17031
JH
1548 if (SSE_REGNO_P (regno) && TARGET_SSE)
1549 return true;
1550 /* RAX is used as hidden argument to va_arg functions. */
1551 if (!regno)
1552 return true;
1553 for (i = 0; i < REGPARM_MAX; i++)
1554 if (regno == x86_64_int_parameter_registers[i])
1555 return true;
1556 return false;
1557}
1558
b08de47e
MM
1559/* Initialize a variable CUM of type CUMULATIVE_ARGS
1560 for a call to a function whose data type is FNTYPE.
1561 For a library call, FNTYPE is 0. */
1562
1563void
1564init_cumulative_args (cum, fntype, libname)
e9a25f70 1565 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
b08de47e
MM
1566 tree fntype; /* tree ptr for function decl */
1567 rtx libname; /* SYMBOL_REF of library name or 0 */
1568{
1569 static CUMULATIVE_ARGS zero_cum;
1570 tree param, next_param;
1571
1572 if (TARGET_DEBUG_ARG)
1573 {
1574 fprintf (stderr, "\ninit_cumulative_args (");
1575 if (fntype)
e9a25f70
JL
1576 fprintf (stderr, "fntype code = %s, ret code = %s",
1577 tree_code_name[(int) TREE_CODE (fntype)],
1578 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
1579 else
1580 fprintf (stderr, "no fntype");
1581
1582 if (libname)
1583 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1584 }
1585
1586 *cum = zero_cum;
1587
1588 /* Set up the number of registers to use for passing arguments. */
e075ae69 1589 cum->nregs = ix86_regparm;
53c17031
JH
1590 cum->sse_nregs = SSE_REGPARM_MAX;
1591 if (fntype && !TARGET_64BIT)
b08de47e
MM
1592 {
1593 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 1594
b08de47e
MM
1595 if (attr)
1596 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1597 }
53c17031 1598 cum->maybe_vaarg = false;
b08de47e
MM
1599
1600 /* Determine if this function has variable arguments. This is
1601 indicated by the last argument being 'void_type_mode' if there
1602 are no variable arguments. If there are variable arguments, then
1603 we won't pass anything in registers */
1604
1605 if (cum->nregs)
1606 {
1607 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 1608 param != 0; param = next_param)
b08de47e
MM
1609 {
1610 next_param = TREE_CHAIN (param);
e9a25f70 1611 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
53c17031
JH
1612 {
1613 if (!TARGET_64BIT)
1614 cum->nregs = 0;
1615 cum->maybe_vaarg = true;
1616 }
b08de47e
MM
1617 }
1618 }
53c17031
JH
1619 if ((!fntype && !libname)
1620 || (fntype && !TYPE_ARG_TYPES (fntype)))
1621 cum->maybe_vaarg = 1;
b08de47e
MM
1622
1623 if (TARGET_DEBUG_ARG)
1624 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1625
1626 return;
1627}
1628
53c17031 1629/* x86-64 register passing impleemntation. See x86-64 ABI for details. Goal
f710504c 1630 of this code is to classify each 8bytes of incoming argument by the register
53c17031
JH
1631 class and assign registers accordingly. */
1632
1633/* Return the union class of CLASS1 and CLASS2.
1634 See the x86-64 PS ABI for details. */
1635
1636static enum x86_64_reg_class
1637merge_classes (class1, class2)
1638 enum x86_64_reg_class class1, class2;
1639{
1640 /* Rule #1: If both classes are equal, this is the resulting class. */
1641 if (class1 == class2)
1642 return class1;
1643
1644 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1645 the other class. */
1646 if (class1 == X86_64_NO_CLASS)
1647 return class2;
1648 if (class2 == X86_64_NO_CLASS)
1649 return class1;
1650
1651 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1652 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1653 return X86_64_MEMORY_CLASS;
1654
1655 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1656 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1657 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1658 return X86_64_INTEGERSI_CLASS;
1659 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1660 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1661 return X86_64_INTEGER_CLASS;
1662
1663 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1664 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1665 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1666 return X86_64_MEMORY_CLASS;
1667
1668 /* Rule #6: Otherwise class SSE is used. */
1669 return X86_64_SSE_CLASS;
1670}
1671
1672/* Classify the argument of type TYPE and mode MODE.
1673 CLASSES will be filled by the register class used to pass each word
1674 of the operand. The number of words is returned. In case the parameter
1675 should be passed in memory, 0 is returned. As a special case for zero
1676 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1677
1678 BIT_OFFSET is used internally for handling records and specifies offset
1679 of the offset in bits modulo 256 to avoid overflow cases.
1680
1681 See the x86-64 PS ABI for details.
1682*/
1683
1684static int
1685classify_argument (mode, type, classes, bit_offset)
1686 enum machine_mode mode;
1687 tree type;
1688 enum x86_64_reg_class classes[MAX_CLASSES];
1689 int bit_offset;
1690{
1691 int bytes =
1692 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1693 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1694
1695 if (type && AGGREGATE_TYPE_P (type))
1696 {
1697 int i;
1698 tree field;
1699 enum x86_64_reg_class subclasses[MAX_CLASSES];
1700
1701 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1702 if (bytes > 16)
1703 return 0;
1704
1705 for (i = 0; i < words; i++)
1706 classes[i] = X86_64_NO_CLASS;
1707
1708 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1709 signalize memory class, so handle it as special case. */
1710 if (!words)
1711 {
1712 classes[0] = X86_64_NO_CLASS;
1713 return 1;
1714 }
1715
1716 /* Classify each field of record and merge classes. */
1717 if (TREE_CODE (type) == RECORD_TYPE)
1718 {
1719 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1720 {
1721 if (TREE_CODE (field) == FIELD_DECL)
1722 {
1723 int num;
1724
1725 /* Bitfields are always classified as integer. Handle them
1726 early, since later code would consider them to be
1727 misaligned integers. */
1728 if (DECL_BIT_FIELD (field))
1729 {
1730 for (i = int_bit_position (field) / 8 / 8;
1731 i < (int_bit_position (field)
1732 + tree_low_cst (DECL_SIZE (field), 0)
1733 + 63) / 8 / 8; i++)
1734 classes[i] =
1735 merge_classes (X86_64_INTEGER_CLASS,
1736 classes[i]);
1737 }
1738 else
1739 {
1740 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1741 TREE_TYPE (field), subclasses,
1742 (int_bit_position (field)
1743 + bit_offset) % 256);
1744 if (!num)
1745 return 0;
1746 for (i = 0; i < num; i++)
1747 {
1748 int pos =
1749 (int_bit_position (field) + bit_offset) / 8 / 8;
1750 classes[i + pos] =
1751 merge_classes (subclasses[i], classes[i + pos]);
1752 }
1753 }
1754 }
1755 }
1756 }
1757 /* Arrays are handled as small records. */
1758 else if (TREE_CODE (type) == ARRAY_TYPE)
1759 {
1760 int num;
1761 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1762 TREE_TYPE (type), subclasses, bit_offset);
1763 if (!num)
1764 return 0;
1765
1766 /* The partial classes are now full classes. */
1767 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1768 subclasses[0] = X86_64_SSE_CLASS;
1769 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1770 subclasses[0] = X86_64_INTEGER_CLASS;
1771
1772 for (i = 0; i < words; i++)
1773 classes[i] = subclasses[i % num];
1774 }
1775 /* Unions are similar to RECORD_TYPE but offset is always 0. */
e4dbaed5
AS
1776 else if (TREE_CODE (type) == UNION_TYPE
1777 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031
JH
1778 {
1779 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1780 {
1781 if (TREE_CODE (field) == FIELD_DECL)
1782 {
1783 int num;
1784 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1785 TREE_TYPE (field), subclasses,
1786 bit_offset);
1787 if (!num)
1788 return 0;
1789 for (i = 0; i < num; i++)
1790 classes[i] = merge_classes (subclasses[i], classes[i]);
1791 }
1792 }
1793 }
1794 else
1795 abort ();
1796
1797 /* Final merger cleanup. */
1798 for (i = 0; i < words; i++)
1799 {
1800 /* If one class is MEMORY, everything should be passed in
1801 memory. */
1802 if (classes[i] == X86_64_MEMORY_CLASS)
1803 return 0;
1804
d6a7951f 1805 /* The X86_64_SSEUP_CLASS should be always preceded by
53c17031
JH
1806 X86_64_SSE_CLASS. */
1807 if (classes[i] == X86_64_SSEUP_CLASS
1808 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1809 classes[i] = X86_64_SSE_CLASS;
1810
d6a7951f 1811 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
1812 if (classes[i] == X86_64_X87UP_CLASS
1813 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1814 classes[i] = X86_64_SSE_CLASS;
1815 }
1816 return words;
1817 }
1818
1819 /* Compute alignment needed. We align all types to natural boundaries with
1820 exception of XFmode that is aligned to 64bits. */
1821 if (mode != VOIDmode && mode != BLKmode)
1822 {
1823 int mode_alignment = GET_MODE_BITSIZE (mode);
1824
1825 if (mode == XFmode)
1826 mode_alignment = 128;
1827 else if (mode == XCmode)
1828 mode_alignment = 256;
f5143c46 1829 /* Misaligned fields are always returned in memory. */
53c17031
JH
1830 if (bit_offset % mode_alignment)
1831 return 0;
1832 }
1833
1834 /* Classification of atomic types. */
1835 switch (mode)
1836 {
1837 case DImode:
1838 case SImode:
1839 case HImode:
1840 case QImode:
1841 case CSImode:
1842 case CHImode:
1843 case CQImode:
1844 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1845 classes[0] = X86_64_INTEGERSI_CLASS;
1846 else
1847 classes[0] = X86_64_INTEGER_CLASS;
1848 return 1;
1849 case CDImode:
1850 case TImode:
1851 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1852 return 2;
1853 case CTImode:
1854 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1855 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1856 return 4;
1857 case SFmode:
1858 if (!(bit_offset % 64))
1859 classes[0] = X86_64_SSESF_CLASS;
1860 else
1861 classes[0] = X86_64_SSE_CLASS;
1862 return 1;
1863 case DFmode:
1864 classes[0] = X86_64_SSEDF_CLASS;
1865 return 1;
1866 case TFmode:
1867 classes[0] = X86_64_X87_CLASS;
1868 classes[1] = X86_64_X87UP_CLASS;
1869 return 2;
1870 case TCmode:
1871 classes[0] = X86_64_X87_CLASS;
1872 classes[1] = X86_64_X87UP_CLASS;
1873 classes[2] = X86_64_X87_CLASS;
1874 classes[3] = X86_64_X87UP_CLASS;
1875 return 4;
1876 case DCmode:
1877 classes[0] = X86_64_SSEDF_CLASS;
1878 classes[1] = X86_64_SSEDF_CLASS;
1879 return 2;
1880 case SCmode:
1881 classes[0] = X86_64_SSE_CLASS;
1882 return 1;
e95d6b23
JH
1883 case V4SFmode:
1884 case V4SImode:
1885 classes[0] = X86_64_SSE_CLASS;
1886 classes[1] = X86_64_SSEUP_CLASS;
1887 return 2;
1888 case V2SFmode:
1889 case V2SImode:
1890 case V4HImode:
1891 case V8QImode:
1892 classes[0] = X86_64_SSE_CLASS;
1893 return 1;
53c17031 1894 case BLKmode:
e95d6b23 1895 case VOIDmode:
53c17031
JH
1896 return 0;
1897 default:
1898 abort ();
1899 }
1900}
1901
1902/* Examine the argument and return set number of register required in each
f5143c46 1903 class. Return 0 iff parameter should be passed in memory. */
53c17031
JH
1904static int
1905examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1906 enum machine_mode mode;
1907 tree type;
1908 int *int_nregs, *sse_nregs;
1909 int in_return;
1910{
1911 enum x86_64_reg_class class[MAX_CLASSES];
1912 int n = classify_argument (mode, type, class, 0);
1913
1914 *int_nregs = 0;
1915 *sse_nregs = 0;
1916 if (!n)
1917 return 0;
1918 for (n--; n >= 0; n--)
1919 switch (class[n])
1920 {
1921 case X86_64_INTEGER_CLASS:
1922 case X86_64_INTEGERSI_CLASS:
1923 (*int_nregs)++;
1924 break;
1925 case X86_64_SSE_CLASS:
1926 case X86_64_SSESF_CLASS:
1927 case X86_64_SSEDF_CLASS:
1928 (*sse_nregs)++;
1929 break;
1930 case X86_64_NO_CLASS:
1931 case X86_64_SSEUP_CLASS:
1932 break;
1933 case X86_64_X87_CLASS:
1934 case X86_64_X87UP_CLASS:
1935 if (!in_return)
1936 return 0;
1937 break;
1938 case X86_64_MEMORY_CLASS:
1939 abort ();
1940 }
1941 return 1;
1942}
1943/* Construct container for the argument used by GCC interface. See
1944 FUNCTION_ARG for the detailed description. */
1945static rtx
1946construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1947 enum machine_mode mode;
1948 tree type;
1949 int in_return;
1950 int nintregs, nsseregs;
07933f72
GS
1951 const int * intreg;
1952 int sse_regno;
53c17031
JH
1953{
1954 enum machine_mode tmpmode;
1955 int bytes =
1956 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1957 enum x86_64_reg_class class[MAX_CLASSES];
1958 int n;
1959 int i;
1960 int nexps = 0;
1961 int needed_sseregs, needed_intregs;
1962 rtx exp[MAX_CLASSES];
1963 rtx ret;
1964
1965 n = classify_argument (mode, type, class, 0);
1966 if (TARGET_DEBUG_ARG)
1967 {
1968 if (!n)
1969 fprintf (stderr, "Memory class\n");
1970 else
1971 {
1972 fprintf (stderr, "Classes:");
1973 for (i = 0; i < n; i++)
1974 {
1975 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1976 }
1977 fprintf (stderr, "\n");
1978 }
1979 }
1980 if (!n)
1981 return NULL;
1982 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1983 return NULL;
1984 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1985 return NULL;
1986
1987 /* First construct simple cases. Avoid SCmode, since we want to use
1988 single register to pass this type. */
1989 if (n == 1 && mode != SCmode)
1990 switch (class[0])
1991 {
1992 case X86_64_INTEGER_CLASS:
1993 case X86_64_INTEGERSI_CLASS:
1994 return gen_rtx_REG (mode, intreg[0]);
1995 case X86_64_SSE_CLASS:
1996 case X86_64_SSESF_CLASS:
1997 case X86_64_SSEDF_CLASS:
1998 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1999 case X86_64_X87_CLASS:
2000 return gen_rtx_REG (mode, FIRST_STACK_REG);
2001 case X86_64_NO_CLASS:
2002 /* Zero sized array, struct or class. */
2003 return NULL;
2004 default:
2005 abort ();
2006 }
2007 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
e95d6b23 2008 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
53c17031
JH
2009 if (n == 2
2010 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2011 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2012 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2013 && class[1] == X86_64_INTEGER_CLASS
2014 && (mode == CDImode || mode == TImode)
2015 && intreg[0] + 1 == intreg[1])
2016 return gen_rtx_REG (mode, intreg[0]);
2017 if (n == 4
2018 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2019 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2020 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2021
2022 /* Otherwise figure out the entries of the PARALLEL. */
2023 for (i = 0; i < n; i++)
2024 {
2025 switch (class[i])
2026 {
2027 case X86_64_NO_CLASS:
2028 break;
2029 case X86_64_INTEGER_CLASS:
2030 case X86_64_INTEGERSI_CLASS:
2031 /* Merge TImodes on aligned occassions here too. */
2032 if (i * 8 + 8 > bytes)
2033 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2034 else if (class[i] == X86_64_INTEGERSI_CLASS)
2035 tmpmode = SImode;
2036 else
2037 tmpmode = DImode;
2038 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2039 if (tmpmode == BLKmode)
2040 tmpmode = DImode;
2041 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2042 gen_rtx_REG (tmpmode, *intreg),
2043 GEN_INT (i*8));
2044 intreg++;
2045 break;
2046 case X86_64_SSESF_CLASS:
2047 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2048 gen_rtx_REG (SFmode,
2049 SSE_REGNO (sse_regno)),
2050 GEN_INT (i*8));
2051 sse_regno++;
2052 break;
2053 case X86_64_SSEDF_CLASS:
2054 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2055 gen_rtx_REG (DFmode,
2056 SSE_REGNO (sse_regno)),
2057 GEN_INT (i*8));
2058 sse_regno++;
2059 break;
2060 case X86_64_SSE_CLASS:
2061 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2062 tmpmode = TImode, i++;
2063 else
2064 tmpmode = DImode;
2065 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2066 gen_rtx_REG (tmpmode,
2067 SSE_REGNO (sse_regno)),
2068 GEN_INT (i*8));
2069 sse_regno++;
2070 break;
2071 default:
2072 abort ();
2073 }
2074 }
2075 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2076 for (i = 0; i < nexps; i++)
2077 XVECEXP (ret, 0, i) = exp [i];
2078 return ret;
2079}
2080
b08de47e
MM
2081/* Update the data in CUM to advance over an argument
2082 of mode MODE and data type TYPE.
2083 (TYPE is null for libcalls where that information may not be available.) */
2084
2085void
2086function_arg_advance (cum, mode, type, named)
2087 CUMULATIVE_ARGS *cum; /* current arg information */
2088 enum machine_mode mode; /* current arg mode */
2089 tree type; /* type of the argument or 0 if lib support */
2090 int named; /* whether or not the argument was named */
2091{
5ac9118e
KG
2092 int bytes =
2093 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2094 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2095
2096 if (TARGET_DEBUG_ARG)
2097 fprintf (stderr,
e9a25f70 2098 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 2099 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
53c17031 2100 if (TARGET_64BIT)
b08de47e 2101 {
53c17031
JH
2102 int int_nregs, sse_nregs;
2103 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2104 cum->words += words;
2105 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2106 {
53c17031
JH
2107 cum->nregs -= int_nregs;
2108 cum->sse_nregs -= sse_nregs;
2109 cum->regno += int_nregs;
2110 cum->sse_regno += sse_nregs;
82a127a9 2111 }
53c17031
JH
2112 else
2113 cum->words += words;
b08de47e 2114 }
a4f31c00 2115 else
82a127a9 2116 {
53c17031
JH
2117 if (TARGET_SSE && mode == TImode)
2118 {
2119 cum->sse_words += words;
2120 cum->sse_nregs -= 1;
2121 cum->sse_regno += 1;
2122 if (cum->sse_nregs <= 0)
2123 {
2124 cum->sse_nregs = 0;
2125 cum->sse_regno = 0;
2126 }
2127 }
2128 else
82a127a9 2129 {
53c17031
JH
2130 cum->words += words;
2131 cum->nregs -= words;
2132 cum->regno += words;
2133
2134 if (cum->nregs <= 0)
2135 {
2136 cum->nregs = 0;
2137 cum->regno = 0;
2138 }
82a127a9
CM
2139 }
2140 }
b08de47e
MM
2141 return;
2142}
2143
2144/* Define where to put the arguments to a function.
2145 Value is zero to push the argument on the stack,
2146 or a hard register in which to store the argument.
2147
2148 MODE is the argument's machine mode.
2149 TYPE is the data type of the argument (as a tree).
2150 This is null for libcalls where that information may
2151 not be available.
2152 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2153 the preceding args and about the function being called.
2154 NAMED is nonzero if this argument is a named parameter
2155 (otherwise it is an extra parameter matching an ellipsis). */
2156
07933f72 2157rtx
b08de47e
MM
2158function_arg (cum, mode, type, named)
2159 CUMULATIVE_ARGS *cum; /* current arg information */
2160 enum machine_mode mode; /* current arg mode */
2161 tree type; /* type of the argument or 0 if lib support */
2162 int named; /* != 0 for normal args, == 0 for ... args */
2163{
2164 rtx ret = NULL_RTX;
5ac9118e
KG
2165 int bytes =
2166 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2167 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2168
53c17031
JH
2169 /* Handle an hidden AL argument containing number of registers for varargs
2170 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2171 any AL settings. */
32ee7d1d 2172 if (mode == VOIDmode)
b08de47e 2173 {
53c17031
JH
2174 if (TARGET_64BIT)
2175 return GEN_INT (cum->maybe_vaarg
2176 ? (cum->sse_nregs < 0
2177 ? SSE_REGPARM_MAX
2178 : cum->sse_regno)
2179 : -1);
2180 else
2181 return constm1_rtx;
b08de47e 2182 }
53c17031
JH
2183 if (TARGET_64BIT)
2184 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2185 &x86_64_int_parameter_registers [cum->regno],
2186 cum->sse_regno);
2187 else
2188 switch (mode)
2189 {
2190 /* For now, pass fp/complex values on the stack. */
2191 default:
2192 break;
2193
2194 case BLKmode:
2195 case DImode:
2196 case SImode:
2197 case HImode:
2198 case QImode:
2199 if (words <= cum->nregs)
2200 ret = gen_rtx_REG (mode, cum->regno);
2201 break;
2202 case TImode:
2203 if (cum->sse_nregs)
2204 ret = gen_rtx_REG (mode, cum->sse_regno);
2205 break;
2206 }
b08de47e
MM
2207
2208 if (TARGET_DEBUG_ARG)
2209 {
2210 fprintf (stderr,
e9a25f70 2211 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
b08de47e
MM
2212 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2213
2214 if (ret)
b531087a 2215 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
b08de47e
MM
2216 else
2217 fprintf (stderr, ", stack");
2218
2219 fprintf (stderr, " )\n");
2220 }
2221
2222 return ret;
2223}
53c17031
JH
2224
2225/* Gives the alignment boundary, in bits, of an argument with the specified mode
2226 and type. */
2227
2228int
2229ix86_function_arg_boundary (mode, type)
2230 enum machine_mode mode;
2231 tree type;
2232{
2233 int align;
2234 if (!TARGET_64BIT)
2235 return PARM_BOUNDARY;
2236 if (type)
2237 align = TYPE_ALIGN (type);
2238 else
2239 align = GET_MODE_ALIGNMENT (mode);
2240 if (align < PARM_BOUNDARY)
2241 align = PARM_BOUNDARY;
2242 if (align > 128)
2243 align = 128;
2244 return align;
2245}
2246
2247/* Return true if N is a possible register number of function value. */
2248bool
2249ix86_function_value_regno_p (regno)
2250 int regno;
2251{
2252 if (!TARGET_64BIT)
2253 {
2254 return ((regno) == 0
2255 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2256 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2257 }
2258 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2259 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2260 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2261}
2262
2263/* Define how to find the value returned by a function.
2264 VALTYPE is the data type of the value (as a tree).
2265 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2266 otherwise, FUNC is 0. */
2267rtx
2268ix86_function_value (valtype)
2269 tree valtype;
2270{
2271 if (TARGET_64BIT)
2272 {
2273 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2274 REGPARM_MAX, SSE_REGPARM_MAX,
2275 x86_64_int_return_registers, 0);
2276 /* For zero sized structures, construct_continer return NULL, but we need
2277 to keep rest of compiler happy by returning meaningfull value. */
2278 if (!ret)
2279 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2280 return ret;
2281 }
2282 else
2283 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2284}
2285
f5143c46 2286/* Return false iff type is returned in memory. */
53c17031
JH
2287int
2288ix86_return_in_memory (type)
2289 tree type;
2290{
2291 int needed_intregs, needed_sseregs;
2292 if (TARGET_64BIT)
2293 {
2294 return !examine_argument (TYPE_MODE (type), type, 1,
2295 &needed_intregs, &needed_sseregs);
2296 }
2297 else
2298 {
2299 if (TYPE_MODE (type) == BLKmode
2300 || (VECTOR_MODE_P (TYPE_MODE (type))
2301 && int_size_in_bytes (type) == 8)
2302 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2303 && TYPE_MODE (type) != TFmode
2304 && !VECTOR_MODE_P (TYPE_MODE (type))))
2305 return 1;
2306 return 0;
2307 }
2308}
2309
2310/* Define how to find the value returned by a library function
2311 assuming the value has mode MODE. */
2312rtx
2313ix86_libcall_value (mode)
2314 enum machine_mode mode;
2315{
2316 if (TARGET_64BIT)
2317 {
2318 switch (mode)
2319 {
2320 case SFmode:
2321 case SCmode:
2322 case DFmode:
2323 case DCmode:
2324 return gen_rtx_REG (mode, FIRST_SSE_REG);
2325 case TFmode:
2326 case TCmode:
2327 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2328 default:
2329 return gen_rtx_REG (mode, 0);
2330 }
2331 }
2332 else
2333 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2334}
ad919812
JH
2335\f
2336/* Create the va_list data type. */
53c17031 2337
ad919812
JH
2338tree
2339ix86_build_va_list ()
2340{
2341 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 2342
ad919812
JH
2343 /* For i386 we use plain pointer to argument area. */
2344 if (!TARGET_64BIT)
2345 return build_pointer_type (char_type_node);
2346
f1e639b1 2347 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
ad919812
JH
2348 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2349
2350 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2351 unsigned_type_node);
2352 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2353 unsigned_type_node);
2354 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2355 ptr_type_node);
2356 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2357 ptr_type_node);
2358
2359 DECL_FIELD_CONTEXT (f_gpr) = record;
2360 DECL_FIELD_CONTEXT (f_fpr) = record;
2361 DECL_FIELD_CONTEXT (f_ovf) = record;
2362 DECL_FIELD_CONTEXT (f_sav) = record;
2363
2364 TREE_CHAIN (record) = type_decl;
2365 TYPE_NAME (record) = type_decl;
2366 TYPE_FIELDS (record) = f_gpr;
2367 TREE_CHAIN (f_gpr) = f_fpr;
2368 TREE_CHAIN (f_fpr) = f_ovf;
2369 TREE_CHAIN (f_ovf) = f_sav;
2370
2371 layout_type (record);
2372
2373 /* The correct type is an array type of one element. */
2374 return build_array_type (record, build_index_type (size_zero_node));
2375}
2376
2377/* Perform any needed actions needed for a function that is receiving a
2378 variable number of arguments.
2379
2380 CUM is as above.
2381
2382 MODE and TYPE are the mode and type of the current parameter.
2383
2384 PRETEND_SIZE is a variable that should be set to the amount of stack
2385 that must be pushed by the prolog to pretend that our caller pushed
2386 it.
2387
2388 Normally, this macro will push all remaining incoming registers on the
2389 stack and set PRETEND_SIZE to the length of the registers pushed. */
2390
2391void
2392ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2393 CUMULATIVE_ARGS *cum;
2394 enum machine_mode mode;
2395 tree type;
2396 int *pretend_size ATTRIBUTE_UNUSED;
2397 int no_rtl;
2398
2399{
2400 CUMULATIVE_ARGS next_cum;
2401 rtx save_area = NULL_RTX, mem;
2402 rtx label;
2403 rtx label_ref;
2404 rtx tmp_reg;
2405 rtx nsse_reg;
2406 int set;
2407 tree fntype;
2408 int stdarg_p;
2409 int i;
2410
2411 if (!TARGET_64BIT)
2412 return;
2413
2414 /* Indicate to allocate space on the stack for varargs save area. */
2415 ix86_save_varrargs_registers = 1;
2416
2417 fntype = TREE_TYPE (current_function_decl);
2418 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2419 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2420 != void_type_node));
2421
2422 /* For varargs, we do not want to skip the dummy va_dcl argument.
2423 For stdargs, we do want to skip the last named argument. */
2424 next_cum = *cum;
2425 if (stdarg_p)
2426 function_arg_advance (&next_cum, mode, type, 1);
2427
2428 if (!no_rtl)
2429 save_area = frame_pointer_rtx;
2430
2431 set = get_varargs_alias_set ();
2432
2433 for (i = next_cum.regno; i < ix86_regparm; i++)
2434 {
2435 mem = gen_rtx_MEM (Pmode,
2436 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 2437 set_mem_alias_set (mem, set);
ad919812
JH
2438 emit_move_insn (mem, gen_rtx_REG (Pmode,
2439 x86_64_int_parameter_registers[i]));
2440 }
2441
2442 if (next_cum.sse_nregs)
2443 {
2444 /* Now emit code to save SSE registers. The AX parameter contains number
2445 of SSE parameter regsiters used to call this function. We use
2446 sse_prologue_save insn template that produces computed jump across
2447 SSE saves. We need some preparation work to get this working. */
2448
2449 label = gen_label_rtx ();
2450 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2451
2452 /* Compute address to jump to :
2453 label - 5*eax + nnamed_sse_arguments*5 */
2454 tmp_reg = gen_reg_rtx (Pmode);
2455 nsse_reg = gen_reg_rtx (Pmode);
2456 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2457 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 2458 gen_rtx_MULT (Pmode, nsse_reg,
ad919812
JH
2459 GEN_INT (4))));
2460 if (next_cum.sse_regno)
2461 emit_move_insn
2462 (nsse_reg,
2463 gen_rtx_CONST (DImode,
2464 gen_rtx_PLUS (DImode,
2465 label_ref,
2466 GEN_INT (next_cum.sse_regno * 4))));
2467 else
2468 emit_move_insn (nsse_reg, label_ref);
2469 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2470
2471 /* Compute address of memory block we save into. We always use pointer
2472 pointing 127 bytes after first byte to store - this is needed to keep
2473 instruction size limited by 4 bytes. */
2474 tmp_reg = gen_reg_rtx (Pmode);
8ac61af7
RK
2475 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2476 plus_constant (save_area,
2477 8 * REGPARM_MAX + 127)));
ad919812 2478 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 2479 set_mem_alias_set (mem, set);
8ac61af7 2480 set_mem_align (mem, BITS_PER_WORD);
ad919812
JH
2481
2482 /* And finally do the dirty job! */
8ac61af7
RK
2483 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2484 GEN_INT (next_cum.sse_regno), label));
ad919812
JH
2485 }
2486
2487}
2488
2489/* Implement va_start. */
2490
2491void
2492ix86_va_start (stdarg_p, valist, nextarg)
2493 int stdarg_p;
2494 tree valist;
2495 rtx nextarg;
2496{
2497 HOST_WIDE_INT words, n_gpr, n_fpr;
2498 tree f_gpr, f_fpr, f_ovf, f_sav;
2499 tree gpr, fpr, ovf, sav, t;
2500
2501 /* Only 64bit target needs something special. */
2502 if (!TARGET_64BIT)
2503 {
2504 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2505 return;
2506 }
2507
2508 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2509 f_fpr = TREE_CHAIN (f_gpr);
2510 f_ovf = TREE_CHAIN (f_fpr);
2511 f_sav = TREE_CHAIN (f_ovf);
2512
2513 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2514 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2515 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2516 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2517 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2518
2519 /* Count number of gp and fp argument registers used. */
2520 words = current_function_args_info.words;
2521 n_gpr = current_function_args_info.regno;
2522 n_fpr = current_function_args_info.sse_regno;
2523
2524 if (TARGET_DEBUG_ARG)
2525 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
b531087a 2526 (int) words, (int) n_gpr, (int) n_fpr);
ad919812
JH
2527
2528 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2529 build_int_2 (n_gpr * 8, 0));
2530 TREE_SIDE_EFFECTS (t) = 1;
2531 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2532
2533 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2534 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2535 TREE_SIDE_EFFECTS (t) = 1;
2536 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2537
2538 /* Find the overflow area. */
2539 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2540 if (words != 0)
2541 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2542 build_int_2 (words * UNITS_PER_WORD, 0));
2543 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2544 TREE_SIDE_EFFECTS (t) = 1;
2545 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2546
2547 /* Find the register save area.
2548 Prologue of the function save it right above stack frame. */
2549 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2550 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2551 TREE_SIDE_EFFECTS (t) = 1;
2552 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2553}
2554
2555/* Implement va_arg. */
2556rtx
2557ix86_va_arg (valist, type)
2558 tree valist, type;
2559{
0139adca 2560 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
ad919812
JH
2561 tree f_gpr, f_fpr, f_ovf, f_sav;
2562 tree gpr, fpr, ovf, sav, t;
b932f770 2563 int size, rsize;
ad919812
JH
2564 rtx lab_false, lab_over = NULL_RTX;
2565 rtx addr_rtx, r;
2566 rtx container;
2567
2568 /* Only 64bit target needs something special. */
2569 if (!TARGET_64BIT)
2570 {
2571 return std_expand_builtin_va_arg (valist, type);
2572 }
2573
2574 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2575 f_fpr = TREE_CHAIN (f_gpr);
2576 f_ovf = TREE_CHAIN (f_fpr);
2577 f_sav = TREE_CHAIN (f_ovf);
2578
2579 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2580 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2581 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2582 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2583 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2584
2585 size = int_size_in_bytes (type);
2586 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2587
2588 container = construct_container (TYPE_MODE (type), type, 0,
2589 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2590 /*
2591 * Pull the value out of the saved registers ...
2592 */
2593
2594 addr_rtx = gen_reg_rtx (Pmode);
2595
2596 if (container)
2597 {
2598 rtx int_addr_rtx, sse_addr_rtx;
2599 int needed_intregs, needed_sseregs;
2600 int need_temp;
2601
2602 lab_over = gen_label_rtx ();
2603 lab_false = gen_label_rtx ();
8bad7136 2604
ad919812
JH
2605 examine_argument (TYPE_MODE (type), type, 0,
2606 &needed_intregs, &needed_sseregs);
2607
2608
2609 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2610 || TYPE_ALIGN (type) > 128);
2611
2612 /* In case we are passing structure, verify that it is consetuctive block
2613 on the register save area. If not we need to do moves. */
2614 if (!need_temp && !REG_P (container))
2615 {
2616 /* Verify that all registers are strictly consetuctive */
2617 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2618 {
2619 int i;
2620
2621 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2622 {
2623 rtx slot = XVECEXP (container, 0, i);
b531087a 2624 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
ad919812
JH
2625 || INTVAL (XEXP (slot, 1)) != i * 16)
2626 need_temp = 1;
2627 }
2628 }
2629 else
2630 {
2631 int i;
2632
2633 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2634 {
2635 rtx slot = XVECEXP (container, 0, i);
b531087a 2636 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
ad919812
JH
2637 || INTVAL (XEXP (slot, 1)) != i * 8)
2638 need_temp = 1;
2639 }
2640 }
2641 }
2642 if (!need_temp)
2643 {
2644 int_addr_rtx = addr_rtx;
2645 sse_addr_rtx = addr_rtx;
2646 }
2647 else
2648 {
2649 int_addr_rtx = gen_reg_rtx (Pmode);
2650 sse_addr_rtx = gen_reg_rtx (Pmode);
2651 }
2652 /* First ensure that we fit completely in registers. */
2653 if (needed_intregs)
2654 {
2655 emit_cmp_and_jump_insns (expand_expr
2656 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2657 GEN_INT ((REGPARM_MAX - needed_intregs +
2658 1) * 8), GE, const1_rtx, SImode,
d43e0b7d 2659 1, lab_false);
ad919812
JH
2660 }
2661 if (needed_sseregs)
2662 {
2663 emit_cmp_and_jump_insns (expand_expr
2664 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2665 GEN_INT ((SSE_REGPARM_MAX -
2666 needed_sseregs + 1) * 16 +
2667 REGPARM_MAX * 8), GE, const1_rtx,
d43e0b7d 2668 SImode, 1, lab_false);
ad919812
JH
2669 }
2670
2671 /* Compute index to start of area used for integer regs. */
2672 if (needed_intregs)
2673 {
2674 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2675 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2676 if (r != int_addr_rtx)
2677 emit_move_insn (int_addr_rtx, r);
2678 }
2679 if (needed_sseregs)
2680 {
2681 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2682 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2683 if (r != sse_addr_rtx)
2684 emit_move_insn (sse_addr_rtx, r);
2685 }
2686 if (need_temp)
2687 {
2688 int i;
2689 rtx mem;
2690
b932f770
JH
2691 /* Never use the memory itself, as it has the alias set. */
2692 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2693 mem = gen_rtx_MEM (BLKmode, addr_rtx);
0692acba 2694 set_mem_alias_set (mem, get_varargs_alias_set ());
8ac61af7 2695 set_mem_align (mem, BITS_PER_UNIT);
b932f770 2696
ad919812
JH
2697 for (i = 0; i < XVECLEN (container, 0); i++)
2698 {
2699 rtx slot = XVECEXP (container, 0, i);
2700 rtx reg = XEXP (slot, 0);
2701 enum machine_mode mode = GET_MODE (reg);
2702 rtx src_addr;
2703 rtx src_mem;
2704 int src_offset;
2705 rtx dest_mem;
2706
2707 if (SSE_REGNO_P (REGNO (reg)))
2708 {
2709 src_addr = sse_addr_rtx;
2710 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2711 }
2712 else
2713 {
2714 src_addr = int_addr_rtx;
2715 src_offset = REGNO (reg) * 8;
2716 }
2717 src_mem = gen_rtx_MEM (mode, src_addr);
0692acba 2718 set_mem_alias_set (src_mem, get_varargs_alias_set ());
ad919812
JH
2719 src_mem = adjust_address (src_mem, mode, src_offset);
2720 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
ad919812
JH
2721 emit_move_insn (dest_mem, src_mem);
2722 }
2723 }
2724
2725 if (needed_intregs)
2726 {
2727 t =
2728 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2729 build_int_2 (needed_intregs * 8, 0));
2730 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2731 TREE_SIDE_EFFECTS (t) = 1;
2732 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2733 }
2734 if (needed_sseregs)
2735 {
2736 t =
2737 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2738 build_int_2 (needed_sseregs * 16, 0));
2739 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2740 TREE_SIDE_EFFECTS (t) = 1;
2741 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2742 }
2743
2744 emit_jump_insn (gen_jump (lab_over));
2745 emit_barrier ();
2746 emit_label (lab_false);
2747 }
2748
2749 /* ... otherwise out of the overflow area. */
2750
2751 /* Care for on-stack alignment if needed. */
2752 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2753 t = ovf;
2754 else
2755 {
2756 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2757 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2758 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2759 }
2760 t = save_expr (t);
2761
2762 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2763 if (r != addr_rtx)
2764 emit_move_insn (addr_rtx, r);
2765
2766 t =
2767 build (PLUS_EXPR, TREE_TYPE (t), t,
2768 build_int_2 (rsize * UNITS_PER_WORD, 0));
2769 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2770 TREE_SIDE_EFFECTS (t) = 1;
2771 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2772
2773 if (container)
2774 emit_label (lab_over);
2775
ad919812
JH
2776 return addr_rtx;
2777}
2778\f
7dd4b4a3
JH
2779/* Return nonzero if OP is general operand representable on x86_64. */
2780
2781int
2782x86_64_general_operand (op, mode)
2783 rtx op;
2784 enum machine_mode mode;
2785{
2786 if (!TARGET_64BIT)
2787 return general_operand (op, mode);
2788 if (nonimmediate_operand (op, mode))
2789 return 1;
2790 return x86_64_sign_extended_value (op);
2791}
2792
2793/* Return nonzero if OP is general operand representable on x86_64
d6a7951f 2794 as either sign extended or zero extended constant. */
7dd4b4a3
JH
2795
2796int
2797x86_64_szext_general_operand (op, mode)
2798 rtx op;
2799 enum machine_mode mode;
2800{
2801 if (!TARGET_64BIT)
2802 return general_operand (op, mode);
2803 if (nonimmediate_operand (op, mode))
2804 return 1;
2805 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2806}
2807
2808/* Return nonzero if OP is nonmemory operand representable on x86_64. */
2809
2810int
2811x86_64_nonmemory_operand (op, mode)
2812 rtx op;
2813 enum machine_mode mode;
2814{
2815 if (!TARGET_64BIT)
2816 return nonmemory_operand (op, mode);
2817 if (register_operand (op, mode))
2818 return 1;
2819 return x86_64_sign_extended_value (op);
2820}
2821
2822/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2823
2824int
2825x86_64_movabs_operand (op, mode)
2826 rtx op;
2827 enum machine_mode mode;
2828{
2829 if (!TARGET_64BIT || !flag_pic)
2830 return nonmemory_operand (op, mode);
2831 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2832 return 1;
2833 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2834 return 1;
2835 return 0;
2836}
2837
2838/* Return nonzero if OP is nonmemory operand representable on x86_64. */
2839
2840int
2841x86_64_szext_nonmemory_operand (op, mode)
2842 rtx op;
2843 enum machine_mode mode;
2844{
2845 if (!TARGET_64BIT)
2846 return nonmemory_operand (op, mode);
2847 if (register_operand (op, mode))
2848 return 1;
2849 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2850}
2851
2852/* Return nonzero if OP is immediate operand representable on x86_64. */
2853
2854int
2855x86_64_immediate_operand (op, mode)
2856 rtx op;
2857 enum machine_mode mode;
2858{
2859 if (!TARGET_64BIT)
2860 return immediate_operand (op, mode);
2861 return x86_64_sign_extended_value (op);
2862}
2863
2864/* Return nonzero if OP is immediate operand representable on x86_64. */
2865
2866int
2867x86_64_zext_immediate_operand (op, mode)
2868 rtx op;
2869 enum machine_mode mode ATTRIBUTE_UNUSED;
2870{
2871 return x86_64_zero_extended_value (op);
2872}
2873
8bad7136
JL
2874/* Return nonzero if OP is (const_int 1), else return zero. */
2875
2876int
2877const_int_1_operand (op, mode)
2878 rtx op;
2879 enum machine_mode mode ATTRIBUTE_UNUSED;
2880{
2881 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2882}
2883
e075ae69
RH
2884/* Returns 1 if OP is either a symbol reference or a sum of a symbol
2885 reference and a constant. */
b08de47e
MM
2886
2887int
e075ae69
RH
2888symbolic_operand (op, mode)
2889 register rtx op;
2890 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 2891{
e075ae69 2892 switch (GET_CODE (op))
2a2ab3f9 2893 {
e075ae69
RH
2894 case SYMBOL_REF:
2895 case LABEL_REF:
2896 return 1;
2897
2898 case CONST:
2899 op = XEXP (op, 0);
2900 if (GET_CODE (op) == SYMBOL_REF
2901 || GET_CODE (op) == LABEL_REF
2902 || (GET_CODE (op) == UNSPEC
8ee41eaf
RH
2903 && (XINT (op, 1) == UNSPEC_GOT
2904 || XINT (op, 1) == UNSPEC_GOTOFF
2905 || XINT (op, 1) == UNSPEC_GOTPCREL)))
e075ae69
RH
2906 return 1;
2907 if (GET_CODE (op) != PLUS
2908 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2909 return 0;
2910
2911 op = XEXP (op, 0);
2912 if (GET_CODE (op) == SYMBOL_REF
2913 || GET_CODE (op) == LABEL_REF)
2914 return 1;
2915 /* Only @GOTOFF gets offsets. */
2916 if (GET_CODE (op) != UNSPEC
8ee41eaf 2917 || XINT (op, 1) != UNSPEC_GOTOFF)
e075ae69
RH
2918 return 0;
2919
2920 op = XVECEXP (op, 0, 0);
2921 if (GET_CODE (op) == SYMBOL_REF
2922 || GET_CODE (op) == LABEL_REF)
2923 return 1;
2924 return 0;
2925
2926 default:
2927 return 0;
2a2ab3f9
JVA
2928 }
2929}
2a2ab3f9 2930
e075ae69 2931/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 2932
e075ae69
RH
2933int
2934pic_symbolic_operand (op, mode)
2935 register rtx op;
2936 enum machine_mode mode ATTRIBUTE_UNUSED;
2937{
6eb791fc
JH
2938 if (GET_CODE (op) != CONST)
2939 return 0;
2940 op = XEXP (op, 0);
2941 if (TARGET_64BIT)
2942 {
2943 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2944 return 1;
2945 }
2946 else
2a2ab3f9 2947 {
e075ae69
RH
2948 if (GET_CODE (op) == UNSPEC)
2949 return 1;
2950 if (GET_CODE (op) != PLUS
2951 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2952 return 0;
2953 op = XEXP (op, 0);
2954 if (GET_CODE (op) == UNSPEC)
2955 return 1;
2a2ab3f9 2956 }
e075ae69 2957 return 0;
2a2ab3f9 2958}
2a2ab3f9 2959
623fe810
RH
2960/* Return true if OP is a symbolic operand that resolves locally. */
2961
2962static int
2963local_symbolic_operand (op, mode)
2964 rtx op;
2965 enum machine_mode mode ATTRIBUTE_UNUSED;
2966{
2967 if (GET_CODE (op) == LABEL_REF)
2968 return 1;
2969
2970 if (GET_CODE (op) == CONST
2971 && GET_CODE (XEXP (op, 0)) == PLUS
2972 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2973 op = XEXP (XEXP (op, 0), 0);
2974
2975 if (GET_CODE (op) != SYMBOL_REF)
2976 return 0;
2977
2978 /* These we've been told are local by varasm and encode_section_info
2979 respectively. */
2980 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2981 return 1;
2982
2983 /* There is, however, a not insubstantial body of code in the rest of
2984 the compiler that assumes it can just stick the results of
2985 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2986 /* ??? This is a hack. Should update the body of the compiler to
fb49053f 2987 always create a DECL an invoke targetm.encode_section_info. */
623fe810
RH
2988 if (strncmp (XSTR (op, 0), internal_label_prefix,
2989 internal_label_prefix_len) == 0)
2990 return 1;
2991
2992 return 0;
2993}
2994
f996902d
RH
2995/* Test for various thread-local symbols. See ix86_encode_section_info. */
2996
2997int
2998tls_symbolic_operand (op, mode)
2999 register rtx op;
3000 enum machine_mode mode ATTRIBUTE_UNUSED;
3001{
3002 const char *symbol_str;
3003
3004 if (GET_CODE (op) != SYMBOL_REF)
3005 return 0;
3006 symbol_str = XSTR (op, 0);
3007
3008 if (symbol_str[0] != '%')
3009 return 0;
755ac5d4 3010 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
f996902d
RH
3011}
3012
3013static int
3014tls_symbolic_operand_1 (op, kind)
3015 rtx op;
3016 enum tls_model kind;
3017{
3018 const char *symbol_str;
3019
3020 if (GET_CODE (op) != SYMBOL_REF)
3021 return 0;
3022 symbol_str = XSTR (op, 0);
3023
3024 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3025}
3026
3027int
3028global_dynamic_symbolic_operand (op, mode)
3029 register rtx op;
3030 enum machine_mode mode ATTRIBUTE_UNUSED;
3031{
3032 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3033}
3034
3035int
3036local_dynamic_symbolic_operand (op, mode)
3037 register rtx op;
3038 enum machine_mode mode ATTRIBUTE_UNUSED;
3039{
3040 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3041}
3042
3043int
3044initial_exec_symbolic_operand (op, mode)
3045 register rtx op;
3046 enum machine_mode mode ATTRIBUTE_UNUSED;
3047{
3048 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3049}
3050
3051int
3052local_exec_symbolic_operand (op, mode)
3053 register rtx op;
3054 enum machine_mode mode ATTRIBUTE_UNUSED;
3055{
3056 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3057}
3058
28d52ffb
RH
3059/* Test for a valid operand for a call instruction. Don't allow the
3060 arg pointer register or virtual regs since they may decay into
3061 reg + const, which the patterns can't handle. */
2a2ab3f9 3062
e075ae69
RH
3063int
3064call_insn_operand (op, mode)
3065 rtx op;
3066 enum machine_mode mode ATTRIBUTE_UNUSED;
3067{
e075ae69
RH
3068 /* Disallow indirect through a virtual register. This leads to
3069 compiler aborts when trying to eliminate them. */
3070 if (GET_CODE (op) == REG
3071 && (op == arg_pointer_rtx
564d80f4 3072 || op == frame_pointer_rtx
e075ae69
RH
3073 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3074 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3075 return 0;
2a2ab3f9 3076
28d52ffb
RH
3077 /* Disallow `call 1234'. Due to varying assembler lameness this
3078 gets either rejected or translated to `call .+1234'. */
3079 if (GET_CODE (op) == CONST_INT)
3080 return 0;
3081
cbbf65e0
RH
3082 /* Explicitly allow SYMBOL_REF even if pic. */
3083 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 3084 return 1;
2a2ab3f9 3085
cbbf65e0
RH
3086 /* Half-pic doesn't allow anything but registers and constants.
3087 We've just taken care of the later. */
3088 if (HALF_PIC_P ())
3089 return register_operand (op, Pmode);
3090
3091 /* Otherwise we can allow any general_operand in the address. */
3092 return general_operand (op, Pmode);
e075ae69 3093}
79325812 3094
e075ae69
RH
3095int
3096constant_call_address_operand (op, mode)
3097 rtx op;
3098 enum machine_mode mode ATTRIBUTE_UNUSED;
3099{
eaf19aba
JJ
3100 if (GET_CODE (op) == CONST
3101 && GET_CODE (XEXP (op, 0)) == PLUS
3102 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3103 op = XEXP (XEXP (op, 0), 0);
e1ff012c 3104 return GET_CODE (op) == SYMBOL_REF;
e075ae69 3105}
2a2ab3f9 3106
e075ae69 3107/* Match exactly zero and one. */
e9a25f70 3108
0f290768 3109int
e075ae69
RH
3110const0_operand (op, mode)
3111 register rtx op;
3112 enum machine_mode mode;
3113{
3114 return op == CONST0_RTX (mode);
3115}
e9a25f70 3116
0f290768 3117int
e075ae69
RH
3118const1_operand (op, mode)
3119 register rtx op;
3120 enum machine_mode mode ATTRIBUTE_UNUSED;
3121{
3122 return op == const1_rtx;
3123}
2a2ab3f9 3124
e075ae69 3125/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 3126
e075ae69
RH
3127int
3128const248_operand (op, mode)
3129 register rtx op;
3130 enum machine_mode mode ATTRIBUTE_UNUSED;
3131{
3132 return (GET_CODE (op) == CONST_INT
3133 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3134}
e9a25f70 3135
e075ae69 3136/* True if this is a constant appropriate for an increment or decremenmt. */
81fd0956 3137
e075ae69
RH
3138int
3139incdec_operand (op, mode)
3140 register rtx op;
0631e0bf 3141 enum machine_mode mode ATTRIBUTE_UNUSED;
e075ae69 3142{
f5143c46 3143 /* On Pentium4, the inc and dec operations causes extra dependency on flag
b4e89e2d
JH
3144 registers, since carry flag is not set. */
3145 if (TARGET_PENTIUM4 && !optimize_size)
3146 return 0;
2b1c08f5 3147 return op == const1_rtx || op == constm1_rtx;
e075ae69 3148}
2a2ab3f9 3149
371bc54b
JH
3150/* Return nonzero if OP is acceptable as operand of DImode shift
3151 expander. */
3152
3153int
3154shiftdi_operand (op, mode)
3155 rtx op;
3156 enum machine_mode mode ATTRIBUTE_UNUSED;
3157{
3158 if (TARGET_64BIT)
3159 return nonimmediate_operand (op, mode);
3160 else
3161 return register_operand (op, mode);
3162}
3163
0f290768 3164/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
3165 register eliminable to the stack pointer. Otherwise, this is
3166 a register operand.
2a2ab3f9 3167
e075ae69
RH
3168 This is used to prevent esp from being used as an index reg.
3169 Which would only happen in pathological cases. */
5f1ec3e6 3170
e075ae69
RH
3171int
3172reg_no_sp_operand (op, mode)
3173 register rtx op;
3174 enum machine_mode mode;
3175{
3176 rtx t = op;
3177 if (GET_CODE (t) == SUBREG)
3178 t = SUBREG_REG (t);
564d80f4 3179 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 3180 return 0;
2a2ab3f9 3181
e075ae69 3182 return register_operand (op, mode);
2a2ab3f9 3183}
b840bfb0 3184
915119a5
BS
3185int
3186mmx_reg_operand (op, mode)
3187 register rtx op;
bd793c65 3188 enum machine_mode mode ATTRIBUTE_UNUSED;
915119a5
BS
3189{
3190 return MMX_REG_P (op);
3191}
3192
2c5a510c
RH
3193/* Return false if this is any eliminable register. Otherwise
3194 general_operand. */
3195
3196int
3197general_no_elim_operand (op, mode)
3198 register rtx op;
3199 enum machine_mode mode;
3200{
3201 rtx t = op;
3202 if (GET_CODE (t) == SUBREG)
3203 t = SUBREG_REG (t);
3204 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3205 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3206 || t == virtual_stack_dynamic_rtx)
3207 return 0;
1020a5ab
RH
3208 if (REG_P (t)
3209 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3210 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3211 return 0;
2c5a510c
RH
3212
3213 return general_operand (op, mode);
3214}
3215
3216/* Return false if this is any eliminable register. Otherwise
3217 register_operand or const_int. */
3218
3219int
3220nonmemory_no_elim_operand (op, mode)
3221 register rtx op;
3222 enum machine_mode mode;
3223{
3224 rtx t = op;
3225 if (GET_CODE (t) == SUBREG)
3226 t = SUBREG_REG (t);
3227 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3228 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3229 || t == virtual_stack_dynamic_rtx)
3230 return 0;
3231
3232 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3233}
3234
e075ae69 3235/* Return true if op is a Q_REGS class register. */
b840bfb0 3236
e075ae69
RH
3237int
3238q_regs_operand (op, mode)
3239 register rtx op;
3240 enum machine_mode mode;
b840bfb0 3241{
e075ae69
RH
3242 if (mode != VOIDmode && GET_MODE (op) != mode)
3243 return 0;
3244 if (GET_CODE (op) == SUBREG)
3245 op = SUBREG_REG (op);
7799175f 3246 return ANY_QI_REG_P (op);
0f290768 3247}
b840bfb0 3248
e075ae69 3249/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 3250
e075ae69
RH
3251int
3252non_q_regs_operand (op, mode)
3253 register rtx op;
3254 enum machine_mode mode;
3255{
3256 if (mode != VOIDmode && GET_MODE (op) != mode)
3257 return 0;
3258 if (GET_CODE (op) == SUBREG)
3259 op = SUBREG_REG (op);
3260 return NON_QI_REG_P (op);
0f290768 3261}
b840bfb0 3262
915119a5
BS
3263/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3264 insns. */
3265int
3266sse_comparison_operator (op, mode)
3267 rtx op;
3268 enum machine_mode mode ATTRIBUTE_UNUSED;
3269{
3270 enum rtx_code code = GET_CODE (op);
a46d1d38
JH
3271 switch (code)
3272 {
3273 /* Operations supported directly. */
3274 case EQ:
3275 case LT:
3276 case LE:
3277 case UNORDERED:
3278 case NE:
3279 case UNGE:
3280 case UNGT:
3281 case ORDERED:
3282 return 1;
3283 /* These are equivalent to ones above in non-IEEE comparisons. */
3284 case UNEQ:
3285 case UNLT:
3286 case UNLE:
3287 case LTGT:
3288 case GE:
3289 case GT:
3290 return !TARGET_IEEE_FP;
3291 default:
3292 return 0;
3293 }
915119a5 3294}
9076b9c1 3295/* Return 1 if OP is a valid comparison operator in valid mode. */
e075ae69 3296int
9076b9c1
JH
3297ix86_comparison_operator (op, mode)
3298 register rtx op;
3299 enum machine_mode mode;
e075ae69 3300{
9076b9c1 3301 enum machine_mode inmode;
9a915772 3302 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
3303 if (mode != VOIDmode && GET_MODE (op) != mode)
3304 return 0;
9a915772
JH
3305 if (GET_RTX_CLASS (code) != '<')
3306 return 0;
3307 inmode = GET_MODE (XEXP (op, 0));
3308
3309 if (inmode == CCFPmode || inmode == CCFPUmode)
3310 {
3311 enum rtx_code second_code, bypass_code;
3312 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3313 return (bypass_code == NIL && second_code == NIL);
3314 }
3315 switch (code)
3a3677ff
RH
3316 {
3317 case EQ: case NE:
3a3677ff 3318 return 1;
9076b9c1 3319 case LT: case GE:
7e08e190 3320 if (inmode == CCmode || inmode == CCGCmode
9076b9c1
JH
3321 || inmode == CCGOCmode || inmode == CCNOmode)
3322 return 1;
3323 return 0;
7e08e190 3324 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
7e08e190 3325 if (inmode == CCmode)
9076b9c1
JH
3326 return 1;
3327 return 0;
3328 case GT: case LE:
7e08e190 3329 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
9076b9c1
JH
3330 return 1;
3331 return 0;
3a3677ff
RH
3332 default:
3333 return 0;
3334 }
3335}
3336
9076b9c1 3337/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3a3677ff 3338
9076b9c1
JH
3339int
3340fcmov_comparison_operator (op, mode)
3a3677ff
RH
3341 register rtx op;
3342 enum machine_mode mode;
3343{
b62d22a2 3344 enum machine_mode inmode;
9a915772 3345 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
3346 if (mode != VOIDmode && GET_MODE (op) != mode)
3347 return 0;
9a915772
JH
3348 if (GET_RTX_CLASS (code) != '<')
3349 return 0;
3350 inmode = GET_MODE (XEXP (op, 0));
3351 if (inmode == CCFPmode || inmode == CCFPUmode)
3a3677ff 3352 {
9a915772
JH
3353 enum rtx_code second_code, bypass_code;
3354 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3355 if (bypass_code != NIL || second_code != NIL)
3356 return 0;
3357 code = ix86_fp_compare_code_to_integer (code);
3358 }
3359 /* i387 supports just limited amount of conditional codes. */
3360 switch (code)
3361 {
3362 case LTU: case GTU: case LEU: case GEU:
3363 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
9076b9c1
JH
3364 return 1;
3365 return 0;
9a915772
JH
3366 case ORDERED: case UNORDERED:
3367 case EQ: case NE:
3368 return 1;
3a3677ff
RH
3369 default:
3370 return 0;
3371 }
e075ae69 3372}
b840bfb0 3373
e9e80858
JH
3374/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3375
3376int
3377promotable_binary_operator (op, mode)
3378 register rtx op;
3379 enum machine_mode mode ATTRIBUTE_UNUSED;
3380{
3381 switch (GET_CODE (op))
3382 {
3383 case MULT:
3384 /* Modern CPUs have same latency for HImode and SImode multiply,
3385 but 386 and 486 do HImode multiply faster. */
3386 return ix86_cpu > PROCESSOR_I486;
3387 case PLUS:
3388 case AND:
3389 case IOR:
3390 case XOR:
3391 case ASHIFT:
3392 return 1;
3393 default:
3394 return 0;
3395 }
3396}
3397
e075ae69
RH
3398/* Nearly general operand, but accept any const_double, since we wish
3399 to be able to drop them into memory rather than have them get pulled
3400 into registers. */
b840bfb0 3401
2a2ab3f9 3402int
e075ae69
RH
3403cmp_fp_expander_operand (op, mode)
3404 register rtx op;
3405 enum machine_mode mode;
2a2ab3f9 3406{
e075ae69 3407 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 3408 return 0;
e075ae69 3409 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 3410 return 1;
e075ae69 3411 return general_operand (op, mode);
2a2ab3f9
JVA
3412}
3413
e075ae69 3414/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
3415
3416int
e075ae69 3417ext_register_operand (op, mode)
2a2ab3f9 3418 register rtx op;
bb5177ac 3419 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 3420{
3522082b 3421 int regno;
0d7d98ee
JH
3422 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3423 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
e075ae69 3424 return 0;
3522082b
JH
3425
3426 if (!register_operand (op, VOIDmode))
3427 return 0;
3428
3429 /* Be curefull to accept only registers having upper parts. */
3430 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3431 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
e075ae69
RH
3432}
3433
3434/* Return 1 if this is a valid binary floating-point operation.
0f290768 3435 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
3436
3437int
3438binary_fp_operator (op, mode)
3439 register rtx op;
3440 enum machine_mode mode;
3441{
3442 if (mode != VOIDmode && mode != GET_MODE (op))
3443 return 0;
3444
2a2ab3f9
JVA
3445 switch (GET_CODE (op))
3446 {
e075ae69
RH
3447 case PLUS:
3448 case MINUS:
3449 case MULT:
3450 case DIV:
3451 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 3452
2a2ab3f9
JVA
3453 default:
3454 return 0;
3455 }
3456}
fee2770d 3457
e075ae69 3458int
b531087a 3459mult_operator (op, mode)
e075ae69
RH
3460 register rtx op;
3461 enum machine_mode mode ATTRIBUTE_UNUSED;
3462{
3463 return GET_CODE (op) == MULT;
3464}
3465
3466int
b531087a 3467div_operator (op, mode)
e075ae69
RH
3468 register rtx op;
3469 enum machine_mode mode ATTRIBUTE_UNUSED;
3470{
3471 return GET_CODE (op) == DIV;
3472}
0a726ef1
JL
3473
3474int
e075ae69
RH
3475arith_or_logical_operator (op, mode)
3476 rtx op;
3477 enum machine_mode mode;
0a726ef1 3478{
e075ae69
RH
3479 return ((mode == VOIDmode || GET_MODE (op) == mode)
3480 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3481 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
3482}
3483
e075ae69 3484/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
3485
3486int
e075ae69
RH
3487memory_displacement_operand (op, mode)
3488 register rtx op;
3489 enum machine_mode mode;
4f2c8ebb 3490{
e075ae69 3491 struct ix86_address parts;
e9a25f70 3492
e075ae69
RH
3493 if (! memory_operand (op, mode))
3494 return 0;
3495
3496 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3497 abort ();
3498
3499 return parts.disp != NULL_RTX;
4f2c8ebb
RS
3500}
3501
16189740 3502/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
e075ae69
RH
3503 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3504
3505 ??? It seems likely that this will only work because cmpsi is an
3506 expander, and no actual insns use this. */
4f2c8ebb
RS
3507
3508int
e075ae69
RH
3509cmpsi_operand (op, mode)
3510 rtx op;
3511 enum machine_mode mode;
fee2770d 3512{
b9b2c339 3513 if (nonimmediate_operand (op, mode))
e075ae69
RH
3514 return 1;
3515
3516 if (GET_CODE (op) == AND
3517 && GET_MODE (op) == SImode
3518 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3519 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3520 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3521 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3522 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3523 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 3524 return 1;
e9a25f70 3525
fee2770d
RS
3526 return 0;
3527}
d784886d 3528
e075ae69
RH
3529/* Returns 1 if OP is memory operand that can not be represented by the
3530 modRM array. */
d784886d
RK
3531
3532int
e075ae69 3533long_memory_operand (op, mode)
d784886d
RK
3534 register rtx op;
3535 enum machine_mode mode;
3536{
e075ae69 3537 if (! memory_operand (op, mode))
d784886d
RK
3538 return 0;
3539
e075ae69 3540 return memory_address_length (op) != 0;
d784886d 3541}
2247f6ed
JH
3542
3543/* Return nonzero if the rtx is known aligned. */
3544
3545int
3546aligned_operand (op, mode)
3547 rtx op;
3548 enum machine_mode mode;
3549{
3550 struct ix86_address parts;
3551
3552 if (!general_operand (op, mode))
3553 return 0;
3554
0f290768 3555 /* Registers and immediate operands are always "aligned". */
2247f6ed
JH
3556 if (GET_CODE (op) != MEM)
3557 return 1;
3558
0f290768 3559 /* Don't even try to do any aligned optimizations with volatiles. */
2247f6ed
JH
3560 if (MEM_VOLATILE_P (op))
3561 return 0;
3562
3563 op = XEXP (op, 0);
3564
3565 /* Pushes and pops are only valid on the stack pointer. */
3566 if (GET_CODE (op) == PRE_DEC
3567 || GET_CODE (op) == POST_INC)
3568 return 1;
3569
3570 /* Decode the address. */
3571 if (! ix86_decompose_address (op, &parts))
3572 abort ();
3573
1540f9eb
JH
3574 if (parts.base && GET_CODE (parts.base) == SUBREG)
3575 parts.base = SUBREG_REG (parts.base);
3576 if (parts.index && GET_CODE (parts.index) == SUBREG)
3577 parts.index = SUBREG_REG (parts.index);
3578
2247f6ed
JH
3579 /* Look for some component that isn't known to be aligned. */
3580 if (parts.index)
3581 {
3582 if (parts.scale < 4
bdb429a5 3583 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
2247f6ed
JH
3584 return 0;
3585 }
3586 if (parts.base)
3587 {
bdb429a5 3588 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
2247f6ed
JH
3589 return 0;
3590 }
3591 if (parts.disp)
3592 {
3593 if (GET_CODE (parts.disp) != CONST_INT
3594 || (INTVAL (parts.disp) & 3) != 0)
3595 return 0;
3596 }
3597
3598 /* Didn't find one -- this must be an aligned address. */
3599 return 1;
3600}
e075ae69
RH
3601\f
3602/* Return true if the constant is something that can be loaded with
3603 a special instruction. Only handle 0.0 and 1.0; others are less
3604 worthwhile. */
57dbca5e
BS
3605
3606int
e075ae69
RH
3607standard_80387_constant_p (x)
3608 rtx x;
57dbca5e 3609{
2b04e52b 3610 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 3611 return -1;
2b04e52b
JH
3612 /* Note that on the 80387, other constants, such as pi, that we should support
3613 too. On some machines, these are much slower to load as standard constant,
3614 than to load from doubles in memory. */
3615 if (x == CONST0_RTX (GET_MODE (x)))
3616 return 1;
3617 if (x == CONST1_RTX (GET_MODE (x)))
3618 return 2;
e075ae69 3619 return 0;
57dbca5e
BS
3620}
3621
2b04e52b
JH
3622/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3623 */
3624int
3625standard_sse_constant_p (x)
3626 rtx x;
3627{
3628 if (GET_CODE (x) != CONST_DOUBLE)
3629 return -1;
3630 return (x == CONST0_RTX (GET_MODE (x)));
3631}
3632
2a2ab3f9
JVA
3633/* Returns 1 if OP contains a symbol reference */
3634
3635int
3636symbolic_reference_mentioned_p (op)
3637 rtx op;
3638{
6f7d635c 3639 register const char *fmt;
2a2ab3f9
JVA
3640 register int i;
3641
3642 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3643 return 1;
3644
3645 fmt = GET_RTX_FORMAT (GET_CODE (op));
3646 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3647 {
3648 if (fmt[i] == 'E')
3649 {
3650 register int j;
3651
3652 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3653 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3654 return 1;
3655 }
e9a25f70 3656
2a2ab3f9
JVA
3657 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3658 return 1;
3659 }
3660
3661 return 0;
3662}
e075ae69
RH
3663
3664/* Return 1 if it is appropriate to emit `ret' instructions in the
3665 body of a function. Do this only if the epilogue is simple, needing a
3666 couple of insns. Prior to reloading, we can't tell how many registers
3667 must be saved, so return 0 then. Return 0 if there is no frame
3668 marker to de-allocate.
3669
3670 If NON_SAVING_SETJMP is defined and true, then it is not possible
3671 for the epilogue to be simple, so return 0. This is a special case
3672 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3673 until final, but jump_optimize may need to know sooner if a
3674 `return' is OK. */
32b5b1aa
SC
3675
3676int
e075ae69 3677ix86_can_use_return_insn_p ()
32b5b1aa 3678{
4dd2ac2c 3679 struct ix86_frame frame;
9a7372d6 3680
e075ae69
RH
3681#ifdef NON_SAVING_SETJMP
3682 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3683 return 0;
3684#endif
9a7372d6
RH
3685
3686 if (! reload_completed || frame_pointer_needed)
3687 return 0;
32b5b1aa 3688
9a7372d6
RH
3689 /* Don't allow more than 32 pop, since that's all we can do
3690 with one instruction. */
3691 if (current_function_pops_args
3692 && current_function_args_size >= 32768)
e075ae69 3693 return 0;
32b5b1aa 3694
4dd2ac2c
JH
3695 ix86_compute_frame_layout (&frame);
3696 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 3697}
6189a572
JH
3698\f
3699/* Return 1 if VALUE can be stored in the sign extended immediate field. */
3700int
3701x86_64_sign_extended_value (value)
3702 rtx value;
3703{
3704 switch (GET_CODE (value))
3705 {
3706 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3707 to be at least 32 and this all acceptable constants are
3708 represented as CONST_INT. */
3709 case CONST_INT:
3710 if (HOST_BITS_PER_WIDE_INT == 32)
3711 return 1;
3712 else
3713 {
3714 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
fa9f36a1 3715 return trunc_int_for_mode (val, SImode) == val;
6189a572
JH
3716 }
3717 break;
3718
3719 /* For certain code models, the symbolic references are known to fit. */
3720 case SYMBOL_REF:
3721 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3722
3723 /* For certain code models, the code is near as well. */
3724 case LABEL_REF:
3725 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3726
3727 /* We also may accept the offsetted memory references in certain special
3728 cases. */
3729 case CONST:
3730 if (GET_CODE (XEXP (value, 0)) == UNSPEC
8ee41eaf 3731 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
6189a572
JH
3732 return 1;
3733 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3734 {
3735 rtx op1 = XEXP (XEXP (value, 0), 0);
3736 rtx op2 = XEXP (XEXP (value, 0), 1);
3737 HOST_WIDE_INT offset;
3738
3739 if (ix86_cmodel == CM_LARGE)
3740 return 0;
3741 if (GET_CODE (op2) != CONST_INT)
3742 return 0;
3743 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3744 switch (GET_CODE (op1))
3745 {
3746 case SYMBOL_REF:
3747 /* For CM_SMALL assume that latest object is 1MB before
3748 end of 31bits boundary. We may also accept pretty
3749 large negative constants knowing that all objects are
3750 in the positive half of address space. */
3751 if (ix86_cmodel == CM_SMALL
3752 && offset < 1024*1024*1024
3753 && trunc_int_for_mode (offset, SImode) == offset)
3754 return 1;
3755 /* For CM_KERNEL we know that all object resist in the
3756 negative half of 32bits address space. We may not
3757 accept negative offsets, since they may be just off
d6a7951f 3758 and we may accept pretty large positive ones. */
6189a572
JH
3759 if (ix86_cmodel == CM_KERNEL
3760 && offset > 0
3761 && trunc_int_for_mode (offset, SImode) == offset)
3762 return 1;
3763 break;
3764 case LABEL_REF:
3765 /* These conditions are similar to SYMBOL_REF ones, just the
3766 constraints for code models differ. */
3767 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3768 && offset < 1024*1024*1024
3769 && trunc_int_for_mode (offset, SImode) == offset)
3770 return 1;
3771 if (ix86_cmodel == CM_KERNEL
3772 && offset > 0
3773 && trunc_int_for_mode (offset, SImode) == offset)
3774 return 1;
3775 break;
3776 default:
3777 return 0;
3778 }
3779 }
3780 return 0;
3781 default:
3782 return 0;
3783 }
3784}
3785
3786/* Return 1 if VALUE can be stored in the zero extended immediate field. */
3787int
3788x86_64_zero_extended_value (value)
3789 rtx value;
3790{
3791 switch (GET_CODE (value))
3792 {
3793 case CONST_DOUBLE:
3794 if (HOST_BITS_PER_WIDE_INT == 32)
3795 return (GET_MODE (value) == VOIDmode
3796 && !CONST_DOUBLE_HIGH (value));
3797 else
3798 return 0;
3799 case CONST_INT:
3800 if (HOST_BITS_PER_WIDE_INT == 32)
3801 return INTVAL (value) >= 0;
3802 else
b531087a 3803 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
6189a572
JH
3804 break;
3805
3806 /* For certain code models, the symbolic references are known to fit. */
3807 case SYMBOL_REF:
3808 return ix86_cmodel == CM_SMALL;
3809
3810 /* For certain code models, the code is near as well. */
3811 case LABEL_REF:
3812 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3813
3814 /* We also may accept the offsetted memory references in certain special
3815 cases. */
3816 case CONST:
3817 if (GET_CODE (XEXP (value, 0)) == PLUS)
3818 {
3819 rtx op1 = XEXP (XEXP (value, 0), 0);
3820 rtx op2 = XEXP (XEXP (value, 0), 1);
3821
3822 if (ix86_cmodel == CM_LARGE)
3823 return 0;
3824 switch (GET_CODE (op1))
3825 {
3826 case SYMBOL_REF:
3827 return 0;
d6a7951f 3828 /* For small code model we may accept pretty large positive
6189a572
JH
3829 offsets, since one bit is available for free. Negative
3830 offsets are limited by the size of NULL pointer area
3831 specified by the ABI. */
3832 if (ix86_cmodel == CM_SMALL
3833 && GET_CODE (op2) == CONST_INT
3834 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3835 && (trunc_int_for_mode (INTVAL (op2), SImode)
3836 == INTVAL (op2)))
3837 return 1;
3838 /* ??? For the kernel, we may accept adjustment of
3839 -0x10000000, since we know that it will just convert
d6a7951f 3840 negative address space to positive, but perhaps this
6189a572
JH
3841 is not worthwhile. */
3842 break;
3843 case LABEL_REF:
3844 /* These conditions are similar to SYMBOL_REF ones, just the
3845 constraints for code models differ. */
3846 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3847 && GET_CODE (op2) == CONST_INT
3848 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3849 && (trunc_int_for_mode (INTVAL (op2), SImode)
3850 == INTVAL (op2)))
3851 return 1;
3852 break;
3853 default:
3854 return 0;
3855 }
3856 }
3857 return 0;
3858 default:
3859 return 0;
3860 }
3861}
6fca22eb
RH
3862
3863/* Value should be nonzero if functions must have frame pointers.
3864 Zero means the frame pointer need not be set up (and parms may
3865 be accessed via the stack pointer) in functions that seem suitable. */
3866
3867int
3868ix86_frame_pointer_required ()
3869{
3870 /* If we accessed previous frames, then the generated code expects
3871 to be able to access the saved ebp value in our frame. */
3872 if (cfun->machine->accesses_prev_frame)
3873 return 1;
a4f31c00 3874
6fca22eb
RH
3875 /* Several x86 os'es need a frame pointer for other reasons,
3876 usually pertaining to setjmp. */
3877 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3878 return 1;
3879
3880 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3881 the frame pointer by default. Turn it back on now if we've not
3882 got a leaf function. */
3883 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3884 return 1;
3885
3886 return 0;
3887}
3888
3889/* Record that the current function accesses previous call frames. */
3890
3891void
3892ix86_setup_frame_addresses ()
3893{
3894 cfun->machine->accesses_prev_frame = 1;
3895}
e075ae69 3896\f
4cf12e7e 3897static char pic_label_name[32];
e9a25f70 3898
e075ae69
RH
3899/* This function generates code for -fpic that loads %ebx with
3900 the return address of the caller and then returns. */
3901
3902void
4cf12e7e 3903ix86_asm_file_end (file)
e075ae69 3904 FILE *file;
e075ae69
RH
3905{
3906 rtx xops[2];
32b5b1aa 3907
f996902d 3908 if (pic_label_name[0] == 0)
4cf12e7e 3909 return;
32b5b1aa 3910
c7f0da1d
RH
3911 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
3912 to updating relocations to a section being discarded such that this
3913 doesn't work. Ought to detect this at configure time. */
7c262518 3914#if 0
4cf12e7e
RH
3915 /* The trick here is to create a linkonce section containing the
3916 pic label thunk, but to refer to it with an internal label.
3917 Because the label is internal, we don't have inter-dso name
3918 binding issues on hosts that don't support ".hidden".
e9a25f70 3919
4cf12e7e
RH
3920 In order to use these macros, however, we must create a fake
3921 function decl. */
7c262518
RH
3922 if (targetm.have_named_sections)
3923 {
3924 tree decl = build_decl (FUNCTION_DECL,
3925 get_identifier ("i686.get_pc_thunk"),
3926 error_mark_node);
3927 DECL_ONE_ONLY (decl) = 1;
ae46c4e0 3928 (*targetm.asm_out.unique_section) (decl, 0);
715bdd29 3929 named_section (decl, NULL);
7c262518
RH
3930 }
3931 else
4cf12e7e 3932#else
7c262518 3933 text_section ();
4cf12e7e 3934#endif
0afeb08a 3935
4cf12e7e
RH
3936 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3937 internal (non-global) label that's being emitted, it didn't make
3938 sense to have .type information for local labels. This caused
3939 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3940 me debug info for a label that you're declaring non-global?) this
3941 was changed to call ASM_OUTPUT_LABEL() instead. */
3942
3943 ASM_OUTPUT_LABEL (file, pic_label_name);
3944
3945 xops[0] = pic_offset_table_rtx;
3946 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3947 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3948 output_asm_insn ("ret", xops);
32b5b1aa 3949}
32b5b1aa 3950
c8c03509 3951/* Emit code for the SET_GOT patterns. */
32b5b1aa 3952
c8c03509
RH
3953const char *
3954output_set_got (dest)
3955 rtx dest;
3956{
3957 rtx xops[3];
0d7d98ee 3958
c8c03509
RH
3959 xops[0] = dest;
3960 xops[1] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
32b5b1aa 3961
c8c03509 3962 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
32b5b1aa 3963 {
c8c03509
RH
3964 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3965
3966 if (!flag_pic)
3967 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3968 else
3969 output_asm_insn ("call\t%a2", xops);
3970
3971 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3972 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3973
3974 if (flag_pic)
3975 output_asm_insn ("pop{l}\t%0", xops);
32b5b1aa 3976 }
e075ae69 3977 else
e5cb57e8 3978 {
f996902d
RH
3979 if (! pic_label_name[0])
3980 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
3981
3982 xops[2] = gen_rtx_SYMBOL_REF (Pmode, pic_label_name);
c8c03509
RH
3983 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3984 output_asm_insn ("call\t%X2", xops);
e5cb57e8 3985 }
e5cb57e8 3986
c8c03509
RH
3987 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3988 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3989 else
8e9fadc3 3990 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
79325812 3991
c8c03509 3992 return "";
e9a25f70 3993}
8dfe5673 3994
0d7d98ee 3995/* Generate an "push" pattern for input ARG. */
e9a25f70 3996
e075ae69
RH
3997static rtx
3998gen_push (arg)
3999 rtx arg;
e9a25f70 4000{
c5c76735 4001 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
4002 gen_rtx_MEM (Pmode,
4003 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
4004 stack_pointer_rtx)),
4005 arg);
e9a25f70
JL
4006}
4007
4dd2ac2c
JH
4008/* Return 1 if we need to save REGNO. */
4009static int
1020a5ab 4010ix86_save_reg (regno, maybe_eh_return)
9b690711 4011 unsigned int regno;
37a58036 4012 int maybe_eh_return;
1020a5ab 4013{
5b43fed1 4014 if (regno == PIC_OFFSET_TABLE_REGNUM
66edd3b4
RH
4015 && (regs_ever_live[regno]
4016 || current_function_profile
1020a5ab
RH
4017 || current_function_calls_eh_return))
4018 return 1;
4019
4020 if (current_function_calls_eh_return && maybe_eh_return)
4021 {
4022 unsigned i;
4023 for (i = 0; ; i++)
4024 {
b531087a 4025 unsigned test = EH_RETURN_DATA_REGNO (i);
1020a5ab
RH
4026 if (test == INVALID_REGNUM)
4027 break;
9b690711 4028 if (test == regno)
1020a5ab
RH
4029 return 1;
4030 }
4031 }
4dd2ac2c 4032
1020a5ab
RH
4033 return (regs_ever_live[regno]
4034 && !call_used_regs[regno]
4035 && !fixed_regs[regno]
4036 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4dd2ac2c
JH
4037}
4038
0903fcab
JH
4039/* Return number of registers to be saved on the stack. */
4040
4041static int
4042ix86_nsaved_regs ()
4043{
4044 int nregs = 0;
0903fcab
JH
4045 int regno;
4046
4dd2ac2c 4047 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4048 if (ix86_save_reg (regno, true))
4dd2ac2c 4049 nregs++;
0903fcab
JH
4050 return nregs;
4051}
4052
4053/* Return the offset between two registers, one to be eliminated, and the other
4054 its replacement, at the start of a routine. */
4055
4056HOST_WIDE_INT
4057ix86_initial_elimination_offset (from, to)
4058 int from;
4059 int to;
4060{
4dd2ac2c
JH
4061 struct ix86_frame frame;
4062 ix86_compute_frame_layout (&frame);
564d80f4
JH
4063
4064 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4065 return frame.hard_frame_pointer_offset;
564d80f4
JH
4066 else if (from == FRAME_POINTER_REGNUM
4067 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4068 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4069 else
4070 {
564d80f4
JH
4071 if (to != STACK_POINTER_REGNUM)
4072 abort ();
4073 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 4074 return frame.stack_pointer_offset;
564d80f4
JH
4075 else if (from != FRAME_POINTER_REGNUM)
4076 abort ();
0903fcab 4077 else
4dd2ac2c 4078 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4079 }
4080}
4081
4dd2ac2c 4082/* Fill structure ix86_frame about frame of currently computed function. */
0f290768 4083
4dd2ac2c
JH
4084static void
4085ix86_compute_frame_layout (frame)
4086 struct ix86_frame *frame;
65954bd8 4087{
65954bd8 4088 HOST_WIDE_INT total_size;
564d80f4 4089 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
44affdae
JH
4090 int offset;
4091 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4dd2ac2c 4092 HOST_WIDE_INT size = get_frame_size ();
65954bd8 4093
4dd2ac2c 4094 frame->nregs = ix86_nsaved_regs ();
564d80f4 4095 total_size = size;
65954bd8 4096
9ba81eaa 4097 /* Skip return address and saved base pointer. */
4dd2ac2c
JH
4098 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4099
4100 frame->hard_frame_pointer_offset = offset;
564d80f4 4101
fcbfaa65
RK
4102 /* Do some sanity checking of stack_alignment_needed and
4103 preferred_alignment, since i386 port is the only using those features
f710504c 4104 that may break easily. */
564d80f4 4105
44affdae
JH
4106 if (size && !stack_alignment_needed)
4107 abort ();
44affdae
JH
4108 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4109 abort ();
4110 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4111 abort ();
4112 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4113 abort ();
564d80f4 4114
4dd2ac2c
JH
4115 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4116 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 4117
4dd2ac2c
JH
4118 /* Register save area */
4119 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 4120
8362f420
JH
4121 /* Va-arg area */
4122 if (ix86_save_varrargs_registers)
4123 {
4124 offset += X86_64_VARARGS_SIZE;
4125 frame->va_arg_size = X86_64_VARARGS_SIZE;
4126 }
4127 else
4128 frame->va_arg_size = 0;
4129
4dd2ac2c
JH
4130 /* Align start of frame for local function. */
4131 frame->padding1 = ((offset + stack_alignment_needed - 1)
4132 & -stack_alignment_needed) - offset;
f73ad30e 4133
4dd2ac2c 4134 offset += frame->padding1;
65954bd8 4135
4dd2ac2c
JH
4136 /* Frame pointer points here. */
4137 frame->frame_pointer_offset = offset;
54ff41b7 4138
4dd2ac2c 4139 offset += size;
65954bd8 4140
0b7ae565
RH
4141 /* Add outgoing arguments area. Can be skipped if we eliminated
4142 all the function calls as dead code. */
4143 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4dd2ac2c
JH
4144 {
4145 offset += current_function_outgoing_args_size;
4146 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4147 }
4148 else
4149 frame->outgoing_arguments_size = 0;
564d80f4 4150
0b7ae565
RH
4151 /* Align stack boundary. Only needed if we're calling another function. */
4152 if (!current_function_is_leaf)
4153 frame->padding2 = ((offset + preferred_alignment - 1)
4154 & -preferred_alignment) - offset;
4155 else
4156 frame->padding2 = 0;
4dd2ac2c
JH
4157
4158 offset += frame->padding2;
4159
4160 /* We've reached end of stack frame. */
4161 frame->stack_pointer_offset = offset;
4162
4163 /* Size prologue needs to allocate. */
4164 frame->to_allocate =
4165 (size + frame->padding1 + frame->padding2
8362f420 4166 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 4167
8362f420
JH
4168 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4169 && current_function_is_leaf)
4170 {
4171 frame->red_zone_size = frame->to_allocate;
4172 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4173 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4174 }
4175 else
4176 frame->red_zone_size = 0;
4177 frame->to_allocate -= frame->red_zone_size;
4178 frame->stack_pointer_offset -= frame->red_zone_size;
4dd2ac2c
JH
4179#if 0
4180 fprintf (stderr, "nregs: %i\n", frame->nregs);
4181 fprintf (stderr, "size: %i\n", size);
4182 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4183 fprintf (stderr, "padding1: %i\n", frame->padding1);
8362f420 4184 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4dd2ac2c
JH
4185 fprintf (stderr, "padding2: %i\n", frame->padding2);
4186 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
8362f420 4187 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4dd2ac2c
JH
4188 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4189 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4190 frame->hard_frame_pointer_offset);
4191 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4192#endif
65954bd8
JL
4193}
4194
0903fcab
JH
4195/* Emit code to save registers in the prologue. */
4196
4197static void
4198ix86_emit_save_regs ()
4199{
4200 register int regno;
0903fcab 4201 rtx insn;
0903fcab 4202
4dd2ac2c 4203 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4204 if (ix86_save_reg (regno, true))
0903fcab 4205 {
0d7d98ee 4206 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
4207 RTX_FRAME_RELATED_P (insn) = 1;
4208 }
4209}
4210
c6036a37
JH
4211/* Emit code to save registers using MOV insns. First register
4212 is restored from POINTER + OFFSET. */
4213static void
4214ix86_emit_save_regs_using_mov (pointer, offset)
b72f00af
RK
4215 rtx pointer;
4216 HOST_WIDE_INT offset;
c6036a37
JH
4217{
4218 int regno;
4219 rtx insn;
4220
4221 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4222 if (ix86_save_reg (regno, true))
4223 {
b72f00af
RK
4224 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4225 Pmode, offset),
c6036a37
JH
4226 gen_rtx_REG (Pmode, regno));
4227 RTX_FRAME_RELATED_P (insn) = 1;
4228 offset += UNITS_PER_WORD;
4229 }
4230}
4231
0f290768 4232/* Expand the prologue into a bunch of separate insns. */
e075ae69
RH
4233
4234void
4235ix86_expand_prologue ()
2a2ab3f9 4236{
564d80f4 4237 rtx insn;
66edd3b4
RH
4238 int pic_reg_used = (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
4239 && (regs_ever_live[PIC_OFFSET_TABLE_REGNUM]
4240 || current_function_profile));
4dd2ac2c 4241 struct ix86_frame frame;
6ab16dd9 4242 int use_mov = 0;
c6036a37 4243 HOST_WIDE_INT allocate;
4dd2ac2c 4244
2ab0437e 4245 if (!optimize_size)
6ab16dd9
JH
4246 {
4247 use_fast_prologue_epilogue
4248 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
2ab0437e
JH
4249 if (TARGET_PROLOGUE_USING_MOVE)
4250 use_mov = use_fast_prologue_epilogue;
6ab16dd9 4251 }
4dd2ac2c 4252 ix86_compute_frame_layout (&frame);
79325812 4253
e075ae69
RH
4254 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4255 slower on all targets. Also sdb doesn't like it. */
e9a25f70 4256
2a2ab3f9
JVA
4257 if (frame_pointer_needed)
4258 {
564d80f4 4259 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
e075ae69 4260 RTX_FRAME_RELATED_P (insn) = 1;
e9a25f70 4261
564d80f4 4262 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
e075ae69 4263 RTX_FRAME_RELATED_P (insn) = 1;
2a2ab3f9
JVA
4264 }
4265
c6036a37
JH
4266 allocate = frame.to_allocate;
4267 /* In case we are dealing only with single register and empty frame,
4268 push is equivalent of the mov+add sequence. */
4269 if (allocate == 0 && frame.nregs <= 1)
4270 use_mov = 0;
4271
4272 if (!use_mov)
4273 ix86_emit_save_regs ();
4274 else
4275 allocate += frame.nregs * UNITS_PER_WORD;
564d80f4 4276
c6036a37 4277 if (allocate == 0)
8dfe5673 4278 ;
e323735c 4279 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
469ac993 4280 {
f2042df3
RH
4281 insn = emit_insn (gen_pro_epilogue_adjust_stack
4282 (stack_pointer_rtx, stack_pointer_rtx,
e323735c 4283 GEN_INT (-allocate)));
e075ae69 4284 RTX_FRAME_RELATED_P (insn) = 1;
469ac993 4285 }
79325812 4286 else
8dfe5673 4287 {
e075ae69 4288 /* ??? Is this only valid for Win32? */
e9a25f70 4289
e075ae69 4290 rtx arg0, sym;
e9a25f70 4291
8362f420 4292 if (TARGET_64BIT)
b531087a 4293 abort ();
8362f420 4294
e075ae69 4295 arg0 = gen_rtx_REG (SImode, 0);
c6036a37 4296 emit_move_insn (arg0, GEN_INT (allocate));
77a989d1 4297
e075ae69
RH
4298 sym = gen_rtx_MEM (FUNCTION_MODE,
4299 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
32ee7d1d 4300 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
e075ae69
RH
4301
4302 CALL_INSN_FUNCTION_USAGE (insn)
276ab4a4
RH
4303 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4304 CALL_INSN_FUNCTION_USAGE (insn));
e075ae69 4305 }
c6036a37
JH
4306 if (use_mov)
4307 {
4308 if (!frame_pointer_needed || !frame.to_allocate)
4309 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4310 else
4311 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4312 -frame.nregs * UNITS_PER_WORD);
4313 }
e9a25f70 4314
84530511
SC
4315#ifdef SUBTARGET_PROLOGUE
4316 SUBTARGET_PROLOGUE;
0f290768 4317#endif
84530511 4318
e9a25f70 4319 if (pic_reg_used)
c8c03509
RH
4320 {
4321 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4322
66edd3b4
RH
4323 /* Even with accurate pre-reload life analysis, we can wind up
4324 deleting all references to the pic register after reload.
4325 Consider if cross-jumping unifies two sides of a branch
4326 controled by a comparison vs the only read from a global.
4327 In which case, allow the set_got to be deleted, though we're
4328 too late to do anything about the ebx save in the prologue. */
c8c03509
RH
4329 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4330 }
77a989d1 4331
66edd3b4
RH
4332 /* Prevent function calls from be scheduled before the call to mcount.
4333 In the pic_reg_used case, make sure that the got load isn't deleted. */
4334 if (current_function_profile)
4335 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
77a989d1
SC
4336}
4337
da2d1d3a
JH
4338/* Emit code to restore saved registers using MOV insns. First register
4339 is restored from POINTER + OFFSET. */
4340static void
1020a5ab
RH
4341ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4342 rtx pointer;
4343 int offset;
37a58036 4344 int maybe_eh_return;
da2d1d3a
JH
4345{
4346 int regno;
da2d1d3a 4347
4dd2ac2c 4348 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4349 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 4350 {
4dd2ac2c 4351 emit_move_insn (gen_rtx_REG (Pmode, regno),
b72f00af
RK
4352 adjust_address (gen_rtx_MEM (Pmode, pointer),
4353 Pmode, offset));
4dd2ac2c 4354 offset += UNITS_PER_WORD;
da2d1d3a
JH
4355 }
4356}
4357
0f290768 4358/* Restore function stack, frame, and registers. */
e9a25f70 4359
2a2ab3f9 4360void
1020a5ab
RH
4361ix86_expand_epilogue (style)
4362 int style;
2a2ab3f9 4363{
1c71e60e 4364 int regno;
fdb8a883 4365 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4dd2ac2c 4366 struct ix86_frame frame;
65954bd8 4367 HOST_WIDE_INT offset;
4dd2ac2c
JH
4368
4369 ix86_compute_frame_layout (&frame);
2a2ab3f9 4370
a4f31c00 4371 /* Calculate start of saved registers relative to ebp. Special care
84e306b4
RH
4372 must be taken for the normal return case of a function using
4373 eh_return: the eax and edx registers are marked as saved, but not
4374 restored along this path. */
4375 offset = frame.nregs;
4376 if (current_function_calls_eh_return && style != 2)
4377 offset -= 2;
4378 offset *= -UNITS_PER_WORD;
2a2ab3f9 4379
fdb8a883
JW
4380 /* If we're only restoring one register and sp is not valid then
4381 using a move instruction to restore the register since it's
0f290768 4382 less work than reloading sp and popping the register.
da2d1d3a
JH
4383
4384 The default code result in stack adjustment using add/lea instruction,
4385 while this code results in LEAVE instruction (or discrete equivalent),
4386 so it is profitable in some other cases as well. Especially when there
4387 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4388 and there is exactly one register to pop. This heruistic may need some
4389 tuning in future. */
4dd2ac2c 4390 if ((!sp_valid && frame.nregs <= 1)
2ab0437e 4391 || (TARGET_EPILOGUE_USING_MOVE
6ab16dd9 4392 && use_fast_prologue_epilogue
c6036a37 4393 && (frame.nregs > 1 || frame.to_allocate))
4dd2ac2c 4394 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2ab0437e 4395 || (frame_pointer_needed && TARGET_USE_LEAVE
6ab16dd9 4396 && use_fast_prologue_epilogue && frame.nregs == 1)
2ab0437e 4397 || current_function_calls_eh_return)
2a2ab3f9 4398 {
da2d1d3a
JH
4399 /* Restore registers. We can use ebp or esp to address the memory
4400 locations. If both are available, default to ebp, since offsets
4401 are known to be small. Only exception is esp pointing directly to the
4402 end of block of saved registers, where we may simplify addressing
4403 mode. */
4404
4dd2ac2c 4405 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
1020a5ab
RH
4406 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4407 frame.to_allocate, style == 2);
da2d1d3a 4408 else
1020a5ab
RH
4409 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4410 offset, style == 2);
4411
4412 /* eh_return epilogues need %ecx added to the stack pointer. */
4413 if (style == 2)
4414 {
4415 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2a2ab3f9 4416
1020a5ab
RH
4417 if (frame_pointer_needed)
4418 {
4419 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4420 tmp = plus_constant (tmp, UNITS_PER_WORD);
4421 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4422
4423 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4424 emit_move_insn (hard_frame_pointer_rtx, tmp);
4425
4426 emit_insn (gen_pro_epilogue_adjust_stack
f2042df3 4427 (stack_pointer_rtx, sa, const0_rtx));
1020a5ab
RH
4428 }
4429 else
4430 {
4431 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4432 tmp = plus_constant (tmp, (frame.to_allocate
4433 + frame.nregs * UNITS_PER_WORD));
4434 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4435 }
4436 }
4437 else if (!frame_pointer_needed)
f2042df3
RH
4438 emit_insn (gen_pro_epilogue_adjust_stack
4439 (stack_pointer_rtx, stack_pointer_rtx,
4440 GEN_INT (frame.to_allocate
4441 + frame.nregs * UNITS_PER_WORD)));
0f290768 4442 /* If not an i386, mov & pop is faster than "leave". */
6ab16dd9 4443 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
8362f420 4444 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
c8c5cb99 4445 else
2a2ab3f9 4446 {
1c71e60e
JH
4447 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4448 hard_frame_pointer_rtx,
f2042df3 4449 const0_rtx));
8362f420
JH
4450 if (TARGET_64BIT)
4451 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4452 else
4453 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
e9a25f70
JL
4454 }
4455 }
1c71e60e 4456 else
68f654ec 4457 {
1c71e60e
JH
4458 /* First step is to deallocate the stack frame so that we can
4459 pop the registers. */
4460 if (!sp_valid)
4461 {
4462 if (!frame_pointer_needed)
4463 abort ();
4464 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4465 hard_frame_pointer_rtx,
f2042df3 4466 GEN_INT (offset)));
1c71e60e 4467 }
4dd2ac2c 4468 else if (frame.to_allocate)
f2042df3
RH
4469 emit_insn (gen_pro_epilogue_adjust_stack
4470 (stack_pointer_rtx, stack_pointer_rtx,
4471 GEN_INT (frame.to_allocate)));
1c71e60e 4472
4dd2ac2c 4473 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4474 if (ix86_save_reg (regno, false))
8362f420
JH
4475 {
4476 if (TARGET_64BIT)
4477 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4478 else
4479 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4480 }
4dd2ac2c 4481 if (frame_pointer_needed)
8362f420 4482 {
f5143c46 4483 /* Leave results in shorter dependency chains on CPUs that are
2ab0437e
JH
4484 able to grok it fast. */
4485 if (TARGET_USE_LEAVE)
4486 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4487 else if (TARGET_64BIT)
8362f420
JH
4488 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4489 else
4490 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4491 }
68f654ec 4492 }
68f654ec 4493
cbbf65e0 4494 /* Sibcall epilogues don't want a return instruction. */
1020a5ab 4495 if (style == 0)
cbbf65e0
RH
4496 return;
4497
2a2ab3f9
JVA
4498 if (current_function_pops_args && current_function_args_size)
4499 {
e075ae69 4500 rtx popc = GEN_INT (current_function_pops_args);
2a2ab3f9 4501
b8c752c8
UD
4502 /* i386 can only pop 64K bytes. If asked to pop more, pop
4503 return address, do explicit add, and jump indirectly to the
0f290768 4504 caller. */
2a2ab3f9 4505
b8c752c8 4506 if (current_function_pops_args >= 65536)
2a2ab3f9 4507 {
e075ae69 4508 rtx ecx = gen_rtx_REG (SImode, 2);
e9a25f70 4509
8362f420
JH
4510 /* There are is no "pascal" calling convention in 64bit ABI. */
4511 if (TARGET_64BIT)
b531087a 4512 abort ();
8362f420 4513
e075ae69
RH
4514 emit_insn (gen_popsi1 (ecx));
4515 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 4516 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 4517 }
79325812 4518 else
e075ae69
RH
4519 emit_jump_insn (gen_return_pop_internal (popc));
4520 }
4521 else
4522 emit_jump_insn (gen_return_internal ());
4523}
4524\f
4525/* Extract the parts of an RTL expression that is a valid memory address
b446e5a2
JH
4526 for an instruction. Return 0 if the structure of the address is
4527 grossly off. Return -1 if the address contains ASHIFT, so it is not
4528 strictly valid, but still used for computing length of lea instruction.
4529 */
e075ae69
RH
4530
4531static int
4532ix86_decompose_address (addr, out)
4533 register rtx addr;
4534 struct ix86_address *out;
4535{
4536 rtx base = NULL_RTX;
4537 rtx index = NULL_RTX;
4538 rtx disp = NULL_RTX;
4539 HOST_WIDE_INT scale = 1;
4540 rtx scale_rtx = NULL_RTX;
b446e5a2 4541 int retval = 1;
e075ae69 4542
1540f9eb 4543 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
e075ae69
RH
4544 base = addr;
4545 else if (GET_CODE (addr) == PLUS)
4546 {
4547 rtx op0 = XEXP (addr, 0);
4548 rtx op1 = XEXP (addr, 1);
4549 enum rtx_code code0 = GET_CODE (op0);
4550 enum rtx_code code1 = GET_CODE (op1);
4551
4552 if (code0 == REG || code0 == SUBREG)
4553 {
4554 if (code1 == REG || code1 == SUBREG)
4555 index = op0, base = op1; /* index + base */
4556 else
4557 base = op0, disp = op1; /* base + displacement */
4558 }
4559 else if (code0 == MULT)
e9a25f70 4560 {
e075ae69
RH
4561 index = XEXP (op0, 0);
4562 scale_rtx = XEXP (op0, 1);
4563 if (code1 == REG || code1 == SUBREG)
4564 base = op1; /* index*scale + base */
e9a25f70 4565 else
e075ae69
RH
4566 disp = op1; /* index*scale + disp */
4567 }
4568 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4569 {
4570 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4571 scale_rtx = XEXP (XEXP (op0, 0), 1);
4572 base = XEXP (op0, 1);
4573 disp = op1;
2a2ab3f9 4574 }
e075ae69
RH
4575 else if (code0 == PLUS)
4576 {
4577 index = XEXP (op0, 0); /* index + base + disp */
4578 base = XEXP (op0, 1);
4579 disp = op1;
4580 }
4581 else
b446e5a2 4582 return 0;
e075ae69
RH
4583 }
4584 else if (GET_CODE (addr) == MULT)
4585 {
4586 index = XEXP (addr, 0); /* index*scale */
4587 scale_rtx = XEXP (addr, 1);
4588 }
4589 else if (GET_CODE (addr) == ASHIFT)
4590 {
4591 rtx tmp;
4592
4593 /* We're called for lea too, which implements ashift on occasion. */
4594 index = XEXP (addr, 0);
4595 tmp = XEXP (addr, 1);
4596 if (GET_CODE (tmp) != CONST_INT)
b446e5a2 4597 return 0;
e075ae69
RH
4598 scale = INTVAL (tmp);
4599 if ((unsigned HOST_WIDE_INT) scale > 3)
b446e5a2 4600 return 0;
e075ae69 4601 scale = 1 << scale;
b446e5a2 4602 retval = -1;
2a2ab3f9 4603 }
2a2ab3f9 4604 else
e075ae69
RH
4605 disp = addr; /* displacement */
4606
4607 /* Extract the integral value of scale. */
4608 if (scale_rtx)
e9a25f70 4609 {
e075ae69 4610 if (GET_CODE (scale_rtx) != CONST_INT)
b446e5a2 4611 return 0;
e075ae69 4612 scale = INTVAL (scale_rtx);
e9a25f70 4613 }
3b3c6a3f 4614
e075ae69
RH
4615 /* Allow arg pointer and stack pointer as index if there is not scaling */
4616 if (base && index && scale == 1
564d80f4
JH
4617 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4618 || index == stack_pointer_rtx))
e075ae69
RH
4619 {
4620 rtx tmp = base;
4621 base = index;
4622 index = tmp;
4623 }
4624
4625 /* Special case: %ebp cannot be encoded as a base without a displacement. */
564d80f4
JH
4626 if ((base == hard_frame_pointer_rtx
4627 || base == frame_pointer_rtx
4628 || base == arg_pointer_rtx) && !disp)
e075ae69
RH
4629 disp = const0_rtx;
4630
4631 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4632 Avoid this by transforming to [%esi+0]. */
4633 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4634 && base && !index && !disp
329e1d01 4635 && REG_P (base)
e075ae69
RH
4636 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4637 disp = const0_rtx;
4638
4639 /* Special case: encode reg+reg instead of reg*2. */
4640 if (!base && index && scale && scale == 2)
4641 base = index, scale = 1;
0f290768 4642
e075ae69
RH
4643 /* Special case: scaling cannot be encoded without base or displacement. */
4644 if (!base && !disp && index && scale != 1)
4645 disp = const0_rtx;
4646
4647 out->base = base;
4648 out->index = index;
4649 out->disp = disp;
4650 out->scale = scale;
3b3c6a3f 4651
b446e5a2 4652 return retval;
e075ae69 4653}
01329426
JH
4654\f
4655/* Return cost of the memory address x.
4656 For i386, it is better to use a complex address than let gcc copy
4657 the address into a reg and make a new pseudo. But not if the address
4658 requires to two regs - that would mean more pseudos with longer
4659 lifetimes. */
4660int
4661ix86_address_cost (x)
4662 rtx x;
4663{
4664 struct ix86_address parts;
4665 int cost = 1;
3b3c6a3f 4666
01329426
JH
4667 if (!ix86_decompose_address (x, &parts))
4668 abort ();
4669
1540f9eb
JH
4670 if (parts.base && GET_CODE (parts.base) == SUBREG)
4671 parts.base = SUBREG_REG (parts.base);
4672 if (parts.index && GET_CODE (parts.index) == SUBREG)
4673 parts.index = SUBREG_REG (parts.index);
4674
01329426
JH
4675 /* More complex memory references are better. */
4676 if (parts.disp && parts.disp != const0_rtx)
4677 cost--;
4678
4679 /* Attempt to minimize number of registers in the address. */
4680 if ((parts.base
4681 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4682 || (parts.index
4683 && (!REG_P (parts.index)
4684 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4685 cost++;
4686
4687 if (parts.base
4688 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4689 && parts.index
4690 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4691 && parts.base != parts.index)
4692 cost++;
4693
4694 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4695 since it's predecode logic can't detect the length of instructions
4696 and it degenerates to vector decoded. Increase cost of such
4697 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 4698 to split such addresses or even refuse such addresses at all.
01329426
JH
4699
4700 Following addressing modes are affected:
4701 [base+scale*index]
4702 [scale*index+disp]
4703 [base+index]
0f290768 4704
01329426
JH
4705 The first and last case may be avoidable by explicitly coding the zero in
4706 memory address, but I don't have AMD-K6 machine handy to check this
4707 theory. */
4708
4709 if (TARGET_K6
4710 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4711 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4712 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4713 cost += 10;
0f290768 4714
01329426
JH
4715 return cost;
4716}
4717\f
b949ea8b
JW
4718/* If X is a machine specific address (i.e. a symbol or label being
4719 referenced as a displacement from the GOT implemented using an
4720 UNSPEC), then return the base term. Otherwise return X. */
4721
4722rtx
4723ix86_find_base_term (x)
4724 rtx x;
4725{
4726 rtx term;
4727
6eb791fc
JH
4728 if (TARGET_64BIT)
4729 {
4730 if (GET_CODE (x) != CONST)
4731 return x;
4732 term = XEXP (x, 0);
4733 if (GET_CODE (term) == PLUS
4734 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4735 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4736 term = XEXP (term, 0);
4737 if (GET_CODE (term) != UNSPEC
8ee41eaf 4738 || XINT (term, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
4739 return x;
4740
4741 term = XVECEXP (term, 0, 0);
4742
4743 if (GET_CODE (term) != SYMBOL_REF
4744 && GET_CODE (term) != LABEL_REF)
4745 return x;
4746
4747 return term;
4748 }
4749
b949ea8b
JW
4750 if (GET_CODE (x) != PLUS
4751 || XEXP (x, 0) != pic_offset_table_rtx
4752 || GET_CODE (XEXP (x, 1)) != CONST)
4753 return x;
4754
4755 term = XEXP (XEXP (x, 1), 0);
4756
4757 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4758 term = XEXP (term, 0);
4759
4760 if (GET_CODE (term) != UNSPEC
8ee41eaf 4761 || XINT (term, 1) != UNSPEC_GOTOFF)
b949ea8b
JW
4762 return x;
4763
4764 term = XVECEXP (term, 0, 0);
4765
4766 if (GET_CODE (term) != SYMBOL_REF
4767 && GET_CODE (term) != LABEL_REF)
4768 return x;
4769
4770 return term;
4771}
4772\f
f996902d
RH
4773/* Determine if a given RTX is a valid constant. We already know this
4774 satisfies CONSTANT_P. */
4775
4776bool
4777legitimate_constant_p (x)
4778 rtx x;
4779{
4780 rtx inner;
4781
4782 switch (GET_CODE (x))
4783 {
4784 case SYMBOL_REF:
4785 /* TLS symbols are not constant. */
4786 if (tls_symbolic_operand (x, Pmode))
4787 return false;
4788 break;
4789
4790 case CONST:
4791 inner = XEXP (x, 0);
4792
4793 /* Offsets of TLS symbols are never valid.
4794 Discourage CSE from creating them. */
4795 if (GET_CODE (inner) == PLUS
4796 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4797 return false;
4798
4799 /* Only some unspecs are valid as "constants". */
4800 if (GET_CODE (inner) == UNSPEC)
4801 switch (XINT (inner, 1))
4802 {
4803 case UNSPEC_TPOFF:
4804 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4805 case UNSPEC_TP:
4806 return true;
4807 default:
4808 return false;
4809 }
4810 break;
4811
4812 default:
4813 break;
4814 }
4815
4816 /* Otherwise we handle everything else in the move patterns. */
4817 return true;
4818}
4819
4820/* Determine if a given RTX is a valid constant address. */
4821
4822bool
4823constant_address_p (x)
4824 rtx x;
4825{
4826 switch (GET_CODE (x))
4827 {
4828 case LABEL_REF:
4829 case CONST_INT:
4830 return true;
4831
4832 case CONST_DOUBLE:
4833 return TARGET_64BIT;
4834
4835 case CONST:
4836 case SYMBOL_REF:
4837 return !flag_pic && legitimate_constant_p (x);
4838
4839 default:
4840 return false;
4841 }
4842}
4843
4844/* Nonzero if the constant value X is a legitimate general operand
4845 when generating PIC code. It is given that flag_pic is on and
4846 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4847
4848bool
4849legitimate_pic_operand_p (x)
4850 rtx x;
4851{
4852 rtx inner;
4853
4854 switch (GET_CODE (x))
4855 {
4856 case CONST:
4857 inner = XEXP (x, 0);
4858
4859 /* Only some unspecs are valid as "constants". */
4860 if (GET_CODE (inner) == UNSPEC)
4861 switch (XINT (inner, 1))
4862 {
4863 case UNSPEC_TPOFF:
4864 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4865 case UNSPEC_TP:
4866 return true;
4867 default:
4868 return false;
4869 }
4870 /* FALLTHRU */
4871
4872 case SYMBOL_REF:
4873 case LABEL_REF:
4874 return legitimate_pic_address_disp_p (x);
4875
4876 default:
4877 return true;
4878 }
4879}
4880
e075ae69
RH
4881/* Determine if a given CONST RTX is a valid memory displacement
4882 in PIC mode. */
0f290768 4883
59be65f6 4884int
91bb873f
RH
4885legitimate_pic_address_disp_p (disp)
4886 register rtx disp;
4887{
f996902d
RH
4888 bool saw_plus;
4889
6eb791fc
JH
4890 /* In 64bit mode we can allow direct addresses of symbols and labels
4891 when they are not dynamic symbols. */
4892 if (TARGET_64BIT)
4893 {
4894 rtx x = disp;
4895 if (GET_CODE (disp) == CONST)
4896 x = XEXP (disp, 0);
4897 /* ??? Handle PIC code models */
4898 if (GET_CODE (x) == PLUS
4899 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4900 && ix86_cmodel == CM_SMALL_PIC
4901 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4902 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4903 x = XEXP (x, 0);
4904 if (local_symbolic_operand (x, Pmode))
4905 return 1;
4906 }
91bb873f
RH
4907 if (GET_CODE (disp) != CONST)
4908 return 0;
4909 disp = XEXP (disp, 0);
4910
6eb791fc
JH
4911 if (TARGET_64BIT)
4912 {
4913 /* We are unsafe to allow PLUS expressions. This limit allowed distance
4914 of GOT tables. We should not need these anyway. */
4915 if (GET_CODE (disp) != UNSPEC
8ee41eaf 4916 || XINT (disp, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
4917 return 0;
4918
4919 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4920 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4921 return 0;
4922 return 1;
4923 }
4924
f996902d 4925 saw_plus = false;
91bb873f
RH
4926 if (GET_CODE (disp) == PLUS)
4927 {
4928 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4929 return 0;
4930 disp = XEXP (disp, 0);
f996902d 4931 saw_plus = true;
91bb873f
RH
4932 }
4933
8ee41eaf 4934 if (GET_CODE (disp) != UNSPEC)
91bb873f
RH
4935 return 0;
4936
623fe810
RH
4937 switch (XINT (disp, 1))
4938 {
8ee41eaf 4939 case UNSPEC_GOT:
f996902d
RH
4940 if (saw_plus)
4941 return false;
623fe810 4942 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
8ee41eaf 4943 case UNSPEC_GOTOFF:
623fe810 4944 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
f996902d
RH
4945 case UNSPEC_GOTTPOFF:
4946 if (saw_plus)
4947 return false;
4948 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4949 case UNSPEC_NTPOFF:
4950 /* ??? Could support offset here. */
4951 if (saw_plus)
4952 return false;
4953 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4954 case UNSPEC_DTPOFF:
4955 /* ??? Could support offset here. */
4956 if (saw_plus)
4957 return false;
4958 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
623fe810
RH
4959 }
4960
4961 return 0;
91bb873f
RH
4962}
4963
e075ae69
RH
4964/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4965 memory address for an instruction. The MODE argument is the machine mode
4966 for the MEM expression that wants to use this address.
4967
4968 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
4969 convert common non-canonical forms to canonical form so that they will
4970 be recognized. */
4971
3b3c6a3f
MM
4972int
4973legitimate_address_p (mode, addr, strict)
4974 enum machine_mode mode;
4975 register rtx addr;
4976 int strict;
4977{
e075ae69
RH
4978 struct ix86_address parts;
4979 rtx base, index, disp;
4980 HOST_WIDE_INT scale;
4981 const char *reason = NULL;
4982 rtx reason_rtx = NULL_RTX;
3b3c6a3f
MM
4983
4984 if (TARGET_DEBUG_ADDR)
4985 {
4986 fprintf (stderr,
e9a25f70 4987 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
3b3c6a3f 4988 GET_MODE_NAME (mode), strict);
3b3c6a3f
MM
4989 debug_rtx (addr);
4990 }
4991
b446e5a2 4992 if (ix86_decompose_address (addr, &parts) <= 0)
3b3c6a3f 4993 {
e075ae69 4994 reason = "decomposition failed";
50e60bc3 4995 goto report_error;
3b3c6a3f
MM
4996 }
4997
e075ae69
RH
4998 base = parts.base;
4999 index = parts.index;
5000 disp = parts.disp;
5001 scale = parts.scale;
91f0226f 5002
e075ae69 5003 /* Validate base register.
e9a25f70
JL
5004
5005 Don't allow SUBREG's here, it can lead to spill failures when the base
3d771dfd
MM
5006 is one word out of a two word structure, which is represented internally
5007 as a DImode int. */
e9a25f70 5008
3b3c6a3f
MM
5009 if (base)
5010 {
1540f9eb 5011 rtx reg;
e075ae69
RH
5012 reason_rtx = base;
5013
1540f9eb
JH
5014 if (GET_CODE (base) == SUBREG)
5015 reg = SUBREG_REG (base);
5016 else
5017 reg = base;
5018
5019 if (GET_CODE (reg) != REG)
3b3c6a3f 5020 {
e075ae69 5021 reason = "base is not a register";
50e60bc3 5022 goto report_error;
3b3c6a3f
MM
5023 }
5024
c954bd01
RH
5025 if (GET_MODE (base) != Pmode)
5026 {
e075ae69 5027 reason = "base is not in Pmode";
50e60bc3 5028 goto report_error;
c954bd01
RH
5029 }
5030
1540f9eb
JH
5031 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5032 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
3b3c6a3f 5033 {
e075ae69 5034 reason = "base is not valid";
50e60bc3 5035 goto report_error;
3b3c6a3f
MM
5036 }
5037 }
5038
e075ae69 5039 /* Validate index register.
e9a25f70
JL
5040
5041 Don't allow SUBREG's here, it can lead to spill failures when the index
3d771dfd
MM
5042 is one word out of a two word structure, which is represented internally
5043 as a DImode int. */
e075ae69
RH
5044
5045 if (index)
3b3c6a3f 5046 {
1540f9eb 5047 rtx reg;
e075ae69
RH
5048 reason_rtx = index;
5049
1540f9eb
JH
5050 if (GET_CODE (index) == SUBREG)
5051 reg = SUBREG_REG (index);
5052 else
5053 reg = index;
5054
5055 if (GET_CODE (reg) != REG)
3b3c6a3f 5056 {
e075ae69 5057 reason = "index is not a register";
50e60bc3 5058 goto report_error;
3b3c6a3f
MM
5059 }
5060
e075ae69 5061 if (GET_MODE (index) != Pmode)
c954bd01 5062 {
e075ae69 5063 reason = "index is not in Pmode";
50e60bc3 5064 goto report_error;
c954bd01
RH
5065 }
5066
1540f9eb
JH
5067 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5068 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
3b3c6a3f 5069 {
e075ae69 5070 reason = "index is not valid";
50e60bc3 5071 goto report_error;
3b3c6a3f
MM
5072 }
5073 }
3b3c6a3f 5074
e075ae69
RH
5075 /* Validate scale factor. */
5076 if (scale != 1)
3b3c6a3f 5077 {
e075ae69
RH
5078 reason_rtx = GEN_INT (scale);
5079 if (!index)
3b3c6a3f 5080 {
e075ae69 5081 reason = "scale without index";
50e60bc3 5082 goto report_error;
3b3c6a3f
MM
5083 }
5084
e075ae69 5085 if (scale != 2 && scale != 4 && scale != 8)
3b3c6a3f 5086 {
e075ae69 5087 reason = "scale is not a valid multiplier";
50e60bc3 5088 goto report_error;
3b3c6a3f
MM
5089 }
5090 }
5091
91bb873f 5092 /* Validate displacement. */
3b3c6a3f
MM
5093 if (disp)
5094 {
e075ae69
RH
5095 reason_rtx = disp;
5096
0d7d98ee 5097 if (TARGET_64BIT)
3b3c6a3f 5098 {
0d7d98ee
JH
5099 if (!x86_64_sign_extended_value (disp))
5100 {
5101 reason = "displacement is out of range";
5102 goto report_error;
5103 }
5104 }
5105 else
5106 {
5107 if (GET_CODE (disp) == CONST_DOUBLE)
5108 {
5109 reason = "displacement is a const_double";
5110 goto report_error;
5111 }
3b3c6a3f
MM
5112 }
5113
f996902d
RH
5114 if (GET_CODE (disp) == CONST
5115 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5116 switch (XINT (XEXP (disp, 0), 1))
5117 {
5118 case UNSPEC_GOT:
5119 case UNSPEC_GOTOFF:
5120 case UNSPEC_GOTPCREL:
5121 if (!flag_pic)
5122 abort ();
5123 goto is_legitimate_pic;
5124
5125 case UNSPEC_GOTTPOFF:
5126 case UNSPEC_NTPOFF:
5127 case UNSPEC_DTPOFF:
5128 break;
5129
5130 default:
5131 reason = "invalid address unspec";
5132 goto report_error;
5133 }
5134
5135 else if (flag_pic && SYMBOLIC_CONST (disp))
3b3c6a3f 5136 {
f996902d 5137 is_legitimate_pic:
0d7d98ee
JH
5138 if (TARGET_64BIT && (index || base))
5139 {
5140 reason = "non-constant pic memory reference";
5141 goto report_error;
5142 }
91bb873f
RH
5143 if (! legitimate_pic_address_disp_p (disp))
5144 {
e075ae69 5145 reason = "displacement is an invalid pic construct";
50e60bc3 5146 goto report_error;
91bb873f
RH
5147 }
5148
4e9efe54 5149 /* This code used to verify that a symbolic pic displacement
0f290768
KH
5150 includes the pic_offset_table_rtx register.
5151
4e9efe54
JH
5152 While this is good idea, unfortunately these constructs may
5153 be created by "adds using lea" optimization for incorrect
5154 code like:
5155
5156 int a;
5157 int foo(int i)
5158 {
5159 return *(&a+i);
5160 }
5161
50e60bc3 5162 This code is nonsensical, but results in addressing
4e9efe54 5163 GOT table with pic_offset_table_rtx base. We can't
f710504c 5164 just refuse it easily, since it gets matched by
4e9efe54
JH
5165 "addsi3" pattern, that later gets split to lea in the
5166 case output register differs from input. While this
5167 can be handled by separate addsi pattern for this case
5168 that never results in lea, this seems to be easier and
5169 correct fix for crash to disable this test. */
3b3c6a3f 5170 }
91bb873f 5171 else if (HALF_PIC_P ())
3b3c6a3f 5172 {
91bb873f 5173 if (! HALF_PIC_ADDRESS_P (disp)
e075ae69 5174 || (base != NULL_RTX || index != NULL_RTX))
91bb873f 5175 {
e075ae69 5176 reason = "displacement is an invalid half-pic reference";
50e60bc3 5177 goto report_error;
91bb873f 5178 }
3b3c6a3f 5179 }
f996902d
RH
5180 else if (!CONSTANT_ADDRESS_P (disp))
5181 {
5182 reason = "displacement is not constant";
5183 goto report_error;
5184 }
3b3c6a3f
MM
5185 }
5186
e075ae69 5187 /* Everything looks valid. */
3b3c6a3f 5188 if (TARGET_DEBUG_ADDR)
e075ae69 5189 fprintf (stderr, "Success.\n");
3b3c6a3f 5190 return TRUE;
e075ae69 5191
5bf0ebab 5192 report_error:
e075ae69
RH
5193 if (TARGET_DEBUG_ADDR)
5194 {
5195 fprintf (stderr, "Error: %s\n", reason);
5196 debug_rtx (reason_rtx);
5197 }
5198 return FALSE;
3b3c6a3f 5199}
3b3c6a3f 5200\f
55efb413
JW
5201/* Return an unique alias set for the GOT. */
5202
0f290768 5203static HOST_WIDE_INT
55efb413
JW
5204ix86_GOT_alias_set ()
5205{
5bf0ebab
RH
5206 static HOST_WIDE_INT set = -1;
5207 if (set == -1)
5208 set = new_alias_set ();
5209 return set;
0f290768 5210}
55efb413 5211
3b3c6a3f
MM
5212/* Return a legitimate reference for ORIG (an address) using the
5213 register REG. If REG is 0, a new pseudo is generated.
5214
91bb873f 5215 There are two types of references that must be handled:
3b3c6a3f
MM
5216
5217 1. Global data references must load the address from the GOT, via
5218 the PIC reg. An insn is emitted to do this load, and the reg is
5219 returned.
5220
91bb873f
RH
5221 2. Static data references, constant pool addresses, and code labels
5222 compute the address as an offset from the GOT, whose base is in
5223 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5224 differentiate them from global data objects. The returned
5225 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
5226
5227 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 5228 reg also appears in the address. */
3b3c6a3f
MM
5229
5230rtx
5231legitimize_pic_address (orig, reg)
5232 rtx orig;
5233 rtx reg;
5234{
5235 rtx addr = orig;
5236 rtx new = orig;
91bb873f 5237 rtx base;
3b3c6a3f 5238
623fe810 5239 if (local_symbolic_operand (addr, Pmode))
3b3c6a3f 5240 {
14f73b5a
JH
5241 /* In 64bit mode we can address such objects directly. */
5242 if (TARGET_64BIT)
5243 new = addr;
5244 else
5245 {
5246 /* This symbol may be referenced via a displacement from the PIC
5247 base address (@GOTOFF). */
3b3c6a3f 5248
66edd3b4
RH
5249 if (reload_in_progress)
5250 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf 5251 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
14f73b5a
JH
5252 new = gen_rtx_CONST (Pmode, new);
5253 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3b3c6a3f 5254
14f73b5a
JH
5255 if (reg != 0)
5256 {
5257 emit_move_insn (reg, new);
5258 new = reg;
5259 }
5260 }
3b3c6a3f 5261 }
91bb873f 5262 else if (GET_CODE (addr) == SYMBOL_REF)
3b3c6a3f 5263 {
14f73b5a
JH
5264 if (TARGET_64BIT)
5265 {
8ee41eaf 5266 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
14f73b5a
JH
5267 new = gen_rtx_CONST (Pmode, new);
5268 new = gen_rtx_MEM (Pmode, new);
5269 RTX_UNCHANGING_P (new) = 1;
5270 set_mem_alias_set (new, ix86_GOT_alias_set ());
5271
5272 if (reg == 0)
5273 reg = gen_reg_rtx (Pmode);
5274 /* Use directly gen_movsi, otherwise the address is loaded
5275 into register for CSE. We don't want to CSE this addresses,
5276 instead we CSE addresses from the GOT table, so skip this. */
5277 emit_insn (gen_movsi (reg, new));
5278 new = reg;
5279 }
5280 else
5281 {
5282 /* This symbol must be referenced via a load from the
5283 Global Offset Table (@GOT). */
3b3c6a3f 5284
66edd3b4
RH
5285 if (reload_in_progress)
5286 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf 5287 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
14f73b5a
JH
5288 new = gen_rtx_CONST (Pmode, new);
5289 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5290 new = gen_rtx_MEM (Pmode, new);
5291 RTX_UNCHANGING_P (new) = 1;
5292 set_mem_alias_set (new, ix86_GOT_alias_set ());
3b3c6a3f 5293
14f73b5a
JH
5294 if (reg == 0)
5295 reg = gen_reg_rtx (Pmode);
5296 emit_move_insn (reg, new);
5297 new = reg;
5298 }
0f290768 5299 }
91bb873f
RH
5300 else
5301 {
5302 if (GET_CODE (addr) == CONST)
3b3c6a3f 5303 {
91bb873f 5304 addr = XEXP (addr, 0);
e3c8ea67
RH
5305
5306 /* We must match stuff we generate before. Assume the only
5307 unspecs that can get here are ours. Not that we could do
5308 anything with them anyway... */
5309 if (GET_CODE (addr) == UNSPEC
5310 || (GET_CODE (addr) == PLUS
5311 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5312 return orig;
5313 if (GET_CODE (addr) != PLUS)
564d80f4 5314 abort ();
3b3c6a3f 5315 }
91bb873f
RH
5316 if (GET_CODE (addr) == PLUS)
5317 {
5318 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 5319
91bb873f
RH
5320 /* Check first to see if this is a constant offset from a @GOTOFF
5321 symbol reference. */
623fe810 5322 if (local_symbolic_operand (op0, Pmode)
91bb873f
RH
5323 && GET_CODE (op1) == CONST_INT)
5324 {
6eb791fc
JH
5325 if (!TARGET_64BIT)
5326 {
66edd3b4
RH
5327 if (reload_in_progress)
5328 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf
RH
5329 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5330 UNSPEC_GOTOFF);
6eb791fc
JH
5331 new = gen_rtx_PLUS (Pmode, new, op1);
5332 new = gen_rtx_CONST (Pmode, new);
5333 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
91bb873f 5334
6eb791fc
JH
5335 if (reg != 0)
5336 {
5337 emit_move_insn (reg, new);
5338 new = reg;
5339 }
5340 }
5341 else
91bb873f 5342 {
6eb791fc 5343 /* ??? We need to limit offsets here. */
91bb873f
RH
5344 }
5345 }
5346 else
5347 {
5348 base = legitimize_pic_address (XEXP (addr, 0), reg);
5349 new = legitimize_pic_address (XEXP (addr, 1),
5350 base == reg ? NULL_RTX : reg);
5351
5352 if (GET_CODE (new) == CONST_INT)
5353 new = plus_constant (base, INTVAL (new));
5354 else
5355 {
5356 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5357 {
5358 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5359 new = XEXP (new, 1);
5360 }
5361 new = gen_rtx_PLUS (Pmode, base, new);
5362 }
5363 }
5364 }
3b3c6a3f
MM
5365 }
5366 return new;
5367}
fb49053f 5368
fb49053f 5369static void
f996902d 5370ix86_encode_section_info (decl, first)
fb49053f
RH
5371 tree decl;
5372 int first ATTRIBUTE_UNUSED;
5373{
f996902d
RH
5374 bool local_p = (*targetm.binds_local_p) (decl);
5375 rtx rtl, symbol;
5376
5377 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5378 if (GET_CODE (rtl) != MEM)
5379 return;
5380 symbol = XEXP (rtl, 0);
5381 if (GET_CODE (symbol) != SYMBOL_REF)
5382 return;
5383
5384 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5385 symbol so that we may access it directly in the GOT. */
5386
fb49053f 5387 if (flag_pic)
f996902d
RH
5388 SYMBOL_REF_FLAG (symbol) = local_p;
5389
5390 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5391 "local dynamic", "initial exec" or "local exec" TLS models
5392 respectively. */
5393
5394 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
fb49053f 5395 {
f996902d
RH
5396 const char *symbol_str;
5397 char *newstr;
5398 size_t len;
5399 enum tls_model kind;
5400
5401 if (!flag_pic)
5402 {
5403 if (local_p)
5404 kind = TLS_MODEL_LOCAL_EXEC;
5405 else
5406 kind = TLS_MODEL_INITIAL_EXEC;
5407 }
5408 /* Local dynamic is inefficient when we're not combining the
5409 parts of the address. */
5410 else if (optimize && local_p)
5411 kind = TLS_MODEL_LOCAL_DYNAMIC;
5412 else
5413 kind = TLS_MODEL_GLOBAL_DYNAMIC;
5414 if (kind < flag_tls_default)
5415 kind = flag_tls_default;
5416
5417 symbol_str = XSTR (symbol, 0);
fb49053f 5418
f996902d
RH
5419 if (symbol_str[0] == '%')
5420 {
5421 if (symbol_str[1] == tls_model_chars[kind])
5422 return;
5423 symbol_str += 2;
5424 }
5425 len = strlen (symbol_str) + 1;
5426 newstr = alloca (len + 2);
5427
5428 newstr[0] = '%';
5429 newstr[1] = tls_model_chars[kind];
5430 memcpy (newstr + 2, symbol_str, len);
5431
5432 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
fb49053f
RH
5433 }
5434}
f996902d
RH
5435
5436/* Undo the above when printing symbol names. */
5437
5438static const char *
5439ix86_strip_name_encoding (str)
5440 const char *str;
5441{
5442 if (str[0] == '%')
5443 str += 2;
5444 if (str [0] == '*')
5445 str += 1;
5446 return str;
5447}
3b3c6a3f 5448\f
f996902d
RH
5449/* Load the thread pointer into a register. */
5450
5451static rtx
5452get_thread_pointer ()
5453{
5454 rtx tp;
5455
5456 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5457 tp = gen_rtx_CONST (Pmode, tp);
5458 tp = force_reg (Pmode, tp);
5459
5460 return tp;
5461}
5462
3b3c6a3f
MM
5463/* Try machine-dependent ways of modifying an illegitimate address
5464 to be legitimate. If we find one, return the new, valid address.
5465 This macro is used in only one place: `memory_address' in explow.c.
5466
5467 OLDX is the address as it was before break_out_memory_refs was called.
5468 In some cases it is useful to look at this to decide what needs to be done.
5469
5470 MODE and WIN are passed so that this macro can use
5471 GO_IF_LEGITIMATE_ADDRESS.
5472
5473 It is always safe for this macro to do nothing. It exists to recognize
5474 opportunities to optimize the output.
5475
5476 For the 80386, we handle X+REG by loading X into a register R and
5477 using R+REG. R will go in a general reg and indexing will be used.
5478 However, if REG is a broken-out memory address or multiplication,
5479 nothing needs to be done because REG can certainly go in a general reg.
5480
5481 When -fpic is used, special handling is needed for symbolic references.
5482 See comments by legitimize_pic_address in i386.c for details. */
5483
5484rtx
5485legitimize_address (x, oldx, mode)
5486 register rtx x;
bb5177ac 5487 register rtx oldx ATTRIBUTE_UNUSED;
3b3c6a3f
MM
5488 enum machine_mode mode;
5489{
5490 int changed = 0;
5491 unsigned log;
5492
5493 if (TARGET_DEBUG_ADDR)
5494 {
e9a25f70
JL
5495 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5496 GET_MODE_NAME (mode));
3b3c6a3f
MM
5497 debug_rtx (x);
5498 }
5499
f996902d
RH
5500 log = tls_symbolic_operand (x, mode);
5501 if (log)
5502 {
5503 rtx dest, base, off, pic;
5504
755ac5d4 5505 switch (log)
f996902d
RH
5506 {
5507 case TLS_MODEL_GLOBAL_DYNAMIC:
5508 dest = gen_reg_rtx (Pmode);
5509 emit_insn (gen_tls_global_dynamic (dest, x));
5510 break;
5511
5512 case TLS_MODEL_LOCAL_DYNAMIC:
5513 base = gen_reg_rtx (Pmode);
5514 emit_insn (gen_tls_local_dynamic_base (base));
5515
5516 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5517 off = gen_rtx_CONST (Pmode, off);
5518
5519 return gen_rtx_PLUS (Pmode, base, off);
5520
5521 case TLS_MODEL_INITIAL_EXEC:
5522 if (flag_pic)
5523 {
66edd3b4
RH
5524 if (reload_in_progress)
5525 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
f996902d
RH
5526 pic = pic_offset_table_rtx;
5527 }
5528 else
5529 {
5530 pic = gen_reg_rtx (Pmode);
5531 emit_insn (gen_set_got (pic));
5532 }
5533
5534 base = get_thread_pointer ();
5535
5536 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_GOTTPOFF);
5537 off = gen_rtx_CONST (Pmode, off);
5538 off = gen_rtx_PLUS (Pmode, pic, off);
5539 off = gen_rtx_MEM (Pmode, off);
5540 RTX_UNCHANGING_P (off) = 1;
5541 set_mem_alias_set (off, ix86_GOT_alias_set ());
5542
5543 /* Damn Sun for specifing a set of dynamic relocations without
5544 considering the two-operand nature of the architecture!
5545 We'd be much better off with a "GOTNTPOFF" relocation that
5546 already contained the negated constant. */
5547 /* ??? Using negl and reg+reg addressing appears to be a lose
5548 size-wise. The negl is two bytes, just like the extra movl
5549 incurred by the two-operand subl, but reg+reg addressing
5550 uses the two-byte modrm form, unlike plain reg. */
5551
5552 dest = gen_reg_rtx (Pmode);
5553 emit_insn (gen_subsi3 (dest, base, off));
5554 break;
5555
5556 case TLS_MODEL_LOCAL_EXEC:
5557 base = get_thread_pointer ();
5558
5559 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5560 TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5561 off = gen_rtx_CONST (Pmode, off);
5562
5563 if (TARGET_GNU_TLS)
5564 return gen_rtx_PLUS (Pmode, base, off);
5565 else
5566 {
5567 dest = gen_reg_rtx (Pmode);
5568 emit_insn (gen_subsi3 (dest, base, off));
5569 }
5570 break;
5571
5572 default:
5573 abort ();
5574 }
5575
5576 return dest;
5577 }
5578
3b3c6a3f
MM
5579 if (flag_pic && SYMBOLIC_CONST (x))
5580 return legitimize_pic_address (x, 0);
5581
5582 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5583 if (GET_CODE (x) == ASHIFT
5584 && GET_CODE (XEXP (x, 1)) == CONST_INT
b531087a 5585 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
3b3c6a3f
MM
5586 {
5587 changed = 1;
a269a03c
JC
5588 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5589 GEN_INT (1 << log));
3b3c6a3f
MM
5590 }
5591
5592 if (GET_CODE (x) == PLUS)
5593 {
0f290768 5594 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 5595
3b3c6a3f
MM
5596 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5597 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
b531087a 5598 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
3b3c6a3f
MM
5599 {
5600 changed = 1;
c5c76735
JL
5601 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5602 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5603 GEN_INT (1 << log));
3b3c6a3f
MM
5604 }
5605
5606 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5607 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
b531087a 5608 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
3b3c6a3f
MM
5609 {
5610 changed = 1;
c5c76735
JL
5611 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5612 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5613 GEN_INT (1 << log));
3b3c6a3f
MM
5614 }
5615
0f290768 5616 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
5617 if (GET_CODE (XEXP (x, 1)) == MULT)
5618 {
5619 rtx tmp = XEXP (x, 0);
5620 XEXP (x, 0) = XEXP (x, 1);
5621 XEXP (x, 1) = tmp;
5622 changed = 1;
5623 }
5624
5625 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5626 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5627 created by virtual register instantiation, register elimination, and
5628 similar optimizations. */
5629 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5630 {
5631 changed = 1;
c5c76735
JL
5632 x = gen_rtx_PLUS (Pmode,
5633 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5634 XEXP (XEXP (x, 1), 0)),
5635 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
5636 }
5637
e9a25f70
JL
5638 /* Canonicalize
5639 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
5640 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5641 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5642 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5643 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5644 && CONSTANT_P (XEXP (x, 1)))
5645 {
00c79232
ML
5646 rtx constant;
5647 rtx other = NULL_RTX;
3b3c6a3f
MM
5648
5649 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5650 {
5651 constant = XEXP (x, 1);
5652 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5653 }
5654 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5655 {
5656 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5657 other = XEXP (x, 1);
5658 }
5659 else
5660 constant = 0;
5661
5662 if (constant)
5663 {
5664 changed = 1;
c5c76735
JL
5665 x = gen_rtx_PLUS (Pmode,
5666 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5667 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5668 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
5669 }
5670 }
5671
5672 if (changed && legitimate_address_p (mode, x, FALSE))
5673 return x;
5674
5675 if (GET_CODE (XEXP (x, 0)) == MULT)
5676 {
5677 changed = 1;
5678 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5679 }
5680
5681 if (GET_CODE (XEXP (x, 1)) == MULT)
5682 {
5683 changed = 1;
5684 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5685 }
5686
5687 if (changed
5688 && GET_CODE (XEXP (x, 1)) == REG
5689 && GET_CODE (XEXP (x, 0)) == REG)
5690 return x;
5691
5692 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5693 {
5694 changed = 1;
5695 x = legitimize_pic_address (x, 0);
5696 }
5697
5698 if (changed && legitimate_address_p (mode, x, FALSE))
5699 return x;
5700
5701 if (GET_CODE (XEXP (x, 0)) == REG)
5702 {
5703 register rtx temp = gen_reg_rtx (Pmode);
5704 register rtx val = force_operand (XEXP (x, 1), temp);
5705 if (val != temp)
5706 emit_move_insn (temp, val);
5707
5708 XEXP (x, 1) = temp;
5709 return x;
5710 }
5711
5712 else if (GET_CODE (XEXP (x, 1)) == REG)
5713 {
5714 register rtx temp = gen_reg_rtx (Pmode);
5715 register rtx val = force_operand (XEXP (x, 0), temp);
5716 if (val != temp)
5717 emit_move_insn (temp, val);
5718
5719 XEXP (x, 0) = temp;
5720 return x;
5721 }
5722 }
5723
5724 return x;
5725}
2a2ab3f9
JVA
5726\f
5727/* Print an integer constant expression in assembler syntax. Addition
5728 and subtraction are the only arithmetic that may appear in these
5729 expressions. FILE is the stdio stream to write to, X is the rtx, and
5730 CODE is the operand print code from the output string. */
5731
5732static void
5733output_pic_addr_const (file, x, code)
5734 FILE *file;
5735 rtx x;
5736 int code;
5737{
5738 char buf[256];
5739
5740 switch (GET_CODE (x))
5741 {
5742 case PC:
5743 if (flag_pic)
5744 putc ('.', file);
5745 else
5746 abort ();
5747 break;
5748
5749 case SYMBOL_REF:
91bb873f
RH
5750 assemble_name (file, XSTR (x, 0));
5751 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5752 fputs ("@PLT", file);
2a2ab3f9
JVA
5753 break;
5754
91bb873f
RH
5755 case LABEL_REF:
5756 x = XEXP (x, 0);
5757 /* FALLTHRU */
2a2ab3f9
JVA
5758 case CODE_LABEL:
5759 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5760 assemble_name (asm_out_file, buf);
5761 break;
5762
5763 case CONST_INT:
f64cecad 5764 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
5765 break;
5766
5767 case CONST:
5768 /* This used to output parentheses around the expression,
5769 but that does not work on the 386 (either ATT or BSD assembler). */
5770 output_pic_addr_const (file, XEXP (x, 0), code);
5771 break;
5772
5773 case CONST_DOUBLE:
5774 if (GET_MODE (x) == VOIDmode)
5775 {
5776 /* We can use %d if the number is <32 bits and positive. */
5777 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
5778 fprintf (file, "0x%lx%08lx",
5779 (unsigned long) CONST_DOUBLE_HIGH (x),
5780 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 5781 else
f64cecad 5782 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
5783 }
5784 else
5785 /* We can't handle floating point constants;
5786 PRINT_OPERAND must handle them. */
5787 output_operand_lossage ("floating constant misused");
5788 break;
5789
5790 case PLUS:
e9a25f70 5791 /* Some assemblers need integer constants to appear first. */
2a2ab3f9
JVA
5792 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5793 {
2a2ab3f9 5794 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 5795 putc ('+', file);
e9a25f70 5796 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 5797 }
91bb873f 5798 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 5799 {
2a2ab3f9 5800 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 5801 putc ('+', file);
e9a25f70 5802 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 5803 }
91bb873f
RH
5804 else
5805 abort ();
2a2ab3f9
JVA
5806 break;
5807
5808 case MINUS:
80f33d06 5809 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
2a2ab3f9 5810 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 5811 putc ('-', file);
2a2ab3f9 5812 output_pic_addr_const (file, XEXP (x, 1), code);
80f33d06 5813 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
2a2ab3f9
JVA
5814 break;
5815
91bb873f
RH
5816 case UNSPEC:
5817 if (XVECLEN (x, 0) != 1)
5bf0ebab 5818 abort ();
91bb873f
RH
5819 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5820 switch (XINT (x, 1))
77ebd435 5821 {
8ee41eaf 5822 case UNSPEC_GOT:
77ebd435
AJ
5823 fputs ("@GOT", file);
5824 break;
8ee41eaf 5825 case UNSPEC_GOTOFF:
77ebd435
AJ
5826 fputs ("@GOTOFF", file);
5827 break;
8ee41eaf 5828 case UNSPEC_GOTPCREL:
edfe8595 5829 fputs ("@GOTPCREL(%rip)", file);
6eb791fc 5830 break;
f996902d
RH
5831 case UNSPEC_GOTTPOFF:
5832 fputs ("@GOTTPOFF", file);
5833 break;
5834 case UNSPEC_TPOFF:
5835 fputs ("@TPOFF", file);
5836 break;
5837 case UNSPEC_NTPOFF:
5838 fputs ("@NTPOFF", file);
5839 break;
5840 case UNSPEC_DTPOFF:
5841 fputs ("@DTPOFF", file);
5842 break;
77ebd435
AJ
5843 default:
5844 output_operand_lossage ("invalid UNSPEC as operand");
5845 break;
5846 }
91bb873f
RH
5847 break;
5848
2a2ab3f9
JVA
5849 default:
5850 output_operand_lossage ("invalid expression as operand");
5851 }
5852}
1865dbb5 5853
0f290768 5854/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
5855 We need to handle our special PIC relocations. */
5856
0f290768 5857void
1865dbb5
JM
5858i386_dwarf_output_addr_const (file, x)
5859 FILE *file;
5860 rtx x;
5861{
14f73b5a 5862#ifdef ASM_QUAD
18b5b8d6 5863 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
14f73b5a
JH
5864#else
5865 if (TARGET_64BIT)
5866 abort ();
18b5b8d6 5867 fprintf (file, "%s", ASM_LONG);
14f73b5a 5868#endif
1865dbb5
JM
5869 if (flag_pic)
5870 output_pic_addr_const (file, x, '\0');
5871 else
5872 output_addr_const (file, x);
5873 fputc ('\n', file);
5874}
5875
5876/* In the name of slightly smaller debug output, and to cater to
5877 general assembler losage, recognize PIC+GOTOFF and turn it back
5878 into a direct symbol reference. */
5879
5880rtx
5881i386_simplify_dwarf_addr (orig_x)
5882 rtx orig_x;
5883{
ec65b2e3 5884 rtx x = orig_x, y;
1865dbb5 5885
4c8c0dec
JJ
5886 if (GET_CODE (x) == MEM)
5887 x = XEXP (x, 0);
5888
6eb791fc
JH
5889 if (TARGET_64BIT)
5890 {
5891 if (GET_CODE (x) != CONST
5892 || GET_CODE (XEXP (x, 0)) != UNSPEC
8ee41eaf 5893 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
4c8c0dec 5894 || GET_CODE (orig_x) != MEM)
6eb791fc
JH
5895 return orig_x;
5896 return XVECEXP (XEXP (x, 0), 0, 0);
5897 }
5898
1865dbb5 5899 if (GET_CODE (x) != PLUS
1865dbb5
JM
5900 || GET_CODE (XEXP (x, 1)) != CONST)
5901 return orig_x;
5902
ec65b2e3
JJ
5903 if (GET_CODE (XEXP (x, 0)) == REG
5904 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5905 /* %ebx + GOT/GOTOFF */
5906 y = NULL;
5907 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5908 {
5909 /* %ebx + %reg * scale + GOT/GOTOFF */
5910 y = XEXP (x, 0);
5911 if (GET_CODE (XEXP (y, 0)) == REG
5912 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5913 y = XEXP (y, 1);
5914 else if (GET_CODE (XEXP (y, 1)) == REG
5915 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5916 y = XEXP (y, 0);
5917 else
5918 return orig_x;
5919 if (GET_CODE (y) != REG
5920 && GET_CODE (y) != MULT
5921 && GET_CODE (y) != ASHIFT)
5922 return orig_x;
5923 }
5924 else
5925 return orig_x;
5926
1865dbb5
JM
5927 x = XEXP (XEXP (x, 1), 0);
5928 if (GET_CODE (x) == UNSPEC
8ee41eaf
RH
5929 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5930 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
5931 {
5932 if (y)
5933 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5934 return XVECEXP (x, 0, 0);
5935 }
1865dbb5
JM
5936
5937 if (GET_CODE (x) == PLUS
5938 && GET_CODE (XEXP (x, 0)) == UNSPEC
5939 && GET_CODE (XEXP (x, 1)) == CONST_INT
8ee41eaf
RH
5940 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5941 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
5942 && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
5943 {
5944 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5945 if (y)
5946 return gen_rtx_PLUS (Pmode, y, x);
5947 return x;
5948 }
1865dbb5
JM
5949
5950 return orig_x;
5951}
2a2ab3f9 5952\f
a269a03c 5953static void
e075ae69 5954put_condition_code (code, mode, reverse, fp, file)
a269a03c 5955 enum rtx_code code;
e075ae69
RH
5956 enum machine_mode mode;
5957 int reverse, fp;
a269a03c
JC
5958 FILE *file;
5959{
a269a03c
JC
5960 const char *suffix;
5961
9a915772
JH
5962 if (mode == CCFPmode || mode == CCFPUmode)
5963 {
5964 enum rtx_code second_code, bypass_code;
5965 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5966 if (bypass_code != NIL || second_code != NIL)
b531087a 5967 abort ();
9a915772
JH
5968 code = ix86_fp_compare_code_to_integer (code);
5969 mode = CCmode;
5970 }
a269a03c
JC
5971 if (reverse)
5972 code = reverse_condition (code);
e075ae69 5973
a269a03c
JC
5974 switch (code)
5975 {
5976 case EQ:
5977 suffix = "e";
5978 break;
a269a03c
JC
5979 case NE:
5980 suffix = "ne";
5981 break;
a269a03c 5982 case GT:
7e08e190 5983 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
e075ae69
RH
5984 abort ();
5985 suffix = "g";
a269a03c 5986 break;
a269a03c 5987 case GTU:
e075ae69
RH
5988 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
5989 Those same assemblers have the same but opposite losage on cmov. */
7e08e190 5990 if (mode != CCmode)
0f290768 5991 abort ();
e075ae69 5992 suffix = fp ? "nbe" : "a";
a269a03c 5993 break;
a269a03c 5994 case LT:
9076b9c1 5995 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 5996 suffix = "s";
7e08e190 5997 else if (mode == CCmode || mode == CCGCmode)
e075ae69 5998 suffix = "l";
9076b9c1 5999 else
0f290768 6000 abort ();
a269a03c 6001 break;
a269a03c 6002 case LTU:
9076b9c1 6003 if (mode != CCmode)
0f290768 6004 abort ();
a269a03c
JC
6005 suffix = "b";
6006 break;
a269a03c 6007 case GE:
9076b9c1 6008 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6009 suffix = "ns";
7e08e190 6010 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6011 suffix = "ge";
9076b9c1 6012 else
0f290768 6013 abort ();
a269a03c 6014 break;
a269a03c 6015 case GEU:
e075ae69 6016 /* ??? As above. */
7e08e190 6017 if (mode != CCmode)
0f290768 6018 abort ();
7e08e190 6019 suffix = fp ? "nb" : "ae";
a269a03c 6020 break;
a269a03c 6021 case LE:
7e08e190 6022 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
e075ae69
RH
6023 abort ();
6024 suffix = "le";
a269a03c 6025 break;
a269a03c 6026 case LEU:
9076b9c1
JH
6027 if (mode != CCmode)
6028 abort ();
7e08e190 6029 suffix = "be";
a269a03c 6030 break;
3a3677ff 6031 case UNORDERED:
9e7adcb3 6032 suffix = fp ? "u" : "p";
3a3677ff
RH
6033 break;
6034 case ORDERED:
9e7adcb3 6035 suffix = fp ? "nu" : "np";
3a3677ff 6036 break;
a269a03c
JC
6037 default:
6038 abort ();
6039 }
6040 fputs (suffix, file);
6041}
6042
e075ae69
RH
6043void
6044print_reg (x, code, file)
6045 rtx x;
6046 int code;
6047 FILE *file;
e5cb57e8 6048{
e075ae69 6049 if (REGNO (x) == ARG_POINTER_REGNUM
564d80f4 6050 || REGNO (x) == FRAME_POINTER_REGNUM
e075ae69
RH
6051 || REGNO (x) == FLAGS_REG
6052 || REGNO (x) == FPSR_REG)
6053 abort ();
e9a25f70 6054
5bf0ebab 6055 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
e075ae69
RH
6056 putc ('%', file);
6057
ef6257cd 6058 if (code == 'w' || MMX_REG_P (x))
e075ae69
RH
6059 code = 2;
6060 else if (code == 'b')
6061 code = 1;
6062 else if (code == 'k')
6063 code = 4;
3f3f2124
JH
6064 else if (code == 'q')
6065 code = 8;
e075ae69
RH
6066 else if (code == 'y')
6067 code = 3;
6068 else if (code == 'h')
6069 code = 0;
6070 else
6071 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 6072
3f3f2124
JH
6073 /* Irritatingly, AMD extended registers use different naming convention
6074 from the normal registers. */
6075 if (REX_INT_REG_P (x))
6076 {
885a70fd
JH
6077 if (!TARGET_64BIT)
6078 abort ();
3f3f2124
JH
6079 switch (code)
6080 {
ef6257cd 6081 case 0:
c725bd79 6082 error ("extended registers have no high halves");
3f3f2124
JH
6083 break;
6084 case 1:
6085 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6086 break;
6087 case 2:
6088 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6089 break;
6090 case 4:
6091 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6092 break;
6093 case 8:
6094 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6095 break;
6096 default:
c725bd79 6097 error ("unsupported operand size for extended register");
3f3f2124
JH
6098 break;
6099 }
6100 return;
6101 }
e075ae69
RH
6102 switch (code)
6103 {
6104 case 3:
6105 if (STACK_TOP_P (x))
6106 {
6107 fputs ("st(0)", file);
6108 break;
6109 }
6110 /* FALLTHRU */
e075ae69 6111 case 8:
3f3f2124 6112 case 4:
e075ae69 6113 case 12:
446988df 6114 if (! ANY_FP_REG_P (x))
885a70fd 6115 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
e075ae69 6116 /* FALLTHRU */
a7180f70 6117 case 16:
e075ae69
RH
6118 case 2:
6119 fputs (hi_reg_name[REGNO (x)], file);
6120 break;
6121 case 1:
6122 fputs (qi_reg_name[REGNO (x)], file);
6123 break;
6124 case 0:
6125 fputs (qi_high_reg_name[REGNO (x)], file);
6126 break;
6127 default:
6128 abort ();
fe25fea3 6129 }
e5cb57e8
SC
6130}
6131
f996902d
RH
6132/* Locate some local-dynamic symbol still in use by this function
6133 so that we can print its name in some tls_local_dynamic_base
6134 pattern. */
6135
6136static const char *
6137get_some_local_dynamic_name ()
6138{
6139 rtx insn;
6140
6141 if (cfun->machine->some_ld_name)
6142 return cfun->machine->some_ld_name;
6143
6144 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6145 if (INSN_P (insn)
6146 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6147 return cfun->machine->some_ld_name;
6148
6149 abort ();
6150}
6151
6152static int
6153get_some_local_dynamic_name_1 (px, data)
6154 rtx *px;
6155 void *data ATTRIBUTE_UNUSED;
6156{
6157 rtx x = *px;
6158
6159 if (GET_CODE (x) == SYMBOL_REF
6160 && local_dynamic_symbolic_operand (x, Pmode))
6161 {
6162 cfun->machine->some_ld_name = XSTR (x, 0);
6163 return 1;
6164 }
6165
6166 return 0;
6167}
6168
2a2ab3f9 6169/* Meaning of CODE:
fe25fea3 6170 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 6171 C -- print opcode suffix for set/cmov insn.
fe25fea3 6172 c -- like C, but print reversed condition
ef6257cd 6173 F,f -- likewise, but for floating-point.
048b1c95
JJ
6174 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6175 nothing
2a2ab3f9
JVA
6176 R -- print the prefix for register names.
6177 z -- print the opcode suffix for the size of the current operand.
6178 * -- print a star (in certain assembler syntax)
fb204271 6179 A -- print an absolute memory reference.
2a2ab3f9 6180 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
6181 s -- print a shift double count, followed by the assemblers argument
6182 delimiter.
fe25fea3
SC
6183 b -- print the QImode name of the register for the indicated operand.
6184 %b0 would print %al if operands[0] is reg 0.
6185 w -- likewise, print the HImode name of the register.
6186 k -- likewise, print the SImode name of the register.
3f3f2124 6187 q -- likewise, print the DImode name of the register.
ef6257cd
JH
6188 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6189 y -- print "st(0)" instead of "st" as a register.
a46d1d38 6190 D -- print condition for SSE cmp instruction.
ef6257cd
JH
6191 P -- if PIC, print an @PLT suffix.
6192 X -- don't print any sort of PIC '@' suffix for a symbol.
f996902d 6193 & -- print some in-use local-dynamic symbol name.
a46d1d38 6194 */
2a2ab3f9
JVA
6195
6196void
6197print_operand (file, x, code)
6198 FILE *file;
6199 rtx x;
6200 int code;
6201{
6202 if (code)
6203 {
6204 switch (code)
6205 {
6206 case '*':
80f33d06 6207 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9
JVA
6208 putc ('*', file);
6209 return;
6210
f996902d
RH
6211 case '&':
6212 assemble_name (file, get_some_local_dynamic_name ());
6213 return;
6214
fb204271 6215 case 'A':
80f33d06 6216 if (ASSEMBLER_DIALECT == ASM_ATT)
fb204271 6217 putc ('*', file);
80f33d06 6218 else if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
6219 {
6220 /* Intel syntax. For absolute addresses, registers should not
6221 be surrounded by braces. */
6222 if (GET_CODE (x) != REG)
6223 {
6224 putc ('[', file);
6225 PRINT_OPERAND (file, x, 0);
6226 putc (']', file);
6227 return;
6228 }
6229 }
80f33d06
GS
6230 else
6231 abort ();
fb204271
DN
6232
6233 PRINT_OPERAND (file, x, 0);
6234 return;
6235
6236
2a2ab3f9 6237 case 'L':
80f33d06 6238 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6239 putc ('l', file);
2a2ab3f9
JVA
6240 return;
6241
6242 case 'W':
80f33d06 6243 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6244 putc ('w', file);
2a2ab3f9
JVA
6245 return;
6246
6247 case 'B':
80f33d06 6248 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6249 putc ('b', file);
2a2ab3f9
JVA
6250 return;
6251
6252 case 'Q':
80f33d06 6253 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6254 putc ('l', file);
2a2ab3f9
JVA
6255 return;
6256
6257 case 'S':
80f33d06 6258 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6259 putc ('s', file);
2a2ab3f9
JVA
6260 return;
6261
5f1ec3e6 6262 case 'T':
80f33d06 6263 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6264 putc ('t', file);
5f1ec3e6
JVA
6265 return;
6266
2a2ab3f9
JVA
6267 case 'z':
6268 /* 387 opcodes don't get size suffixes if the operands are
0f290768 6269 registers. */
2a2ab3f9
JVA
6270 if (STACK_REG_P (x))
6271 return;
6272
831c4e87
KC
6273 /* Likewise if using Intel opcodes. */
6274 if (ASSEMBLER_DIALECT == ASM_INTEL)
6275 return;
6276
6277 /* This is the size of op from size of operand. */
2a2ab3f9
JVA
6278 switch (GET_MODE_SIZE (GET_MODE (x)))
6279 {
2a2ab3f9 6280 case 2:
155d8a47
JW
6281#ifdef HAVE_GAS_FILDS_FISTS
6282 putc ('s', file);
6283#endif
2a2ab3f9
JVA
6284 return;
6285
6286 case 4:
6287 if (GET_MODE (x) == SFmode)
6288 {
e075ae69 6289 putc ('s', file);
2a2ab3f9
JVA
6290 return;
6291 }
6292 else
e075ae69 6293 putc ('l', file);
2a2ab3f9
JVA
6294 return;
6295
5f1ec3e6 6296 case 12:
2b589241 6297 case 16:
e075ae69
RH
6298 putc ('t', file);
6299 return;
5f1ec3e6 6300
2a2ab3f9
JVA
6301 case 8:
6302 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
6303 {
6304#ifdef GAS_MNEMONICS
e075ae69 6305 putc ('q', file);
56c0e8fa 6306#else
e075ae69
RH
6307 putc ('l', file);
6308 putc ('l', file);
56c0e8fa
JVA
6309#endif
6310 }
e075ae69
RH
6311 else
6312 putc ('l', file);
2a2ab3f9 6313 return;
155d8a47
JW
6314
6315 default:
6316 abort ();
2a2ab3f9 6317 }
4af3895e
JVA
6318
6319 case 'b':
6320 case 'w':
6321 case 'k':
3f3f2124 6322 case 'q':
4af3895e
JVA
6323 case 'h':
6324 case 'y':
5cb6195d 6325 case 'X':
e075ae69 6326 case 'P':
4af3895e
JVA
6327 break;
6328
2d49677f
SC
6329 case 's':
6330 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6331 {
6332 PRINT_OPERAND (file, x, 0);
e075ae69 6333 putc (',', file);
2d49677f 6334 }
a269a03c
JC
6335 return;
6336
a46d1d38
JH
6337 case 'D':
6338 /* Little bit of braindamage here. The SSE compare instructions
6339 does use completely different names for the comparisons that the
6340 fp conditional moves. */
6341 switch (GET_CODE (x))
6342 {
6343 case EQ:
6344 case UNEQ:
6345 fputs ("eq", file);
6346 break;
6347 case LT:
6348 case UNLT:
6349 fputs ("lt", file);
6350 break;
6351 case LE:
6352 case UNLE:
6353 fputs ("le", file);
6354 break;
6355 case UNORDERED:
6356 fputs ("unord", file);
6357 break;
6358 case NE:
6359 case LTGT:
6360 fputs ("neq", file);
6361 break;
6362 case UNGE:
6363 case GE:
6364 fputs ("nlt", file);
6365 break;
6366 case UNGT:
6367 case GT:
6368 fputs ("nle", file);
6369 break;
6370 case ORDERED:
6371 fputs ("ord", file);
6372 break;
6373 default:
6374 abort ();
6375 break;
6376 }
6377 return;
048b1c95
JJ
6378 case 'O':
6379#ifdef CMOV_SUN_AS_SYNTAX
6380 if (ASSEMBLER_DIALECT == ASM_ATT)
6381 {
6382 switch (GET_MODE (x))
6383 {
6384 case HImode: putc ('w', file); break;
6385 case SImode:
6386 case SFmode: putc ('l', file); break;
6387 case DImode:
6388 case DFmode: putc ('q', file); break;
6389 default: abort ();
6390 }
6391 putc ('.', file);
6392 }
6393#endif
6394 return;
1853aadd 6395 case 'C':
e075ae69 6396 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 6397 return;
fe25fea3 6398 case 'F':
048b1c95
JJ
6399#ifdef CMOV_SUN_AS_SYNTAX
6400 if (ASSEMBLER_DIALECT == ASM_ATT)
6401 putc ('.', file);
6402#endif
e075ae69 6403 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
6404 return;
6405
e9a25f70 6406 /* Like above, but reverse condition */
e075ae69 6407 case 'c':
c1d5afc4
CR
6408 /* Check to see if argument to %c is really a constant
6409 and not a condition code which needs to be reversed. */
6410 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6411 {
6412 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6413 return;
6414 }
e075ae69
RH
6415 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6416 return;
fe25fea3 6417 case 'f':
048b1c95
JJ
6418#ifdef CMOV_SUN_AS_SYNTAX
6419 if (ASSEMBLER_DIALECT == ASM_ATT)
6420 putc ('.', file);
6421#endif
e075ae69 6422 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 6423 return;
ef6257cd
JH
6424 case '+':
6425 {
6426 rtx x;
e5cb57e8 6427
ef6257cd
JH
6428 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6429 return;
a4f31c00 6430
ef6257cd
JH
6431 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6432 if (x)
6433 {
6434 int pred_val = INTVAL (XEXP (x, 0));
6435
6436 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6437 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6438 {
6439 int taken = pred_val > REG_BR_PROB_BASE / 2;
6440 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6441
6442 /* Emit hints only in the case default branch prediction
6443 heruistics would fail. */
6444 if (taken != cputaken)
6445 {
6446 /* We use 3e (DS) prefix for taken branches and
6447 2e (CS) prefix for not taken branches. */
6448 if (taken)
6449 fputs ("ds ; ", file);
6450 else
6451 fputs ("cs ; ", file);
6452 }
6453 }
6454 }
6455 return;
6456 }
4af3895e 6457 default:
a52453cc 6458 output_operand_lossage ("invalid operand code `%c'", code);
2a2ab3f9
JVA
6459 }
6460 }
e9a25f70 6461
2a2ab3f9
JVA
6462 if (GET_CODE (x) == REG)
6463 {
6464 PRINT_REG (x, code, file);
6465 }
e9a25f70 6466
2a2ab3f9
JVA
6467 else if (GET_CODE (x) == MEM)
6468 {
e075ae69 6469 /* No `byte ptr' prefix for call instructions. */
80f33d06 6470 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
2a2ab3f9 6471 {
69ddee61 6472 const char * size;
e075ae69
RH
6473 switch (GET_MODE_SIZE (GET_MODE (x)))
6474 {
6475 case 1: size = "BYTE"; break;
6476 case 2: size = "WORD"; break;
6477 case 4: size = "DWORD"; break;
6478 case 8: size = "QWORD"; break;
6479 case 12: size = "XWORD"; break;
a7180f70 6480 case 16: size = "XMMWORD"; break;
e075ae69 6481 default:
564d80f4 6482 abort ();
e075ae69 6483 }
fb204271
DN
6484
6485 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6486 if (code == 'b')
6487 size = "BYTE";
6488 else if (code == 'w')
6489 size = "WORD";
6490 else if (code == 'k')
6491 size = "DWORD";
6492
e075ae69
RH
6493 fputs (size, file);
6494 fputs (" PTR ", file);
2a2ab3f9 6495 }
e075ae69
RH
6496
6497 x = XEXP (x, 0);
6498 if (flag_pic && CONSTANT_ADDRESS_P (x))
6499 output_pic_addr_const (file, x, code);
0d7d98ee 6500 /* Avoid (%rip) for call operands. */
5bf0ebab 6501 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
0d7d98ee
JH
6502 && GET_CODE (x) != CONST_INT)
6503 output_addr_const (file, x);
c8b94768
RH
6504 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6505 output_operand_lossage ("invalid constraints for operand");
2a2ab3f9 6506 else
e075ae69 6507 output_address (x);
2a2ab3f9 6508 }
e9a25f70 6509
2a2ab3f9
JVA
6510 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6511 {
e9a25f70
JL
6512 REAL_VALUE_TYPE r;
6513 long l;
6514
5f1ec3e6
JVA
6515 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6516 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 6517
80f33d06 6518 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6519 putc ('$', file);
52267fcb 6520 fprintf (file, "0x%lx", l);
5f1ec3e6 6521 }
e9a25f70 6522
0f290768 6523 /* These float cases don't actually occur as immediate operands. */
5f1ec3e6
JVA
6524 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6525 {
e9a25f70
JL
6526 REAL_VALUE_TYPE r;
6527 char dstr[30];
6528
5f1ec3e6
JVA
6529 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6530 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6531 fprintf (file, "%s", dstr);
2a2ab3f9 6532 }
e9a25f70 6533
2b589241
JH
6534 else if (GET_CODE (x) == CONST_DOUBLE
6535 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
2a2ab3f9 6536 {
e9a25f70
JL
6537 REAL_VALUE_TYPE r;
6538 char dstr[30];
6539
5f1ec3e6
JVA
6540 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6541 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6542 fprintf (file, "%s", dstr);
2a2ab3f9 6543 }
f996902d
RH
6544
6545 else if (GET_CODE (x) == CONST
6546 && GET_CODE (XEXP (x, 0)) == UNSPEC
6547 && XINT (XEXP (x, 0), 1) == UNSPEC_TP)
6548 {
6549 if (ASSEMBLER_DIALECT == ASM_INTEL)
6550 fputs ("DWORD PTR ", file);
6551 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6552 putc ('%', file);
6553 fputs ("gs:0", file);
6554 }
6555
79325812 6556 else
2a2ab3f9 6557 {
4af3895e 6558 if (code != 'P')
2a2ab3f9 6559 {
695dac07 6560 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69 6561 {
80f33d06 6562 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
6563 putc ('$', file);
6564 }
2a2ab3f9
JVA
6565 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6566 || GET_CODE (x) == LABEL_REF)
e075ae69 6567 {
80f33d06 6568 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
6569 putc ('$', file);
6570 else
6571 fputs ("OFFSET FLAT:", file);
6572 }
2a2ab3f9 6573 }
e075ae69
RH
6574 if (GET_CODE (x) == CONST_INT)
6575 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6576 else if (flag_pic)
2a2ab3f9
JVA
6577 output_pic_addr_const (file, x, code);
6578 else
6579 output_addr_const (file, x);
6580 }
6581}
6582\f
6583/* Print a memory operand whose address is ADDR. */
6584
6585void
6586print_operand_address (file, addr)
6587 FILE *file;
6588 register rtx addr;
6589{
e075ae69
RH
6590 struct ix86_address parts;
6591 rtx base, index, disp;
6592 int scale;
e9a25f70 6593
e075ae69
RH
6594 if (! ix86_decompose_address (addr, &parts))
6595 abort ();
e9a25f70 6596
e075ae69
RH
6597 base = parts.base;
6598 index = parts.index;
6599 disp = parts.disp;
6600 scale = parts.scale;
e9a25f70 6601
e075ae69
RH
6602 if (!base && !index)
6603 {
6604 /* Displacement only requires special attention. */
e9a25f70 6605
e075ae69 6606 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 6607 {
80f33d06 6608 if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
6609 {
6610 if (USER_LABEL_PREFIX[0] == 0)
6611 putc ('%', file);
6612 fputs ("ds:", file);
6613 }
e075ae69 6614 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
2a2ab3f9 6615 }
e075ae69
RH
6616 else if (flag_pic)
6617 output_pic_addr_const (file, addr, 0);
6618 else
6619 output_addr_const (file, addr);
0d7d98ee
JH
6620
6621 /* Use one byte shorter RIP relative addressing for 64bit mode. */
edfe8595
RH
6622 if (TARGET_64BIT
6623 && (GET_CODE (addr) == SYMBOL_REF
6624 || GET_CODE (addr) == LABEL_REF
6625 || (GET_CODE (addr) == CONST
6626 && GET_CODE (XEXP (addr, 0)) == PLUS
6627 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6628 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
0d7d98ee 6629 fputs ("(%rip)", file);
e075ae69
RH
6630 }
6631 else
6632 {
80f33d06 6633 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9 6634 {
e075ae69 6635 if (disp)
2a2ab3f9 6636 {
c399861d 6637 if (flag_pic)
e075ae69
RH
6638 output_pic_addr_const (file, disp, 0);
6639 else if (GET_CODE (disp) == LABEL_REF)
6640 output_asm_label (disp);
2a2ab3f9 6641 else
e075ae69 6642 output_addr_const (file, disp);
2a2ab3f9
JVA
6643 }
6644
e075ae69
RH
6645 putc ('(', file);
6646 if (base)
6647 PRINT_REG (base, 0, file);
6648 if (index)
2a2ab3f9 6649 {
e075ae69
RH
6650 putc (',', file);
6651 PRINT_REG (index, 0, file);
6652 if (scale != 1)
6653 fprintf (file, ",%d", scale);
2a2ab3f9 6654 }
e075ae69 6655 putc (')', file);
2a2ab3f9 6656 }
2a2ab3f9
JVA
6657 else
6658 {
e075ae69 6659 rtx offset = NULL_RTX;
e9a25f70 6660
e075ae69
RH
6661 if (disp)
6662 {
6663 /* Pull out the offset of a symbol; print any symbol itself. */
6664 if (GET_CODE (disp) == CONST
6665 && GET_CODE (XEXP (disp, 0)) == PLUS
6666 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6667 {
6668 offset = XEXP (XEXP (disp, 0), 1);
6669 disp = gen_rtx_CONST (VOIDmode,
6670 XEXP (XEXP (disp, 0), 0));
6671 }
ce193852 6672
e075ae69
RH
6673 if (flag_pic)
6674 output_pic_addr_const (file, disp, 0);
6675 else if (GET_CODE (disp) == LABEL_REF)
6676 output_asm_label (disp);
6677 else if (GET_CODE (disp) == CONST_INT)
6678 offset = disp;
6679 else
6680 output_addr_const (file, disp);
6681 }
e9a25f70 6682
e075ae69
RH
6683 putc ('[', file);
6684 if (base)
a8620236 6685 {
e075ae69
RH
6686 PRINT_REG (base, 0, file);
6687 if (offset)
6688 {
6689 if (INTVAL (offset) >= 0)
6690 putc ('+', file);
6691 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6692 }
a8620236 6693 }
e075ae69
RH
6694 else if (offset)
6695 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 6696 else
e075ae69 6697 putc ('0', file);
e9a25f70 6698
e075ae69
RH
6699 if (index)
6700 {
6701 putc ('+', file);
6702 PRINT_REG (index, 0, file);
6703 if (scale != 1)
6704 fprintf (file, "*%d", scale);
6705 }
6706 putc (']', file);
6707 }
2a2ab3f9
JVA
6708 }
6709}
f996902d
RH
6710
6711bool
6712output_addr_const_extra (file, x)
6713 FILE *file;
6714 rtx x;
6715{
6716 rtx op;
6717
6718 if (GET_CODE (x) != UNSPEC)
6719 return false;
6720
6721 op = XVECEXP (x, 0, 0);
6722 switch (XINT (x, 1))
6723 {
6724 case UNSPEC_GOTTPOFF:
6725 output_addr_const (file, op);
6726 fputs ("@GOTTPOFF", file);
6727 break;
6728 case UNSPEC_TPOFF:
6729 output_addr_const (file, op);
6730 fputs ("@TPOFF", file);
6731 break;
6732 case UNSPEC_NTPOFF:
6733 output_addr_const (file, op);
6734 fputs ("@NTPOFF", file);
6735 break;
6736 case UNSPEC_DTPOFF:
6737 output_addr_const (file, op);
6738 fputs ("@DTPOFF", file);
6739 break;
6740
6741 default:
6742 return false;
6743 }
6744
6745 return true;
6746}
2a2ab3f9
JVA
6747\f
6748/* Split one or more DImode RTL references into pairs of SImode
6749 references. The RTL can be REG, offsettable MEM, integer constant, or
6750 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6751 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 6752 that parallel "operands". */
2a2ab3f9
JVA
6753
6754void
6755split_di (operands, num, lo_half, hi_half)
6756 rtx operands[];
6757 int num;
6758 rtx lo_half[], hi_half[];
6759{
6760 while (num--)
6761 {
57dbca5e 6762 rtx op = operands[num];
b932f770
JH
6763
6764 /* simplify_subreg refuse to split volatile memory addresses,
6765 but we still have to handle it. */
6766 if (GET_CODE (op) == MEM)
2a2ab3f9 6767 {
f4ef873c 6768 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 6769 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
6770 }
6771 else
b932f770 6772 {
38ca929b
JH
6773 lo_half[num] = simplify_gen_subreg (SImode, op,
6774 GET_MODE (op) == VOIDmode
6775 ? DImode : GET_MODE (op), 0);
6776 hi_half[num] = simplify_gen_subreg (SImode, op,
6777 GET_MODE (op) == VOIDmode
6778 ? DImode : GET_MODE (op), 4);
b932f770 6779 }
2a2ab3f9
JVA
6780 }
6781}
44cf5b6a
JH
6782/* Split one or more TImode RTL references into pairs of SImode
6783 references. The RTL can be REG, offsettable MEM, integer constant, or
6784 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6785 split and "num" is its length. lo_half and hi_half are output arrays
6786 that parallel "operands". */
6787
6788void
6789split_ti (operands, num, lo_half, hi_half)
6790 rtx operands[];
6791 int num;
6792 rtx lo_half[], hi_half[];
6793{
6794 while (num--)
6795 {
6796 rtx op = operands[num];
b932f770
JH
6797
6798 /* simplify_subreg refuse to split volatile memory addresses, but we
6799 still have to handle it. */
6800 if (GET_CODE (op) == MEM)
44cf5b6a
JH
6801 {
6802 lo_half[num] = adjust_address (op, DImode, 0);
6803 hi_half[num] = adjust_address (op, DImode, 8);
6804 }
6805 else
b932f770
JH
6806 {
6807 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6808 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6809 }
44cf5b6a
JH
6810 }
6811}
2a2ab3f9 6812\f
2a2ab3f9
JVA
6813/* Output code to perform a 387 binary operation in INSN, one of PLUS,
6814 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6815 is the expression of the binary operation. The output may either be
6816 emitted here, or returned to the caller, like all output_* functions.
6817
6818 There is no guarantee that the operands are the same mode, as they
0f290768 6819 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 6820
e3c2afab
AM
6821#ifndef SYSV386_COMPAT
6822/* Set to 1 for compatibility with brain-damaged assemblers. No-one
6823 wants to fix the assemblers because that causes incompatibility
6824 with gcc. No-one wants to fix gcc because that causes
6825 incompatibility with assemblers... You can use the option of
6826 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6827#define SYSV386_COMPAT 1
6828#endif
6829
69ddee61 6830const char *
2a2ab3f9
JVA
6831output_387_binary_op (insn, operands)
6832 rtx insn;
6833 rtx *operands;
6834{
e3c2afab 6835 static char buf[30];
69ddee61 6836 const char *p;
1deaa899
JH
6837 const char *ssep;
6838 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
2a2ab3f9 6839
e3c2afab
AM
6840#ifdef ENABLE_CHECKING
6841 /* Even if we do not want to check the inputs, this documents input
6842 constraints. Which helps in understanding the following code. */
6843 if (STACK_REG_P (operands[0])
6844 && ((REG_P (operands[1])
6845 && REGNO (operands[0]) == REGNO (operands[1])
6846 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6847 || (REG_P (operands[2])
6848 && REGNO (operands[0]) == REGNO (operands[2])
6849 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6850 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6851 ; /* ok */
1deaa899 6852 else if (!is_sse)
e3c2afab
AM
6853 abort ();
6854#endif
6855
2a2ab3f9
JVA
6856 switch (GET_CODE (operands[3]))
6857 {
6858 case PLUS:
e075ae69
RH
6859 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6860 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6861 p = "fiadd";
6862 else
6863 p = "fadd";
1deaa899 6864 ssep = "add";
2a2ab3f9
JVA
6865 break;
6866
6867 case MINUS:
e075ae69
RH
6868 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6869 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6870 p = "fisub";
6871 else
6872 p = "fsub";
1deaa899 6873 ssep = "sub";
2a2ab3f9
JVA
6874 break;
6875
6876 case MULT:
e075ae69
RH
6877 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6878 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6879 p = "fimul";
6880 else
6881 p = "fmul";
1deaa899 6882 ssep = "mul";
2a2ab3f9
JVA
6883 break;
6884
6885 case DIV:
e075ae69
RH
6886 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6887 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6888 p = "fidiv";
6889 else
6890 p = "fdiv";
1deaa899 6891 ssep = "div";
2a2ab3f9
JVA
6892 break;
6893
6894 default:
6895 abort ();
6896 }
6897
1deaa899
JH
6898 if (is_sse)
6899 {
6900 strcpy (buf, ssep);
6901 if (GET_MODE (operands[0]) == SFmode)
6902 strcat (buf, "ss\t{%2, %0|%0, %2}");
6903 else
6904 strcat (buf, "sd\t{%2, %0|%0, %2}");
6905 return buf;
6906 }
e075ae69 6907 strcpy (buf, p);
2a2ab3f9
JVA
6908
6909 switch (GET_CODE (operands[3]))
6910 {
6911 case MULT:
6912 case PLUS:
6913 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6914 {
e3c2afab 6915 rtx temp = operands[2];
2a2ab3f9
JVA
6916 operands[2] = operands[1];
6917 operands[1] = temp;
6918 }
6919
e3c2afab
AM
6920 /* know operands[0] == operands[1]. */
6921
2a2ab3f9 6922 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
6923 {
6924 p = "%z2\t%2";
6925 break;
6926 }
2a2ab3f9
JVA
6927
6928 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
6929 {
6930 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
6931 /* How is it that we are storing to a dead operand[2]?
6932 Well, presumably operands[1] is dead too. We can't
6933 store the result to st(0) as st(0) gets popped on this
6934 instruction. Instead store to operands[2] (which I
6935 think has to be st(1)). st(1) will be popped later.
6936 gcc <= 2.8.1 didn't have this check and generated
6937 assembly code that the Unixware assembler rejected. */
6938 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 6939 else
e3c2afab 6940 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 6941 break;
6b28fd63 6942 }
2a2ab3f9
JVA
6943
6944 if (STACK_TOP_P (operands[0]))
e3c2afab 6945 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 6946 else
e3c2afab 6947 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 6948 break;
2a2ab3f9
JVA
6949
6950 case MINUS:
6951 case DIV:
6952 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
6953 {
6954 p = "r%z1\t%1";
6955 break;
6956 }
2a2ab3f9
JVA
6957
6958 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
6959 {
6960 p = "%z2\t%2";
6961 break;
6962 }
2a2ab3f9 6963
2a2ab3f9 6964 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 6965 {
e3c2afab
AM
6966#if SYSV386_COMPAT
6967 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6968 derived assemblers, confusingly reverse the direction of
6969 the operation for fsub{r} and fdiv{r} when the
6970 destination register is not st(0). The Intel assembler
6971 doesn't have this brain damage. Read !SYSV386_COMPAT to
6972 figure out what the hardware really does. */
6973 if (STACK_TOP_P (operands[0]))
6974 p = "{p\t%0, %2|rp\t%2, %0}";
6975 else
6976 p = "{rp\t%2, %0|p\t%0, %2}";
6977#else
6b28fd63 6978 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
6979 /* As above for fmul/fadd, we can't store to st(0). */
6980 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 6981 else
e3c2afab
AM
6982 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6983#endif
e075ae69 6984 break;
6b28fd63 6985 }
2a2ab3f9
JVA
6986
6987 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 6988 {
e3c2afab 6989#if SYSV386_COMPAT
6b28fd63 6990 if (STACK_TOP_P (operands[0]))
e3c2afab 6991 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 6992 else
e3c2afab
AM
6993 p = "{p\t%1, %0|rp\t%0, %1}";
6994#else
6995 if (STACK_TOP_P (operands[0]))
6996 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6997 else
6998 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6999#endif
e075ae69 7000 break;
6b28fd63 7001 }
2a2ab3f9
JVA
7002
7003 if (STACK_TOP_P (operands[0]))
7004 {
7005 if (STACK_TOP_P (operands[1]))
e3c2afab 7006 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7007 else
e3c2afab 7008 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 7009 break;
2a2ab3f9
JVA
7010 }
7011 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
7012 {
7013#if SYSV386_COMPAT
7014 p = "{\t%1, %0|r\t%0, %1}";
7015#else
7016 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7017#endif
7018 }
2a2ab3f9 7019 else
e3c2afab
AM
7020 {
7021#if SYSV386_COMPAT
7022 p = "{r\t%2, %0|\t%0, %2}";
7023#else
7024 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7025#endif
7026 }
e075ae69 7027 break;
2a2ab3f9
JVA
7028
7029 default:
7030 abort ();
7031 }
e075ae69
RH
7032
7033 strcat (buf, p);
7034 return buf;
2a2ab3f9 7035}
e075ae69 7036
a4f31c00 7037/* Output code to initialize control word copies used by
7a2e09f4
JH
7038 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7039 is set to control word rounding downwards. */
7040void
7041emit_i387_cw_initialization (normal, round_down)
7042 rtx normal, round_down;
7043{
7044 rtx reg = gen_reg_rtx (HImode);
7045
7046 emit_insn (gen_x86_fnstcw_1 (normal));
7047 emit_move_insn (reg, normal);
7048 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7049 && !TARGET_64BIT)
7050 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7051 else
7052 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7053 emit_move_insn (round_down, reg);
7054}
7055
2a2ab3f9 7056/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 7057 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 7058 operand may be [SDX]Fmode. */
2a2ab3f9 7059
69ddee61 7060const char *
2a2ab3f9
JVA
7061output_fix_trunc (insn, operands)
7062 rtx insn;
7063 rtx *operands;
7064{
7065 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 7066 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 7067
e075ae69
RH
7068 /* Jump through a hoop or two for DImode, since the hardware has no
7069 non-popping instruction. We used to do this a different way, but
7070 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
7071 if (dimode_p && !stack_top_dies)
7072 output_asm_insn ("fld\t%y1", operands);
e075ae69 7073
7a2e09f4 7074 if (!STACK_TOP_P (operands[1]))
10195bd8
JW
7075 abort ();
7076
e075ae69 7077 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 7078 abort ();
e9a25f70 7079
7a2e09f4 7080 output_asm_insn ("fldcw\t%3", operands);
e075ae69 7081 if (stack_top_dies || dimode_p)
7a2e09f4 7082 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 7083 else
7a2e09f4 7084 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 7085 output_asm_insn ("fldcw\t%2", operands);
10195bd8 7086
e075ae69 7087 return "";
2a2ab3f9 7088}
cda749b1 7089
e075ae69
RH
7090/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7091 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7092 when fucom should be used. */
7093
69ddee61 7094const char *
e075ae69 7095output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
7096 rtx insn;
7097 rtx *operands;
e075ae69 7098 int eflags_p, unordered_p;
cda749b1 7099{
e075ae69
RH
7100 int stack_top_dies;
7101 rtx cmp_op0 = operands[0];
7102 rtx cmp_op1 = operands[1];
0644b628 7103 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
e075ae69
RH
7104
7105 if (eflags_p == 2)
7106 {
7107 cmp_op0 = cmp_op1;
7108 cmp_op1 = operands[2];
7109 }
0644b628
JH
7110 if (is_sse)
7111 {
7112 if (GET_MODE (operands[0]) == SFmode)
7113 if (unordered_p)
7114 return "ucomiss\t{%1, %0|%0, %1}";
7115 else
7116 return "comiss\t{%1, %0|%0, %y}";
7117 else
7118 if (unordered_p)
7119 return "ucomisd\t{%1, %0|%0, %1}";
7120 else
7121 return "comisd\t{%1, %0|%0, %y}";
7122 }
cda749b1 7123
e075ae69 7124 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
7125 abort ();
7126
e075ae69 7127 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 7128
e075ae69
RH
7129 if (STACK_REG_P (cmp_op1)
7130 && stack_top_dies
7131 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7132 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 7133 {
e075ae69
RH
7134 /* If both the top of the 387 stack dies, and the other operand
7135 is also a stack register that dies, then this must be a
7136 `fcompp' float compare */
7137
7138 if (eflags_p == 1)
7139 {
7140 /* There is no double popping fcomi variant. Fortunately,
7141 eflags is immune from the fstp's cc clobbering. */
7142 if (unordered_p)
7143 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7144 else
7145 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7146 return "fstp\t%y0";
7147 }
7148 else
cda749b1 7149 {
e075ae69
RH
7150 if (eflags_p == 2)
7151 {
7152 if (unordered_p)
7153 return "fucompp\n\tfnstsw\t%0";
7154 else
7155 return "fcompp\n\tfnstsw\t%0";
7156 }
cda749b1
JW
7157 else
7158 {
e075ae69
RH
7159 if (unordered_p)
7160 return "fucompp";
7161 else
7162 return "fcompp";
cda749b1
JW
7163 }
7164 }
cda749b1
JW
7165 }
7166 else
7167 {
e075ae69 7168 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 7169
0f290768 7170 static const char * const alt[24] =
e075ae69
RH
7171 {
7172 "fcom%z1\t%y1",
7173 "fcomp%z1\t%y1",
7174 "fucom%z1\t%y1",
7175 "fucomp%z1\t%y1",
0f290768 7176
e075ae69
RH
7177 "ficom%z1\t%y1",
7178 "ficomp%z1\t%y1",
7179 NULL,
7180 NULL,
7181
7182 "fcomi\t{%y1, %0|%0, %y1}",
7183 "fcomip\t{%y1, %0|%0, %y1}",
7184 "fucomi\t{%y1, %0|%0, %y1}",
7185 "fucomip\t{%y1, %0|%0, %y1}",
7186
7187 NULL,
7188 NULL,
7189 NULL,
7190 NULL,
7191
7192 "fcom%z2\t%y2\n\tfnstsw\t%0",
7193 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7194 "fucom%z2\t%y2\n\tfnstsw\t%0",
7195 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 7196
e075ae69
RH
7197 "ficom%z2\t%y2\n\tfnstsw\t%0",
7198 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7199 NULL,
7200 NULL
7201 };
7202
7203 int mask;
69ddee61 7204 const char *ret;
e075ae69
RH
7205
7206 mask = eflags_p << 3;
7207 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7208 mask |= unordered_p << 1;
7209 mask |= stack_top_dies;
7210
7211 if (mask >= 24)
7212 abort ();
7213 ret = alt[mask];
7214 if (ret == NULL)
7215 abort ();
cda749b1 7216
e075ae69 7217 return ret;
cda749b1
JW
7218 }
7219}
2a2ab3f9 7220
f88c65f7
RH
7221void
7222ix86_output_addr_vec_elt (file, value)
7223 FILE *file;
7224 int value;
7225{
7226 const char *directive = ASM_LONG;
7227
7228 if (TARGET_64BIT)
7229 {
7230#ifdef ASM_QUAD
7231 directive = ASM_QUAD;
7232#else
7233 abort ();
7234#endif
7235 }
7236
7237 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7238}
7239
7240void
7241ix86_output_addr_diff_elt (file, value, rel)
7242 FILE *file;
7243 int value, rel;
7244{
7245 if (TARGET_64BIT)
74411039 7246 fprintf (file, "%s%s%d-%s%d\n",
f88c65f7
RH
7247 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7248 else if (HAVE_AS_GOTOFF_IN_DATA)
7249 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7250 else
7251 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
7252 ASM_LONG, LPREFIX, value);
7253}
32b5b1aa 7254\f
a8bac9ab
RH
7255/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7256 for the target. */
7257
7258void
7259ix86_expand_clear (dest)
7260 rtx dest;
7261{
7262 rtx tmp;
7263
7264 /* We play register width games, which are only valid after reload. */
7265 if (!reload_completed)
7266 abort ();
7267
7268 /* Avoid HImode and its attendant prefix byte. */
7269 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7270 dest = gen_rtx_REG (SImode, REGNO (dest));
7271
7272 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7273
7274 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7275 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7276 {
7277 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7278 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7279 }
7280
7281 emit_insn (tmp);
7282}
7283
f996902d
RH
7284/* X is an unchanging MEM. If it is a constant pool reference, return
7285 the constant pool rtx, else NULL. */
7286
7287static rtx
7288maybe_get_pool_constant (x)
7289 rtx x;
7290{
7291 x = XEXP (x, 0);
7292
7293 if (flag_pic)
7294 {
7295 if (GET_CODE (x) != PLUS)
7296 return NULL_RTX;
7297 if (XEXP (x, 0) != pic_offset_table_rtx)
7298 return NULL_RTX;
7299 x = XEXP (x, 1);
7300 if (GET_CODE (x) != CONST)
7301 return NULL_RTX;
7302 x = XEXP (x, 0);
7303 if (GET_CODE (x) != UNSPEC)
7304 return NULL_RTX;
7305 if (XINT (x, 1) != UNSPEC_GOTOFF)
7306 return NULL_RTX;
7307 x = XVECEXP (x, 0, 0);
7308 }
7309
7310 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7311 return get_pool_constant (x);
7312
7313 return NULL_RTX;
7314}
7315
79325812 7316void
e075ae69
RH
7317ix86_expand_move (mode, operands)
7318 enum machine_mode mode;
7319 rtx operands[];
32b5b1aa 7320{
e075ae69 7321 int strict = (reload_in_progress || reload_completed);
f996902d
RH
7322 rtx insn, op0, op1, tmp;
7323
7324 op0 = operands[0];
7325 op1 = operands[1];
7326
7327 /* ??? We have a slight problem. We need to say that tls symbols are
7328 not legitimate constants so that reload does not helpfully reload
7329 these constants from a REG_EQUIV, which we cannot handle. (Recall
7330 that general- and local-dynamic address resolution requires a
7331 function call.)
e9a25f70 7332
f996902d
RH
7333 However, if we say that tls symbols are not legitimate constants,
7334 then emit_move_insn helpfully drop them into the constant pool.
7335
7336 It is far easier to work around emit_move_insn than reload. Recognize
7337 the MEM that we would have created and extract the symbol_ref. */
7338
7339 if (mode == Pmode
7340 && GET_CODE (op1) == MEM
7341 && RTX_UNCHANGING_P (op1))
32b5b1aa 7342 {
f996902d
RH
7343 tmp = maybe_get_pool_constant (op1);
7344 /* Note that we only care about symbolic constants here, which
7345 unlike CONST_INT will always have a proper mode. */
7346 if (tmp && GET_MODE (tmp) == Pmode)
7347 op1 = tmp;
7348 }
e9a25f70 7349
f996902d
RH
7350 if (tls_symbolic_operand (op1, Pmode))
7351 {
7352 op1 = legitimize_address (op1, op1, VOIDmode);
7353 if (GET_CODE (op0) == MEM)
7354 {
7355 tmp = gen_reg_rtx (mode);
7356 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7357 op1 = tmp;
7358 }
7359 }
7360 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7361 {
7362 if (GET_CODE (op0) == MEM)
7363 op1 = force_reg (Pmode, op1);
e075ae69 7364 else
32b5b1aa 7365 {
f996902d 7366 rtx temp = op0;
e075ae69
RH
7367 if (GET_CODE (temp) != REG)
7368 temp = gen_reg_rtx (Pmode);
f996902d
RH
7369 temp = legitimize_pic_address (op1, temp);
7370 if (temp == op0)
e075ae69 7371 return;
f996902d 7372 op1 = temp;
32b5b1aa 7373 }
e075ae69
RH
7374 }
7375 else
7376 {
f996902d 7377 if (GET_CODE (op0) == MEM
44cf5b6a 7378 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
f996902d
RH
7379 || !push_operand (op0, mode))
7380 && GET_CODE (op1) == MEM)
7381 op1 = force_reg (mode, op1);
e9a25f70 7382
f996902d
RH
7383 if (push_operand (op0, mode)
7384 && ! general_no_elim_operand (op1, mode))
7385 op1 = copy_to_mode_reg (mode, op1);
2c5a510c 7386
44cf5b6a
JH
7387 /* Force large constants in 64bit compilation into register
7388 to get them CSEed. */
7389 if (TARGET_64BIT && mode == DImode
f996902d
RH
7390 && immediate_operand (op1, mode)
7391 && !x86_64_zero_extended_value (op1)
7392 && !register_operand (op0, mode)
44cf5b6a 7393 && optimize && !reload_completed && !reload_in_progress)
f996902d 7394 op1 = copy_to_mode_reg (mode, op1);
44cf5b6a 7395
e075ae69 7396 if (FLOAT_MODE_P (mode))
32b5b1aa 7397 {
d7a29404
JH
7398 /* If we are loading a floating point constant to a register,
7399 force the value to memory now, since we'll get better code
7400 out the back end. */
e075ae69
RH
7401
7402 if (strict)
7403 ;
f996902d
RH
7404 else if (GET_CODE (op1) == CONST_DOUBLE
7405 && register_operand (op0, mode))
7406 op1 = validize_mem (force_const_mem (mode, op1));
32b5b1aa 7407 }
32b5b1aa 7408 }
e9a25f70 7409
f996902d 7410 insn = gen_rtx_SET (VOIDmode, op0, op1);
e9a25f70 7411
e075ae69
RH
7412 emit_insn (insn);
7413}
e9a25f70 7414
e37af218
RH
7415void
7416ix86_expand_vector_move (mode, operands)
7417 enum machine_mode mode;
7418 rtx operands[];
7419{
7420 /* Force constants other than zero into memory. We do not know how
7421 the instructions used to build constants modify the upper 64 bits
7422 of the register, once we have that information we may be able
7423 to handle some of them more efficiently. */
7424 if ((reload_in_progress | reload_completed) == 0
7425 && register_operand (operands[0], mode)
7426 && CONSTANT_P (operands[1]))
7427 {
7428 rtx addr = gen_reg_rtx (Pmode);
7429 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7430 operands[1] = gen_rtx_MEM (mode, addr);
7431 }
7432
7433 /* Make operand1 a register if it isn't already. */
7434 if ((reload_in_progress | reload_completed) == 0
7435 && !register_operand (operands[0], mode)
7436 && !register_operand (operands[1], mode)
7437 && operands[1] != CONST0_RTX (mode))
7438 {
59bef189 7439 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
e37af218
RH
7440 emit_move_insn (operands[0], temp);
7441 return;
7442 }
7443
7444 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7445}
7446
e075ae69
RH
7447/* Attempt to expand a binary operator. Make the expansion closer to the
7448 actual machine, then just general_operand, which will allow 3 separate
9d81fc27 7449 memory references (one output, two input) in a single insn. */
e9a25f70 7450
e075ae69
RH
7451void
7452ix86_expand_binary_operator (code, mode, operands)
7453 enum rtx_code code;
7454 enum machine_mode mode;
7455 rtx operands[];
7456{
7457 int matching_memory;
7458 rtx src1, src2, dst, op, clob;
7459
7460 dst = operands[0];
7461 src1 = operands[1];
7462 src2 = operands[2];
7463
7464 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7465 if (GET_RTX_CLASS (code) == 'c'
7466 && (rtx_equal_p (dst, src2)
7467 || immediate_operand (src1, mode)))
7468 {
7469 rtx temp = src1;
7470 src1 = src2;
7471 src2 = temp;
32b5b1aa 7472 }
e9a25f70 7473
e075ae69
RH
7474 /* If the destination is memory, and we do not have matching source
7475 operands, do things in registers. */
7476 matching_memory = 0;
7477 if (GET_CODE (dst) == MEM)
32b5b1aa 7478 {
e075ae69
RH
7479 if (rtx_equal_p (dst, src1))
7480 matching_memory = 1;
7481 else if (GET_RTX_CLASS (code) == 'c'
7482 && rtx_equal_p (dst, src2))
7483 matching_memory = 2;
7484 else
7485 dst = gen_reg_rtx (mode);
7486 }
0f290768 7487
e075ae69
RH
7488 /* Both source operands cannot be in memory. */
7489 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7490 {
7491 if (matching_memory != 2)
7492 src2 = force_reg (mode, src2);
7493 else
7494 src1 = force_reg (mode, src1);
32b5b1aa 7495 }
e9a25f70 7496
06a964de
JH
7497 /* If the operation is not commutable, source 1 cannot be a constant
7498 or non-matching memory. */
0f290768 7499 if ((CONSTANT_P (src1)
06a964de
JH
7500 || (!matching_memory && GET_CODE (src1) == MEM))
7501 && GET_RTX_CLASS (code) != 'c')
e075ae69 7502 src1 = force_reg (mode, src1);
0f290768 7503
e075ae69 7504 /* If optimizing, copy to regs to improve CSE */
fe577e58 7505 if (optimize && ! no_new_pseudos)
32b5b1aa 7506 {
e075ae69
RH
7507 if (GET_CODE (dst) == MEM)
7508 dst = gen_reg_rtx (mode);
7509 if (GET_CODE (src1) == MEM)
7510 src1 = force_reg (mode, src1);
7511 if (GET_CODE (src2) == MEM)
7512 src2 = force_reg (mode, src2);
32b5b1aa 7513 }
e9a25f70 7514
e075ae69
RH
7515 /* Emit the instruction. */
7516
7517 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7518 if (reload_in_progress)
7519 {
7520 /* Reload doesn't know about the flags register, and doesn't know that
7521 it doesn't want to clobber it. We can only do this with PLUS. */
7522 if (code != PLUS)
7523 abort ();
7524 emit_insn (op);
7525 }
7526 else
32b5b1aa 7527 {
e075ae69
RH
7528 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7529 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 7530 }
e9a25f70 7531
e075ae69
RH
7532 /* Fix up the destination if needed. */
7533 if (dst != operands[0])
7534 emit_move_insn (operands[0], dst);
7535}
7536
7537/* Return TRUE or FALSE depending on whether the binary operator meets the
7538 appropriate constraints. */
7539
7540int
7541ix86_binary_operator_ok (code, mode, operands)
7542 enum rtx_code code;
7543 enum machine_mode mode ATTRIBUTE_UNUSED;
7544 rtx operands[3];
7545{
7546 /* Both source operands cannot be in memory. */
7547 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7548 return 0;
7549 /* If the operation is not commutable, source 1 cannot be a constant. */
7550 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7551 return 0;
7552 /* If the destination is memory, we must have a matching source operand. */
7553 if (GET_CODE (operands[0]) == MEM
7554 && ! (rtx_equal_p (operands[0], operands[1])
7555 || (GET_RTX_CLASS (code) == 'c'
7556 && rtx_equal_p (operands[0], operands[2]))))
7557 return 0;
06a964de 7558 /* If the operation is not commutable and the source 1 is memory, we must
d6a7951f 7559 have a matching destination. */
06a964de
JH
7560 if (GET_CODE (operands[1]) == MEM
7561 && GET_RTX_CLASS (code) != 'c'
7562 && ! rtx_equal_p (operands[0], operands[1]))
7563 return 0;
e075ae69
RH
7564 return 1;
7565}
7566
7567/* Attempt to expand a unary operator. Make the expansion closer to the
7568 actual machine, then just general_operand, which will allow 2 separate
9d81fc27 7569 memory references (one output, one input) in a single insn. */
e075ae69 7570
9d81fc27 7571void
e075ae69
RH
7572ix86_expand_unary_operator (code, mode, operands)
7573 enum rtx_code code;
7574 enum machine_mode mode;
7575 rtx operands[];
7576{
06a964de
JH
7577 int matching_memory;
7578 rtx src, dst, op, clob;
7579
7580 dst = operands[0];
7581 src = operands[1];
e075ae69 7582
06a964de
JH
7583 /* If the destination is memory, and we do not have matching source
7584 operands, do things in registers. */
7585 matching_memory = 0;
7586 if (GET_CODE (dst) == MEM)
32b5b1aa 7587 {
06a964de
JH
7588 if (rtx_equal_p (dst, src))
7589 matching_memory = 1;
e075ae69 7590 else
06a964de 7591 dst = gen_reg_rtx (mode);
32b5b1aa 7592 }
e9a25f70 7593
06a964de
JH
7594 /* When source operand is memory, destination must match. */
7595 if (!matching_memory && GET_CODE (src) == MEM)
7596 src = force_reg (mode, src);
0f290768 7597
06a964de 7598 /* If optimizing, copy to regs to improve CSE */
fe577e58 7599 if (optimize && ! no_new_pseudos)
06a964de
JH
7600 {
7601 if (GET_CODE (dst) == MEM)
7602 dst = gen_reg_rtx (mode);
7603 if (GET_CODE (src) == MEM)
7604 src = force_reg (mode, src);
7605 }
7606
7607 /* Emit the instruction. */
7608
7609 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7610 if (reload_in_progress || code == NOT)
7611 {
7612 /* Reload doesn't know about the flags register, and doesn't know that
7613 it doesn't want to clobber it. */
7614 if (code != NOT)
7615 abort ();
7616 emit_insn (op);
7617 }
7618 else
7619 {
7620 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7621 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7622 }
7623
7624 /* Fix up the destination if needed. */
7625 if (dst != operands[0])
7626 emit_move_insn (operands[0], dst);
e075ae69
RH
7627}
7628
7629/* Return TRUE or FALSE depending on whether the unary operator meets the
7630 appropriate constraints. */
7631
7632int
7633ix86_unary_operator_ok (code, mode, operands)
7634 enum rtx_code code ATTRIBUTE_UNUSED;
7635 enum machine_mode mode ATTRIBUTE_UNUSED;
7636 rtx operands[2] ATTRIBUTE_UNUSED;
7637{
06a964de
JH
7638 /* If one of operands is memory, source and destination must match. */
7639 if ((GET_CODE (operands[0]) == MEM
7640 || GET_CODE (operands[1]) == MEM)
7641 && ! rtx_equal_p (operands[0], operands[1]))
7642 return FALSE;
e075ae69
RH
7643 return TRUE;
7644}
7645
16189740
RH
7646/* Return TRUE or FALSE depending on whether the first SET in INSN
7647 has source and destination with matching CC modes, and that the
7648 CC mode is at least as constrained as REQ_MODE. */
7649
7650int
7651ix86_match_ccmode (insn, req_mode)
7652 rtx insn;
7653 enum machine_mode req_mode;
7654{
7655 rtx set;
7656 enum machine_mode set_mode;
7657
7658 set = PATTERN (insn);
7659 if (GET_CODE (set) == PARALLEL)
7660 set = XVECEXP (set, 0, 0);
7661 if (GET_CODE (set) != SET)
7662 abort ();
9076b9c1
JH
7663 if (GET_CODE (SET_SRC (set)) != COMPARE)
7664 abort ();
16189740
RH
7665
7666 set_mode = GET_MODE (SET_DEST (set));
7667 switch (set_mode)
7668 {
9076b9c1
JH
7669 case CCNOmode:
7670 if (req_mode != CCNOmode
7671 && (req_mode != CCmode
7672 || XEXP (SET_SRC (set), 1) != const0_rtx))
7673 return 0;
7674 break;
16189740 7675 case CCmode:
9076b9c1 7676 if (req_mode == CCGCmode)
16189740
RH
7677 return 0;
7678 /* FALLTHRU */
9076b9c1
JH
7679 case CCGCmode:
7680 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7681 return 0;
7682 /* FALLTHRU */
7683 case CCGOCmode:
16189740
RH
7684 if (req_mode == CCZmode)
7685 return 0;
7686 /* FALLTHRU */
7687 case CCZmode:
7688 break;
7689
7690 default:
7691 abort ();
7692 }
7693
7694 return (GET_MODE (SET_SRC (set)) == set_mode);
7695}
7696
e075ae69
RH
7697/* Generate insn patterns to do an integer compare of OPERANDS. */
7698
7699static rtx
7700ix86_expand_int_compare (code, op0, op1)
7701 enum rtx_code code;
7702 rtx op0, op1;
7703{
7704 enum machine_mode cmpmode;
7705 rtx tmp, flags;
7706
7707 cmpmode = SELECT_CC_MODE (code, op0, op1);
7708 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7709
7710 /* This is very simple, but making the interface the same as in the
7711 FP case makes the rest of the code easier. */
7712 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7713 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7714
7715 /* Return the test that should be put into the flags user, i.e.
7716 the bcc, scc, or cmov instruction. */
7717 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7718}
7719
3a3677ff
RH
7720/* Figure out whether to use ordered or unordered fp comparisons.
7721 Return the appropriate mode to use. */
e075ae69 7722
b1cdafbb 7723enum machine_mode
3a3677ff 7724ix86_fp_compare_mode (code)
8752c357 7725 enum rtx_code code ATTRIBUTE_UNUSED;
e075ae69 7726{
9e7adcb3
JH
7727 /* ??? In order to make all comparisons reversible, we do all comparisons
7728 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7729 all forms trapping and nontrapping comparisons, we can make inequality
7730 comparisons trapping again, since it results in better code when using
7731 FCOM based compares. */
7732 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
7733}
7734
9076b9c1
JH
7735enum machine_mode
7736ix86_cc_mode (code, op0, op1)
7737 enum rtx_code code;
7738 rtx op0, op1;
7739{
7740 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7741 return ix86_fp_compare_mode (code);
7742 switch (code)
7743 {
7744 /* Only zero flag is needed. */
7745 case EQ: /* ZF=0 */
7746 case NE: /* ZF!=0 */
7747 return CCZmode;
7748 /* Codes needing carry flag. */
265dab10
JH
7749 case GEU: /* CF=0 */
7750 case GTU: /* CF=0 & ZF=0 */
7e08e190
JH
7751 case LTU: /* CF=1 */
7752 case LEU: /* CF=1 | ZF=1 */
265dab10 7753 return CCmode;
9076b9c1
JH
7754 /* Codes possibly doable only with sign flag when
7755 comparing against zero. */
7756 case GE: /* SF=OF or SF=0 */
7e08e190 7757 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
7758 if (op1 == const0_rtx)
7759 return CCGOCmode;
7760 else
7761 /* For other cases Carry flag is not required. */
7762 return CCGCmode;
7763 /* Codes doable only with sign flag when comparing
7764 against zero, but we miss jump instruction for it
7765 so we need to use relational tests agains overflow
7766 that thus needs to be zero. */
7767 case GT: /* ZF=0 & SF=OF */
7768 case LE: /* ZF=1 | SF<>OF */
7769 if (op1 == const0_rtx)
7770 return CCNOmode;
7771 else
7772 return CCGCmode;
7fcd7218
JH
7773 /* strcmp pattern do (use flags) and combine may ask us for proper
7774 mode. */
7775 case USE:
7776 return CCmode;
9076b9c1 7777 default:
0f290768 7778 abort ();
9076b9c1
JH
7779 }
7780}
7781
3a3677ff
RH
7782/* Return true if we should use an FCOMI instruction for this fp comparison. */
7783
a940d8bd 7784int
3a3677ff 7785ix86_use_fcomi_compare (code)
9e7adcb3 7786 enum rtx_code code ATTRIBUTE_UNUSED;
3a3677ff 7787{
9e7adcb3
JH
7788 enum rtx_code swapped_code = swap_condition (code);
7789 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7790 || (ix86_fp_comparison_cost (swapped_code)
7791 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
7792}
7793
0f290768 7794/* Swap, force into registers, or otherwise massage the two operands
3a3677ff
RH
7795 to a fp comparison. The operands are updated in place; the new
7796 comparsion code is returned. */
7797
7798static enum rtx_code
7799ix86_prepare_fp_compare_args (code, pop0, pop1)
7800 enum rtx_code code;
7801 rtx *pop0, *pop1;
7802{
7803 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7804 rtx op0 = *pop0, op1 = *pop1;
7805 enum machine_mode op_mode = GET_MODE (op0);
0644b628 7806 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
3a3677ff 7807
e075ae69 7808 /* All of the unordered compare instructions only work on registers.
3a3677ff
RH
7809 The same is true of the XFmode compare instructions. The same is
7810 true of the fcomi compare instructions. */
7811
0644b628
JH
7812 if (!is_sse
7813 && (fpcmp_mode == CCFPUmode
7814 || op_mode == XFmode
7815 || op_mode == TFmode
7816 || ix86_use_fcomi_compare (code)))
e075ae69 7817 {
3a3677ff
RH
7818 op0 = force_reg (op_mode, op0);
7819 op1 = force_reg (op_mode, op1);
e075ae69
RH
7820 }
7821 else
7822 {
7823 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7824 things around if they appear profitable, otherwise force op0
7825 into a register. */
7826
7827 if (standard_80387_constant_p (op0) == 0
7828 || (GET_CODE (op0) == MEM
7829 && ! (standard_80387_constant_p (op1) == 0
7830 || GET_CODE (op1) == MEM)))
32b5b1aa 7831 {
e075ae69
RH
7832 rtx tmp;
7833 tmp = op0, op0 = op1, op1 = tmp;
7834 code = swap_condition (code);
7835 }
7836
7837 if (GET_CODE (op0) != REG)
3a3677ff 7838 op0 = force_reg (op_mode, op0);
e075ae69
RH
7839
7840 if (CONSTANT_P (op1))
7841 {
7842 if (standard_80387_constant_p (op1))
3a3677ff 7843 op1 = force_reg (op_mode, op1);
e075ae69 7844 else
3a3677ff 7845 op1 = validize_mem (force_const_mem (op_mode, op1));
32b5b1aa
SC
7846 }
7847 }
e9a25f70 7848
9e7adcb3
JH
7849 /* Try to rearrange the comparison to make it cheaper. */
7850 if (ix86_fp_comparison_cost (code)
7851 > ix86_fp_comparison_cost (swap_condition (code))
558740bf 7852 && (GET_CODE (op1) == REG || !no_new_pseudos))
9e7adcb3
JH
7853 {
7854 rtx tmp;
7855 tmp = op0, op0 = op1, op1 = tmp;
7856 code = swap_condition (code);
7857 if (GET_CODE (op0) != REG)
7858 op0 = force_reg (op_mode, op0);
7859 }
7860
3a3677ff
RH
7861 *pop0 = op0;
7862 *pop1 = op1;
7863 return code;
7864}
7865
c0c102a9
JH
7866/* Convert comparison codes we use to represent FP comparison to integer
7867 code that will result in proper branch. Return UNKNOWN if no such code
7868 is available. */
7869static enum rtx_code
7870ix86_fp_compare_code_to_integer (code)
7871 enum rtx_code code;
7872{
7873 switch (code)
7874 {
7875 case GT:
7876 return GTU;
7877 case GE:
7878 return GEU;
7879 case ORDERED:
7880 case UNORDERED:
7881 return code;
7882 break;
7883 case UNEQ:
7884 return EQ;
7885 break;
7886 case UNLT:
7887 return LTU;
7888 break;
7889 case UNLE:
7890 return LEU;
7891 break;
7892 case LTGT:
7893 return NE;
7894 break;
7895 default:
7896 return UNKNOWN;
7897 }
7898}
7899
7900/* Split comparison code CODE into comparisons we can do using branch
7901 instructions. BYPASS_CODE is comparison code for branch that will
7902 branch around FIRST_CODE and SECOND_CODE. If some of branches
7903 is not required, set value to NIL.
7904 We never require more than two branches. */
7905static void
7906ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7907 enum rtx_code code, *bypass_code, *first_code, *second_code;
7908{
7909 *first_code = code;
7910 *bypass_code = NIL;
7911 *second_code = NIL;
7912
7913 /* The fcomi comparison sets flags as follows:
7914
7915 cmp ZF PF CF
7916 > 0 0 0
7917 < 0 0 1
7918 = 1 0 0
7919 un 1 1 1 */
7920
7921 switch (code)
7922 {
7923 case GT: /* GTU - CF=0 & ZF=0 */
7924 case GE: /* GEU - CF=0 */
7925 case ORDERED: /* PF=0 */
7926 case UNORDERED: /* PF=1 */
7927 case UNEQ: /* EQ - ZF=1 */
7928 case UNLT: /* LTU - CF=1 */
7929 case UNLE: /* LEU - CF=1 | ZF=1 */
7930 case LTGT: /* EQ - ZF=0 */
7931 break;
7932 case LT: /* LTU - CF=1 - fails on unordered */
7933 *first_code = UNLT;
7934 *bypass_code = UNORDERED;
7935 break;
7936 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7937 *first_code = UNLE;
7938 *bypass_code = UNORDERED;
7939 break;
7940 case EQ: /* EQ - ZF=1 - fails on unordered */
7941 *first_code = UNEQ;
7942 *bypass_code = UNORDERED;
7943 break;
7944 case NE: /* NE - ZF=0 - fails on unordered */
7945 *first_code = LTGT;
7946 *second_code = UNORDERED;
7947 break;
7948 case UNGE: /* GEU - CF=0 - fails on unordered */
7949 *first_code = GE;
7950 *second_code = UNORDERED;
7951 break;
7952 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7953 *first_code = GT;
7954 *second_code = UNORDERED;
7955 break;
7956 default:
7957 abort ();
7958 }
7959 if (!TARGET_IEEE_FP)
7960 {
7961 *second_code = NIL;
7962 *bypass_code = NIL;
7963 }
7964}
7965
9e7adcb3
JH
7966/* Return cost of comparison done fcom + arithmetics operations on AX.
7967 All following functions do use number of instructions as an cost metrics.
7968 In future this should be tweaked to compute bytes for optimize_size and
7969 take into account performance of various instructions on various CPUs. */
7970static int
7971ix86_fp_comparison_arithmetics_cost (code)
7972 enum rtx_code code;
7973{
7974 if (!TARGET_IEEE_FP)
7975 return 4;
7976 /* The cost of code output by ix86_expand_fp_compare. */
7977 switch (code)
7978 {
7979 case UNLE:
7980 case UNLT:
7981 case LTGT:
7982 case GT:
7983 case GE:
7984 case UNORDERED:
7985 case ORDERED:
7986 case UNEQ:
7987 return 4;
7988 break;
7989 case LT:
7990 case NE:
7991 case EQ:
7992 case UNGE:
7993 return 5;
7994 break;
7995 case LE:
7996 case UNGT:
7997 return 6;
7998 break;
7999 default:
8000 abort ();
8001 }
8002}
8003
8004/* Return cost of comparison done using fcomi operation.
8005 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8006static int
8007ix86_fp_comparison_fcomi_cost (code)
8008 enum rtx_code code;
8009{
8010 enum rtx_code bypass_code, first_code, second_code;
8011 /* Return arbitarily high cost when instruction is not supported - this
8012 prevents gcc from using it. */
8013 if (!TARGET_CMOVE)
8014 return 1024;
8015 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8016 return (bypass_code != NIL || second_code != NIL) + 2;
8017}
8018
8019/* Return cost of comparison done using sahf operation.
8020 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8021static int
8022ix86_fp_comparison_sahf_cost (code)
8023 enum rtx_code code;
8024{
8025 enum rtx_code bypass_code, first_code, second_code;
8026 /* Return arbitarily high cost when instruction is not preferred - this
8027 avoids gcc from using it. */
8028 if (!TARGET_USE_SAHF && !optimize_size)
8029 return 1024;
8030 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8031 return (bypass_code != NIL || second_code != NIL) + 3;
8032}
8033
8034/* Compute cost of the comparison done using any method.
8035 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8036static int
8037ix86_fp_comparison_cost (code)
8038 enum rtx_code code;
8039{
8040 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8041 int min;
8042
8043 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8044 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8045
8046 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8047 if (min > sahf_cost)
8048 min = sahf_cost;
8049 if (min > fcomi_cost)
8050 min = fcomi_cost;
8051 return min;
8052}
c0c102a9 8053
3a3677ff
RH
8054/* Generate insn patterns to do a floating point compare of OPERANDS. */
8055
9e7adcb3
JH
8056static rtx
8057ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
3a3677ff
RH
8058 enum rtx_code code;
8059 rtx op0, op1, scratch;
9e7adcb3
JH
8060 rtx *second_test;
8061 rtx *bypass_test;
3a3677ff
RH
8062{
8063 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 8064 rtx tmp, tmp2;
9e7adcb3 8065 int cost = ix86_fp_comparison_cost (code);
c0c102a9 8066 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
8067
8068 fpcmp_mode = ix86_fp_compare_mode (code);
8069 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8070
9e7adcb3
JH
8071 if (second_test)
8072 *second_test = NULL_RTX;
8073 if (bypass_test)
8074 *bypass_test = NULL_RTX;
8075
c0c102a9
JH
8076 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8077
9e7adcb3
JH
8078 /* Do fcomi/sahf based test when profitable. */
8079 if ((bypass_code == NIL || bypass_test)
8080 && (second_code == NIL || second_test)
8081 && ix86_fp_comparison_arithmetics_cost (code) > cost)
32b5b1aa 8082 {
c0c102a9
JH
8083 if (TARGET_CMOVE)
8084 {
8085 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8086 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8087 tmp);
8088 emit_insn (tmp);
8089 }
8090 else
8091 {
8092 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 8093 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
8094 if (!scratch)
8095 scratch = gen_reg_rtx (HImode);
c0c102a9
JH
8096 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8097 emit_insn (gen_x86_sahf_1 (scratch));
8098 }
e075ae69
RH
8099
8100 /* The FP codes work out to act like unsigned. */
9a915772 8101 intcmp_mode = fpcmp_mode;
9e7adcb3
JH
8102 code = first_code;
8103 if (bypass_code != NIL)
8104 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8105 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8106 const0_rtx);
8107 if (second_code != NIL)
8108 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8109 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8110 const0_rtx);
e075ae69
RH
8111 }
8112 else
8113 {
8114 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69 8115 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 8116 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
8117 if (!scratch)
8118 scratch = gen_reg_rtx (HImode);
3a3677ff 8119 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 8120
9a915772
JH
8121 /* In the unordered case, we have to check C2 for NaN's, which
8122 doesn't happen to work out to anything nice combination-wise.
8123 So do some bit twiddling on the value we've got in AH to come
8124 up with an appropriate set of condition codes. */
e075ae69 8125
9a915772
JH
8126 intcmp_mode = CCNOmode;
8127 switch (code)
32b5b1aa 8128 {
9a915772
JH
8129 case GT:
8130 case UNGT:
8131 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 8132 {
3a3677ff 8133 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 8134 code = EQ;
9a915772
JH
8135 }
8136 else
8137 {
8138 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8139 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8140 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8141 intcmp_mode = CCmode;
8142 code = GEU;
8143 }
8144 break;
8145 case LT:
8146 case UNLT:
8147 if (code == LT && TARGET_IEEE_FP)
8148 {
3a3677ff
RH
8149 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8150 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
8151 intcmp_mode = CCmode;
8152 code = EQ;
9a915772
JH
8153 }
8154 else
8155 {
8156 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8157 code = NE;
8158 }
8159 break;
8160 case GE:
8161 case UNGE:
8162 if (code == GE || !TARGET_IEEE_FP)
8163 {
3a3677ff 8164 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 8165 code = EQ;
9a915772
JH
8166 }
8167 else
8168 {
8169 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8170 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8171 GEN_INT (0x01)));
8172 code = NE;
8173 }
8174 break;
8175 case LE:
8176 case UNLE:
8177 if (code == LE && TARGET_IEEE_FP)
8178 {
3a3677ff
RH
8179 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8180 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8181 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
8182 intcmp_mode = CCmode;
8183 code = LTU;
9a915772
JH
8184 }
8185 else
8186 {
8187 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8188 code = NE;
8189 }
8190 break;
8191 case EQ:
8192 case UNEQ:
8193 if (code == EQ && TARGET_IEEE_FP)
8194 {
3a3677ff
RH
8195 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8196 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
8197 intcmp_mode = CCmode;
8198 code = EQ;
9a915772
JH
8199 }
8200 else
8201 {
3a3677ff
RH
8202 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8203 code = NE;
8204 break;
9a915772
JH
8205 }
8206 break;
8207 case NE:
8208 case LTGT:
8209 if (code == NE && TARGET_IEEE_FP)
8210 {
3a3677ff 8211 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9a915772
JH
8212 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8213 GEN_INT (0x40)));
3a3677ff 8214 code = NE;
9a915772
JH
8215 }
8216 else
8217 {
3a3677ff
RH
8218 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8219 code = EQ;
32b5b1aa 8220 }
9a915772
JH
8221 break;
8222
8223 case UNORDERED:
8224 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8225 code = NE;
8226 break;
8227 case ORDERED:
8228 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8229 code = EQ;
8230 break;
8231
8232 default:
8233 abort ();
32b5b1aa 8234 }
32b5b1aa 8235 }
e075ae69
RH
8236
8237 /* Return the test that should be put into the flags user, i.e.
8238 the bcc, scc, or cmov instruction. */
8239 return gen_rtx_fmt_ee (code, VOIDmode,
8240 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8241 const0_rtx);
8242}
8243
9e3e266c 8244rtx
a1b8572c 8245ix86_expand_compare (code, second_test, bypass_test)
e075ae69 8246 enum rtx_code code;
a1b8572c 8247 rtx *second_test, *bypass_test;
e075ae69
RH
8248{
8249 rtx op0, op1, ret;
8250 op0 = ix86_compare_op0;
8251 op1 = ix86_compare_op1;
8252
a1b8572c
JH
8253 if (second_test)
8254 *second_test = NULL_RTX;
8255 if (bypass_test)
8256 *bypass_test = NULL_RTX;
8257
e075ae69 8258 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 8259 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 8260 second_test, bypass_test);
32b5b1aa 8261 else
e075ae69
RH
8262 ret = ix86_expand_int_compare (code, op0, op1);
8263
8264 return ret;
8265}
8266
03598dea
JH
8267/* Return true if the CODE will result in nontrivial jump sequence. */
8268bool
8269ix86_fp_jump_nontrivial_p (code)
8270 enum rtx_code code;
8271{
8272 enum rtx_code bypass_code, first_code, second_code;
8273 if (!TARGET_CMOVE)
8274 return true;
8275 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8276 return bypass_code != NIL || second_code != NIL;
8277}
8278
e075ae69 8279void
3a3677ff 8280ix86_expand_branch (code, label)
e075ae69 8281 enum rtx_code code;
e075ae69
RH
8282 rtx label;
8283{
3a3677ff 8284 rtx tmp;
e075ae69 8285
3a3677ff 8286 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 8287 {
3a3677ff
RH
8288 case QImode:
8289 case HImode:
8290 case SImode:
0d7d98ee 8291 simple:
a1b8572c 8292 tmp = ix86_expand_compare (code, NULL, NULL);
e075ae69
RH
8293 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8294 gen_rtx_LABEL_REF (VOIDmode, label),
8295 pc_rtx);
8296 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 8297 return;
e075ae69 8298
3a3677ff
RH
8299 case SFmode:
8300 case DFmode:
0f290768 8301 case XFmode:
2b589241 8302 case TFmode:
3a3677ff
RH
8303 {
8304 rtvec vec;
8305 int use_fcomi;
03598dea 8306 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
8307
8308 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8309 &ix86_compare_op1);
03598dea
JH
8310
8311 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8312
8313 /* Check whether we will use the natural sequence with one jump. If
8314 so, we can expand jump early. Otherwise delay expansion by
8315 creating compound insn to not confuse optimizers. */
8316 if (bypass_code == NIL && second_code == NIL
8317 && TARGET_CMOVE)
8318 {
8319 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8320 gen_rtx_LABEL_REF (VOIDmode, label),
8321 pc_rtx, NULL_RTX);
8322 }
8323 else
8324 {
8325 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8326 ix86_compare_op0, ix86_compare_op1);
8327 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8328 gen_rtx_LABEL_REF (VOIDmode, label),
8329 pc_rtx);
8330 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8331
8332 use_fcomi = ix86_use_fcomi_compare (code);
8333 vec = rtvec_alloc (3 + !use_fcomi);
8334 RTVEC_ELT (vec, 0) = tmp;
8335 RTVEC_ELT (vec, 1)
8336 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8337 RTVEC_ELT (vec, 2)
8338 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8339 if (! use_fcomi)
8340 RTVEC_ELT (vec, 3)
8341 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8342
8343 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8344 }
3a3677ff
RH
8345 return;
8346 }
32b5b1aa 8347
3a3677ff 8348 case DImode:
0d7d98ee
JH
8349 if (TARGET_64BIT)
8350 goto simple;
3a3677ff
RH
8351 /* Expand DImode branch into multiple compare+branch. */
8352 {
8353 rtx lo[2], hi[2], label2;
8354 enum rtx_code code1, code2, code3;
32b5b1aa 8355
3a3677ff
RH
8356 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8357 {
8358 tmp = ix86_compare_op0;
8359 ix86_compare_op0 = ix86_compare_op1;
8360 ix86_compare_op1 = tmp;
8361 code = swap_condition (code);
8362 }
8363 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8364 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 8365
3a3677ff
RH
8366 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8367 avoid two branches. This costs one extra insn, so disable when
8368 optimizing for size. */
32b5b1aa 8369
3a3677ff
RH
8370 if ((code == EQ || code == NE)
8371 && (!optimize_size
8372 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8373 {
8374 rtx xor0, xor1;
32b5b1aa 8375
3a3677ff
RH
8376 xor1 = hi[0];
8377 if (hi[1] != const0_rtx)
8378 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8379 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 8380
3a3677ff
RH
8381 xor0 = lo[0];
8382 if (lo[1] != const0_rtx)
8383 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8384 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 8385
3a3677ff
RH
8386 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8387 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 8388
3a3677ff
RH
8389 ix86_compare_op0 = tmp;
8390 ix86_compare_op1 = const0_rtx;
8391 ix86_expand_branch (code, label);
8392 return;
8393 }
e075ae69 8394
1f9124e4
JJ
8395 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8396 op1 is a constant and the low word is zero, then we can just
8397 examine the high word. */
32b5b1aa 8398
1f9124e4
JJ
8399 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8400 switch (code)
8401 {
8402 case LT: case LTU: case GE: case GEU:
8403 ix86_compare_op0 = hi[0];
8404 ix86_compare_op1 = hi[1];
8405 ix86_expand_branch (code, label);
8406 return;
8407 default:
8408 break;
8409 }
e075ae69 8410
3a3677ff 8411 /* Otherwise, we need two or three jumps. */
e075ae69 8412
3a3677ff 8413 label2 = gen_label_rtx ();
e075ae69 8414
3a3677ff
RH
8415 code1 = code;
8416 code2 = swap_condition (code);
8417 code3 = unsigned_condition (code);
e075ae69 8418
3a3677ff
RH
8419 switch (code)
8420 {
8421 case LT: case GT: case LTU: case GTU:
8422 break;
e075ae69 8423
3a3677ff
RH
8424 case LE: code1 = LT; code2 = GT; break;
8425 case GE: code1 = GT; code2 = LT; break;
8426 case LEU: code1 = LTU; code2 = GTU; break;
8427 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 8428
3a3677ff
RH
8429 case EQ: code1 = NIL; code2 = NE; break;
8430 case NE: code2 = NIL; break;
e075ae69 8431
3a3677ff
RH
8432 default:
8433 abort ();
8434 }
e075ae69 8435
3a3677ff
RH
8436 /*
8437 * a < b =>
8438 * if (hi(a) < hi(b)) goto true;
8439 * if (hi(a) > hi(b)) goto false;
8440 * if (lo(a) < lo(b)) goto true;
8441 * false:
8442 */
8443
8444 ix86_compare_op0 = hi[0];
8445 ix86_compare_op1 = hi[1];
8446
8447 if (code1 != NIL)
8448 ix86_expand_branch (code1, label);
8449 if (code2 != NIL)
8450 ix86_expand_branch (code2, label2);
8451
8452 ix86_compare_op0 = lo[0];
8453 ix86_compare_op1 = lo[1];
8454 ix86_expand_branch (code3, label);
8455
8456 if (code2 != NIL)
8457 emit_label (label2);
8458 return;
8459 }
e075ae69 8460
3a3677ff
RH
8461 default:
8462 abort ();
8463 }
32b5b1aa 8464}
e075ae69 8465
9e7adcb3
JH
8466/* Split branch based on floating point condition. */
8467void
03598dea
JH
8468ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8469 enum rtx_code code;
8470 rtx op1, op2, target1, target2, tmp;
9e7adcb3
JH
8471{
8472 rtx second, bypass;
8473 rtx label = NULL_RTX;
03598dea 8474 rtx condition;
6b24c259
JH
8475 int bypass_probability = -1, second_probability = -1, probability = -1;
8476 rtx i;
9e7adcb3
JH
8477
8478 if (target2 != pc_rtx)
8479 {
8480 rtx tmp = target2;
8481 code = reverse_condition_maybe_unordered (code);
8482 target2 = target1;
8483 target1 = tmp;
8484 }
8485
8486 condition = ix86_expand_fp_compare (code, op1, op2,
8487 tmp, &second, &bypass);
6b24c259
JH
8488
8489 if (split_branch_probability >= 0)
8490 {
8491 /* Distribute the probabilities across the jumps.
8492 Assume the BYPASS and SECOND to be always test
8493 for UNORDERED. */
8494 probability = split_branch_probability;
8495
d6a7951f 8496 /* Value of 1 is low enough to make no need for probability
6b24c259
JH
8497 to be updated. Later we may run some experiments and see
8498 if unordered values are more frequent in practice. */
8499 if (bypass)
8500 bypass_probability = 1;
8501 if (second)
8502 second_probability = 1;
8503 }
9e7adcb3
JH
8504 if (bypass != NULL_RTX)
8505 {
8506 label = gen_label_rtx ();
6b24c259
JH
8507 i = emit_jump_insn (gen_rtx_SET
8508 (VOIDmode, pc_rtx,
8509 gen_rtx_IF_THEN_ELSE (VOIDmode,
8510 bypass,
8511 gen_rtx_LABEL_REF (VOIDmode,
8512 label),
8513 pc_rtx)));
8514 if (bypass_probability >= 0)
8515 REG_NOTES (i)
8516 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8517 GEN_INT (bypass_probability),
8518 REG_NOTES (i));
8519 }
8520 i = emit_jump_insn (gen_rtx_SET
9e7adcb3
JH
8521 (VOIDmode, pc_rtx,
8522 gen_rtx_IF_THEN_ELSE (VOIDmode,
6b24c259
JH
8523 condition, target1, target2)));
8524 if (probability >= 0)
8525 REG_NOTES (i)
8526 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8527 GEN_INT (probability),
8528 REG_NOTES (i));
8529 if (second != NULL_RTX)
9e7adcb3 8530 {
6b24c259
JH
8531 i = emit_jump_insn (gen_rtx_SET
8532 (VOIDmode, pc_rtx,
8533 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8534 target2)));
8535 if (second_probability >= 0)
8536 REG_NOTES (i)
8537 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8538 GEN_INT (second_probability),
8539 REG_NOTES (i));
9e7adcb3 8540 }
9e7adcb3
JH
8541 if (label != NULL_RTX)
8542 emit_label (label);
8543}
8544
32b5b1aa 8545int
3a3677ff 8546ix86_expand_setcc (code, dest)
e075ae69 8547 enum rtx_code code;
e075ae69 8548 rtx dest;
32b5b1aa 8549{
a1b8572c
JH
8550 rtx ret, tmp, tmpreg;
8551 rtx second_test, bypass_test;
e075ae69 8552
885a70fd
JH
8553 if (GET_MODE (ix86_compare_op0) == DImode
8554 && !TARGET_64BIT)
e075ae69
RH
8555 return 0; /* FAIL */
8556
b932f770
JH
8557 if (GET_MODE (dest) != QImode)
8558 abort ();
e075ae69 8559
a1b8572c 8560 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
8561 PUT_MODE (ret, QImode);
8562
8563 tmp = dest;
a1b8572c 8564 tmpreg = dest;
32b5b1aa 8565
e075ae69 8566 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
8567 if (bypass_test || second_test)
8568 {
8569 rtx test = second_test;
8570 int bypass = 0;
8571 rtx tmp2 = gen_reg_rtx (QImode);
8572 if (bypass_test)
8573 {
8574 if (second_test)
b531087a 8575 abort ();
a1b8572c
JH
8576 test = bypass_test;
8577 bypass = 1;
8578 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8579 }
8580 PUT_MODE (test, QImode);
8581 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8582
8583 if (bypass)
8584 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8585 else
8586 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8587 }
e075ae69 8588
e075ae69 8589 return 1; /* DONE */
32b5b1aa 8590}
e075ae69 8591
32b5b1aa 8592int
e075ae69
RH
8593ix86_expand_int_movcc (operands)
8594 rtx operands[];
32b5b1aa 8595{
e075ae69
RH
8596 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8597 rtx compare_seq, compare_op;
a1b8572c 8598 rtx second_test, bypass_test;
635559ab 8599 enum machine_mode mode = GET_MODE (operands[0]);
32b5b1aa 8600
36583fea
JH
8601 /* When the compare code is not LTU or GEU, we can not use sbbl case.
8602 In case comparsion is done with immediate, we can convert it to LTU or
8603 GEU by altering the integer. */
8604
8605 if ((code == LEU || code == GTU)
8606 && GET_CODE (ix86_compare_op1) == CONST_INT
635559ab 8607 && mode != HImode
b531087a 8608 && (unsigned int) INTVAL (ix86_compare_op1) != 0xffffffff
74411039
JH
8609 /* The operand still must be representable as sign extended value. */
8610 && (!TARGET_64BIT
8611 || GET_MODE (ix86_compare_op0) != DImode
8612 || (unsigned int) INTVAL (ix86_compare_op1) != 0x7fffffff)
0f290768 8613 && GET_CODE (operands[2]) == CONST_INT
36583fea
JH
8614 && GET_CODE (operands[3]) == CONST_INT)
8615 {
8616 if (code == LEU)
8617 code = LTU;
8618 else
8619 code = GEU;
ce8076ad
JJ
8620 ix86_compare_op1
8621 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8622 GET_MODE (ix86_compare_op0));
36583fea 8623 }
3a3677ff 8624
e075ae69 8625 start_sequence ();
a1b8572c 8626 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
8627 compare_seq = gen_sequence ();
8628 end_sequence ();
8629
8630 compare_code = GET_CODE (compare_op);
8631
8632 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8633 HImode insns, we'd be swallowed in word prefix ops. */
8634
635559ab
JH
8635 if (mode != HImode
8636 && (mode != DImode || TARGET_64BIT)
0f290768 8637 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
8638 && GET_CODE (operands[3]) == CONST_INT)
8639 {
8640 rtx out = operands[0];
8641 HOST_WIDE_INT ct = INTVAL (operands[2]);
8642 HOST_WIDE_INT cf = INTVAL (operands[3]);
8643 HOST_WIDE_INT diff;
8644
a1b8572c
JH
8645 if ((compare_code == LTU || compare_code == GEU)
8646 && !second_test && !bypass_test)
e075ae69 8647 {
e075ae69
RH
8648
8649 /* Detect overlap between destination and compare sources. */
8650 rtx tmp = out;
8651
0f290768 8652 /* To simplify rest of code, restrict to the GEU case. */
36583fea
JH
8653 if (compare_code == LTU)
8654 {
8655 int tmp = ct;
8656 ct = cf;
8657 cf = tmp;
8658 compare_code = reverse_condition (compare_code);
8659 code = reverse_condition (code);
8660 }
8661 diff = ct - cf;
8662
e075ae69 8663 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
a500c31b 8664 || reg_overlap_mentioned_p (out, ix86_compare_op1))
635559ab 8665 tmp = gen_reg_rtx (mode);
e075ae69
RH
8666
8667 emit_insn (compare_seq);
635559ab 8668 if (mode == DImode)
14f73b5a
JH
8669 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8670 else
8671 emit_insn (gen_x86_movsicc_0_m1 (tmp));
e075ae69 8672
36583fea
JH
8673 if (diff == 1)
8674 {
8675 /*
8676 * cmpl op0,op1
8677 * sbbl dest,dest
8678 * [addl dest, ct]
8679 *
8680 * Size 5 - 8.
8681 */
8682 if (ct)
635559ab
JH
8683 tmp = expand_simple_binop (mode, PLUS,
8684 tmp, GEN_INT (ct),
8685 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8686 }
8687 else if (cf == -1)
8688 {
8689 /*
8690 * cmpl op0,op1
8691 * sbbl dest,dest
8692 * orl $ct, dest
8693 *
8694 * Size 8.
8695 */
635559ab
JH
8696 tmp = expand_simple_binop (mode, IOR,
8697 tmp, GEN_INT (ct),
8698 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8699 }
8700 else if (diff == -1 && ct)
8701 {
8702 /*
8703 * cmpl op0,op1
8704 * sbbl dest,dest
8705 * xorl $-1, dest
8706 * [addl dest, cf]
8707 *
8708 * Size 8 - 11.
8709 */
635559ab
JH
8710 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8711 if (cf)
8712 tmp = expand_simple_binop (mode, PLUS,
8713 tmp, GEN_INT (cf),
8714 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8715 }
8716 else
8717 {
8718 /*
8719 * cmpl op0,op1
8720 * sbbl dest,dest
8721 * andl cf - ct, dest
8722 * [addl dest, ct]
8723 *
8724 * Size 8 - 11.
8725 */
635559ab
JH
8726 tmp = expand_simple_binop (mode, AND,
8727 tmp,
d8bf17f9 8728 gen_int_mode (cf - ct, mode),
635559ab
JH
8729 tmp, 1, OPTAB_DIRECT);
8730 if (ct)
8731 tmp = expand_simple_binop (mode, PLUS,
8732 tmp, GEN_INT (ct),
8733 tmp, 1, OPTAB_DIRECT);
36583fea 8734 }
e075ae69
RH
8735
8736 if (tmp != out)
8737 emit_move_insn (out, tmp);
8738
8739 return 1; /* DONE */
8740 }
8741
8742 diff = ct - cf;
8743 if (diff < 0)
8744 {
8745 HOST_WIDE_INT tmp;
8746 tmp = ct, ct = cf, cf = tmp;
8747 diff = -diff;
734dba19
JH
8748 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8749 {
8750 /* We may be reversing unordered compare to normal compare, that
8751 is not valid in general (we may convert non-trapping condition
8752 to trapping one), however on i386 we currently emit all
8753 comparisons unordered. */
8754 compare_code = reverse_condition_maybe_unordered (compare_code);
8755 code = reverse_condition_maybe_unordered (code);
8756 }
8757 else
8758 {
8759 compare_code = reverse_condition (compare_code);
8760 code = reverse_condition (code);
8761 }
e075ae69 8762 }
0f2a3457
JJ
8763
8764 compare_code = NIL;
8765 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8766 && GET_CODE (ix86_compare_op1) == CONST_INT)
8767 {
8768 if (ix86_compare_op1 == const0_rtx
8769 && (code == LT || code == GE))
8770 compare_code = code;
8771 else if (ix86_compare_op1 == constm1_rtx)
8772 {
8773 if (code == LE)
8774 compare_code = LT;
8775 else if (code == GT)
8776 compare_code = GE;
8777 }
8778 }
8779
8780 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8781 if (compare_code != NIL
8782 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8783 && (cf == -1 || ct == -1))
8784 {
8785 /* If lea code below could be used, only optimize
8786 if it results in a 2 insn sequence. */
8787
8788 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8789 || diff == 3 || diff == 5 || diff == 9)
8790 || (compare_code == LT && ct == -1)
8791 || (compare_code == GE && cf == -1))
8792 {
8793 /*
8794 * notl op1 (if necessary)
8795 * sarl $31, op1
8796 * orl cf, op1
8797 */
8798 if (ct != -1)
8799 {
8800 cf = ct;
8801 ct = -1;
8802 code = reverse_condition (code);
8803 }
8804
8805 out = emit_store_flag (out, code, ix86_compare_op0,
8806 ix86_compare_op1, VOIDmode, 0, -1);
8807
8808 out = expand_simple_binop (mode, IOR,
8809 out, GEN_INT (cf),
8810 out, 1, OPTAB_DIRECT);
8811 if (out != operands[0])
8812 emit_move_insn (operands[0], out);
8813
8814 return 1; /* DONE */
8815 }
8816 }
8817
635559ab
JH
8818 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8819 || diff == 3 || diff == 5 || diff == 9)
8820 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
e075ae69
RH
8821 {
8822 /*
8823 * xorl dest,dest
8824 * cmpl op1,op2
8825 * setcc dest
8826 * lea cf(dest*(ct-cf)),dest
8827 *
8828 * Size 14.
8829 *
8830 * This also catches the degenerate setcc-only case.
8831 */
8832
8833 rtx tmp;
8834 int nops;
8835
8836 out = emit_store_flag (out, code, ix86_compare_op0,
8837 ix86_compare_op1, VOIDmode, 0, 1);
8838
8839 nops = 0;
885a70fd
JH
8840 /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics
8841 done in proper mode to match. */
e075ae69 8842 if (diff == 1)
14f73b5a 8843 tmp = out;
e075ae69
RH
8844 else
8845 {
885a70fd 8846 rtx out1;
14f73b5a 8847 out1 = out;
635559ab 8848 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
8849 nops++;
8850 if (diff & 1)
8851 {
635559ab 8852 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
8853 nops++;
8854 }
8855 }
8856 if (cf != 0)
8857 {
635559ab 8858 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
8859 nops++;
8860 }
885a70fd
JH
8861 if (tmp != out
8862 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
e075ae69 8863 {
14f73b5a 8864 if (nops == 1)
e075ae69
RH
8865 {
8866 rtx clob;
8867
8868 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8869 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8870
8871 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8872 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8873 emit_insn (tmp);
8874 }
8875 else
8876 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8877 }
8878 if (out != operands[0])
8879 emit_move_insn (operands[0], out);
8880
8881 return 1; /* DONE */
8882 }
8883
8884 /*
8885 * General case: Jumpful:
8886 * xorl dest,dest cmpl op1, op2
8887 * cmpl op1, op2 movl ct, dest
8888 * setcc dest jcc 1f
8889 * decl dest movl cf, dest
8890 * andl (cf-ct),dest 1:
8891 * addl ct,dest
0f290768 8892 *
e075ae69
RH
8893 * Size 20. Size 14.
8894 *
8895 * This is reasonably steep, but branch mispredict costs are
8896 * high on modern cpus, so consider failing only if optimizing
8897 * for space.
8898 *
8899 * %%% Parameterize branch_cost on the tuning architecture, then
8900 * use that. The 80386 couldn't care less about mispredicts.
8901 */
8902
8903 if (!optimize_size && !TARGET_CMOVE)
8904 {
8905 if (ct == 0)
8906 {
8907 ct = cf;
8908 cf = 0;
734dba19 8909 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
0f2a3457
JJ
8910 /* We may be reversing unordered compare to normal compare,
8911 that is not valid in general (we may convert non-trapping
8912 condition to trapping one), however on i386 we currently
8913 emit all comparisons unordered. */
8914 code = reverse_condition_maybe_unordered (code);
8915 else
8916 {
8917 code = reverse_condition (code);
8918 if (compare_code != NIL)
8919 compare_code = reverse_condition (compare_code);
8920 }
8921 }
8922
8923 if (compare_code != NIL)
8924 {
8925 /* notl op1 (if needed)
8926 sarl $31, op1
8927 andl (cf-ct), op1
8928 addl ct, op1
8929
8930 For x < 0 (resp. x <= -1) there will be no notl,
8931 so if possible swap the constants to get rid of the
8932 complement.
8933 True/false will be -1/0 while code below (store flag
8934 followed by decrement) is 0/-1, so the constants need
8935 to be exchanged once more. */
8936
8937 if (compare_code == GE || !cf)
734dba19 8938 {
0f2a3457
JJ
8939 code = reverse_condition (code);
8940 compare_code = LT;
734dba19
JH
8941 }
8942 else
8943 {
0f2a3457
JJ
8944 HOST_WIDE_INT tmp = cf;
8945 cf = ct;
8946 ct = tmp;
734dba19 8947 }
0f2a3457
JJ
8948
8949 out = emit_store_flag (out, code, ix86_compare_op0,
8950 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 8951 }
0f2a3457
JJ
8952 else
8953 {
8954 out = emit_store_flag (out, code, ix86_compare_op0,
8955 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 8956
0f2a3457
JJ
8957 out = expand_simple_binop (mode, PLUS,
8958 out, constm1_rtx,
8959 out, 1, OPTAB_DIRECT);
8960 }
e075ae69 8961
635559ab
JH
8962 out = expand_simple_binop (mode, AND,
8963 out,
d8bf17f9 8964 gen_int_mode (cf - ct, mode),
635559ab
JH
8965 out, 1, OPTAB_DIRECT);
8966 out = expand_simple_binop (mode, PLUS,
8967 out, GEN_INT (ct),
8968 out, 1, OPTAB_DIRECT);
e075ae69
RH
8969 if (out != operands[0])
8970 emit_move_insn (operands[0], out);
8971
8972 return 1; /* DONE */
8973 }
8974 }
8975
8976 if (!TARGET_CMOVE)
8977 {
8978 /* Try a few things more with specific constants and a variable. */
8979
78a0d70c 8980 optab op;
e075ae69
RH
8981 rtx var, orig_out, out, tmp;
8982
8983 if (optimize_size)
8984 return 0; /* FAIL */
8985
0f290768 8986 /* If one of the two operands is an interesting constant, load a
e075ae69 8987 constant with the above and mask it in with a logical operation. */
0f290768 8988
e075ae69
RH
8989 if (GET_CODE (operands[2]) == CONST_INT)
8990 {
8991 var = operands[3];
8992 if (INTVAL (operands[2]) == 0)
8993 operands[3] = constm1_rtx, op = and_optab;
8994 else if (INTVAL (operands[2]) == -1)
8995 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
8996 else
8997 return 0; /* FAIL */
e075ae69
RH
8998 }
8999 else if (GET_CODE (operands[3]) == CONST_INT)
9000 {
9001 var = operands[2];
9002 if (INTVAL (operands[3]) == 0)
9003 operands[2] = constm1_rtx, op = and_optab;
9004 else if (INTVAL (operands[3]) == -1)
9005 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
9006 else
9007 return 0; /* FAIL */
e075ae69 9008 }
78a0d70c 9009 else
e075ae69
RH
9010 return 0; /* FAIL */
9011
9012 orig_out = operands[0];
635559ab 9013 tmp = gen_reg_rtx (mode);
e075ae69
RH
9014 operands[0] = tmp;
9015
9016 /* Recurse to get the constant loaded. */
9017 if (ix86_expand_int_movcc (operands) == 0)
9018 return 0; /* FAIL */
9019
9020 /* Mask in the interesting variable. */
635559ab 9021 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69
RH
9022 OPTAB_WIDEN);
9023 if (out != orig_out)
9024 emit_move_insn (orig_out, out);
9025
9026 return 1; /* DONE */
9027 }
9028
9029 /*
9030 * For comparison with above,
9031 *
9032 * movl cf,dest
9033 * movl ct,tmp
9034 * cmpl op1,op2
9035 * cmovcc tmp,dest
9036 *
9037 * Size 15.
9038 */
9039
635559ab
JH
9040 if (! nonimmediate_operand (operands[2], mode))
9041 operands[2] = force_reg (mode, operands[2]);
9042 if (! nonimmediate_operand (operands[3], mode))
9043 operands[3] = force_reg (mode, operands[3]);
e075ae69 9044
a1b8572c
JH
9045 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9046 {
635559ab 9047 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9048 emit_move_insn (tmp, operands[3]);
9049 operands[3] = tmp;
9050 }
9051 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9052 {
635559ab 9053 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9054 emit_move_insn (tmp, operands[2]);
9055 operands[2] = tmp;
9056 }
c9682caf
JH
9057 if (! register_operand (operands[2], VOIDmode)
9058 && ! register_operand (operands[3], VOIDmode))
635559ab 9059 operands[2] = force_reg (mode, operands[2]);
a1b8572c 9060
e075ae69
RH
9061 emit_insn (compare_seq);
9062 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9063 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
9064 compare_op, operands[2],
9065 operands[3])));
a1b8572c
JH
9066 if (bypass_test)
9067 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9068 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c
JH
9069 bypass_test,
9070 operands[3],
9071 operands[0])));
9072 if (second_test)
9073 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9074 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c
JH
9075 second_test,
9076 operands[2],
9077 operands[0])));
e075ae69
RH
9078
9079 return 1; /* DONE */
e9a25f70 9080}
e075ae69 9081
32b5b1aa 9082int
e075ae69
RH
9083ix86_expand_fp_movcc (operands)
9084 rtx operands[];
32b5b1aa 9085{
e075ae69 9086 enum rtx_code code;
e075ae69 9087 rtx tmp;
a1b8572c 9088 rtx compare_op, second_test, bypass_test;
32b5b1aa 9089
0073023d
JH
9090 /* For SF/DFmode conditional moves based on comparisons
9091 in same mode, we may want to use SSE min/max instructions. */
965f5423
JH
9092 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9093 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
0073023d 9094 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
fa9f36a1
JH
9095 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9096 && (!TARGET_IEEE_FP
9097 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
0073023d
JH
9098 /* We may be called from the post-reload splitter. */
9099 && (!REG_P (operands[0])
9100 || SSE_REG_P (operands[0])
52a661a6 9101 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
0073023d
JH
9102 {
9103 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9104 code = GET_CODE (operands[1]);
9105
9106 /* See if we have (cross) match between comparison operands and
9107 conditional move operands. */
9108 if (rtx_equal_p (operands[2], op1))
9109 {
9110 rtx tmp = op0;
9111 op0 = op1;
9112 op1 = tmp;
9113 code = reverse_condition_maybe_unordered (code);
9114 }
9115 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9116 {
9117 /* Check for min operation. */
9118 if (code == LT)
9119 {
9120 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9121 if (memory_operand (op0, VOIDmode))
9122 op0 = force_reg (GET_MODE (operands[0]), op0);
9123 if (GET_MODE (operands[0]) == SFmode)
9124 emit_insn (gen_minsf3 (operands[0], op0, op1));
9125 else
9126 emit_insn (gen_mindf3 (operands[0], op0, op1));
9127 return 1;
9128 }
9129 /* Check for max operation. */
9130 if (code == GT)
9131 {
9132 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9133 if (memory_operand (op0, VOIDmode))
9134 op0 = force_reg (GET_MODE (operands[0]), op0);
9135 if (GET_MODE (operands[0]) == SFmode)
9136 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9137 else
9138 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9139 return 1;
9140 }
9141 }
9142 /* Manage condition to be sse_comparison_operator. In case we are
9143 in non-ieee mode, try to canonicalize the destination operand
9144 to be first in the comparison - this helps reload to avoid extra
9145 moves. */
9146 if (!sse_comparison_operator (operands[1], VOIDmode)
9147 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9148 {
9149 rtx tmp = ix86_compare_op0;
9150 ix86_compare_op0 = ix86_compare_op1;
9151 ix86_compare_op1 = tmp;
9152 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9153 VOIDmode, ix86_compare_op0,
9154 ix86_compare_op1);
9155 }
9156 /* Similary try to manage result to be first operand of conditional
fa9f36a1
JH
9157 move. We also don't support the NE comparison on SSE, so try to
9158 avoid it. */
037f20f1
JH
9159 if ((rtx_equal_p (operands[0], operands[3])
9160 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9161 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
0073023d
JH
9162 {
9163 rtx tmp = operands[2];
9164 operands[2] = operands[3];
92d0fb09 9165 operands[3] = tmp;
0073023d
JH
9166 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9167 (GET_CODE (operands[1])),
9168 VOIDmode, ix86_compare_op0,
9169 ix86_compare_op1);
9170 }
9171 if (GET_MODE (operands[0]) == SFmode)
9172 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9173 operands[2], operands[3],
9174 ix86_compare_op0, ix86_compare_op1));
9175 else
9176 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9177 operands[2], operands[3],
9178 ix86_compare_op0, ix86_compare_op1));
9179 return 1;
9180 }
9181
e075ae69 9182 /* The floating point conditional move instructions don't directly
0f290768 9183 support conditions resulting from a signed integer comparison. */
32b5b1aa 9184
e075ae69 9185 code = GET_CODE (operands[1]);
a1b8572c 9186 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9e7adcb3
JH
9187
9188 /* The floating point conditional move instructions don't directly
9189 support signed integer comparisons. */
9190
a1b8572c 9191 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 9192 {
a1b8572c 9193 if (second_test != NULL || bypass_test != NULL)
b531087a 9194 abort ();
e075ae69 9195 tmp = gen_reg_rtx (QImode);
3a3677ff 9196 ix86_expand_setcc (code, tmp);
e075ae69
RH
9197 code = NE;
9198 ix86_compare_op0 = tmp;
9199 ix86_compare_op1 = const0_rtx;
a1b8572c
JH
9200 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9201 }
9202 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9203 {
9204 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9205 emit_move_insn (tmp, operands[3]);
9206 operands[3] = tmp;
9207 }
9208 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9209 {
9210 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9211 emit_move_insn (tmp, operands[2]);
9212 operands[2] = tmp;
e075ae69 9213 }
e9a25f70 9214
e075ae69
RH
9215 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9216 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9e7adcb3 9217 compare_op,
e075ae69
RH
9218 operands[2],
9219 operands[3])));
a1b8572c
JH
9220 if (bypass_test)
9221 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9222 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9223 bypass_test,
9224 operands[3],
9225 operands[0])));
9226 if (second_test)
9227 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9228 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9229 second_test,
9230 operands[2],
9231 operands[0])));
32b5b1aa 9232
e075ae69 9233 return 1;
32b5b1aa
SC
9234}
9235
2450a057
JH
9236/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9237 works for floating pointer parameters and nonoffsetable memories.
9238 For pushes, it returns just stack offsets; the values will be saved
9239 in the right order. Maximally three parts are generated. */
9240
2b589241 9241static int
2450a057
JH
9242ix86_split_to_parts (operand, parts, mode)
9243 rtx operand;
9244 rtx *parts;
9245 enum machine_mode mode;
32b5b1aa 9246{
26e5b205
JH
9247 int size;
9248
9249 if (!TARGET_64BIT)
9250 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9251 else
9252 size = (GET_MODE_SIZE (mode) + 4) / 8;
2450a057 9253
a7180f70
BS
9254 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9255 abort ();
2450a057
JH
9256 if (size < 2 || size > 3)
9257 abort ();
9258
f996902d
RH
9259 /* Optimize constant pool reference to immediates. This is used by fp
9260 moves, that force all constants to memory to allow combining. */
9261 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9262 {
9263 rtx tmp = maybe_get_pool_constant (operand);
9264 if (tmp)
9265 operand = tmp;
9266 }
d7a29404 9267
2450a057 9268 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
e075ae69 9269 {
2450a057
JH
9270 /* The only non-offsetable memories we handle are pushes. */
9271 if (! push_operand (operand, VOIDmode))
9272 abort ();
9273
26e5b205
JH
9274 operand = copy_rtx (operand);
9275 PUT_MODE (operand, Pmode);
2450a057
JH
9276 parts[0] = parts[1] = parts[2] = operand;
9277 }
26e5b205 9278 else if (!TARGET_64BIT)
2450a057
JH
9279 {
9280 if (mode == DImode)
9281 split_di (&operand, 1, &parts[0], &parts[1]);
9282 else
e075ae69 9283 {
2450a057
JH
9284 if (REG_P (operand))
9285 {
9286 if (!reload_completed)
9287 abort ();
9288 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9289 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9290 if (size == 3)
9291 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9292 }
9293 else if (offsettable_memref_p (operand))
9294 {
f4ef873c 9295 operand = adjust_address (operand, SImode, 0);
2450a057 9296 parts[0] = operand;
b72f00af 9297 parts[1] = adjust_address (operand, SImode, 4);
2450a057 9298 if (size == 3)
b72f00af 9299 parts[2] = adjust_address (operand, SImode, 8);
2450a057
JH
9300 }
9301 else if (GET_CODE (operand) == CONST_DOUBLE)
9302 {
9303 REAL_VALUE_TYPE r;
2b589241 9304 long l[4];
2450a057
JH
9305
9306 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9307 switch (mode)
9308 {
9309 case XFmode:
2b589241 9310 case TFmode:
2450a057 9311 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
d8bf17f9 9312 parts[2] = gen_int_mode (l[2], SImode);
2450a057
JH
9313 break;
9314 case DFmode:
9315 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9316 break;
9317 default:
9318 abort ();
9319 }
d8bf17f9
LB
9320 parts[1] = gen_int_mode (l[1], SImode);
9321 parts[0] = gen_int_mode (l[0], SImode);
2450a057
JH
9322 }
9323 else
9324 abort ();
e075ae69 9325 }
2450a057 9326 }
26e5b205
JH
9327 else
9328 {
44cf5b6a
JH
9329 if (mode == TImode)
9330 split_ti (&operand, 1, &parts[0], &parts[1]);
26e5b205
JH
9331 if (mode == XFmode || mode == TFmode)
9332 {
9333 if (REG_P (operand))
9334 {
9335 if (!reload_completed)
9336 abort ();
9337 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9338 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9339 }
9340 else if (offsettable_memref_p (operand))
9341 {
b72f00af 9342 operand = adjust_address (operand, DImode, 0);
26e5b205 9343 parts[0] = operand;
b72f00af 9344 parts[1] = adjust_address (operand, SImode, 8);
26e5b205
JH
9345 }
9346 else if (GET_CODE (operand) == CONST_DOUBLE)
9347 {
9348 REAL_VALUE_TYPE r;
9349 long l[3];
9350
9351 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9352 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9353 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9354 if (HOST_BITS_PER_WIDE_INT >= 64)
523fbd9d 9355 parts[0]
d8bf17f9 9356 = gen_int_mode
44cf5b6a 9357 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
b531087a 9358 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
d8bf17f9 9359 DImode);
26e5b205
JH
9360 else
9361 parts[0] = immed_double_const (l[0], l[1], DImode);
d8bf17f9 9362 parts[1] = gen_int_mode (l[2], SImode);
26e5b205
JH
9363 }
9364 else
9365 abort ();
9366 }
9367 }
2450a057 9368
2b589241 9369 return size;
2450a057
JH
9370}
9371
9372/* Emit insns to perform a move or push of DI, DF, and XF values.
9373 Return false when normal moves are needed; true when all required
9374 insns have been emitted. Operands 2-4 contain the input values
9375 int the correct order; operands 5-7 contain the output values. */
9376
26e5b205
JH
9377void
9378ix86_split_long_move (operands)
9379 rtx operands[];
2450a057
JH
9380{
9381 rtx part[2][3];
26e5b205 9382 int nparts;
2450a057
JH
9383 int push = 0;
9384 int collisions = 0;
26e5b205
JH
9385 enum machine_mode mode = GET_MODE (operands[0]);
9386
9387 /* The DFmode expanders may ask us to move double.
9388 For 64bit target this is single move. By hiding the fact
9389 here we simplify i386.md splitters. */
9390 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9391 {
8cdfa312
RH
9392 /* Optimize constant pool reference to immediates. This is used by
9393 fp moves, that force all constants to memory to allow combining. */
26e5b205
JH
9394
9395 if (GET_CODE (operands[1]) == MEM
9396 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9397 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9398 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9399 if (push_operand (operands[0], VOIDmode))
b47b4f21
JH
9400 {
9401 operands[0] = copy_rtx (operands[0]);
9402 PUT_MODE (operands[0], Pmode);
9403 }
26e5b205
JH
9404 else
9405 operands[0] = gen_lowpart (DImode, operands[0]);
9406 operands[1] = gen_lowpart (DImode, operands[1]);
9407 emit_move_insn (operands[0], operands[1]);
9408 return;
9409 }
2450a057 9410
2450a057
JH
9411 /* The only non-offsettable memory we handle is push. */
9412 if (push_operand (operands[0], VOIDmode))
9413 push = 1;
9414 else if (GET_CODE (operands[0]) == MEM
9415 && ! offsettable_memref_p (operands[0]))
9416 abort ();
9417
26e5b205
JH
9418 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9419 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
2450a057
JH
9420
9421 /* When emitting push, take care for source operands on the stack. */
9422 if (push && GET_CODE (operands[1]) == MEM
9423 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9424 {
26e5b205 9425 if (nparts == 3)
886cbb88
JH
9426 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9427 XEXP (part[1][2], 0));
9428 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9429 XEXP (part[1][1], 0));
2450a057
JH
9430 }
9431
0f290768 9432 /* We need to do copy in the right order in case an address register
2450a057
JH
9433 of the source overlaps the destination. */
9434 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9435 {
9436 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9437 collisions++;
9438 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9439 collisions++;
26e5b205 9440 if (nparts == 3
2450a057
JH
9441 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9442 collisions++;
9443
9444 /* Collision in the middle part can be handled by reordering. */
26e5b205 9445 if (collisions == 1 && nparts == 3
2450a057 9446 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
e075ae69 9447 {
2450a057
JH
9448 rtx tmp;
9449 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9450 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9451 }
e075ae69 9452
2450a057
JH
9453 /* If there are more collisions, we can't handle it by reordering.
9454 Do an lea to the last part and use only one colliding move. */
9455 else if (collisions > 1)
9456 {
9457 collisions = 1;
26e5b205 9458 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
2450a057 9459 XEXP (part[1][0], 0)));
26e5b205
JH
9460 part[1][0] = change_address (part[1][0],
9461 TARGET_64BIT ? DImode : SImode,
9462 part[0][nparts - 1]);
b72f00af 9463 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
26e5b205 9464 if (nparts == 3)
b72f00af 9465 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
2450a057
JH
9466 }
9467 }
9468
9469 if (push)
9470 {
26e5b205 9471 if (!TARGET_64BIT)
2b589241 9472 {
26e5b205
JH
9473 if (nparts == 3)
9474 {
9475 /* We use only first 12 bytes of TFmode value, but for pushing we
9476 are required to adjust stack as if we were pushing real 16byte
9477 value. */
9478 if (mode == TFmode && !TARGET_64BIT)
9479 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9480 GEN_INT (-4)));
9481 emit_move_insn (part[0][2], part[1][2]);
9482 }
2b589241 9483 }
26e5b205
JH
9484 else
9485 {
9486 /* In 64bit mode we don't have 32bit push available. In case this is
9487 register, it is OK - we will just use larger counterpart. We also
9488 retype memory - these comes from attempt to avoid REX prefix on
9489 moving of second half of TFmode value. */
9490 if (GET_MODE (part[1][1]) == SImode)
9491 {
9492 if (GET_CODE (part[1][1]) == MEM)
f4ef873c 9493 part[1][1] = adjust_address (part[1][1], DImode, 0);
26e5b205
JH
9494 else if (REG_P (part[1][1]))
9495 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9496 else
b531087a 9497 abort ();
886cbb88
JH
9498 if (GET_MODE (part[1][0]) == SImode)
9499 part[1][0] = part[1][1];
26e5b205
JH
9500 }
9501 }
9502 emit_move_insn (part[0][1], part[1][1]);
9503 emit_move_insn (part[0][0], part[1][0]);
9504 return;
2450a057
JH
9505 }
9506
9507 /* Choose correct order to not overwrite the source before it is copied. */
9508 if ((REG_P (part[0][0])
9509 && REG_P (part[1][1])
9510 && (REGNO (part[0][0]) == REGNO (part[1][1])
26e5b205 9511 || (nparts == 3
2450a057
JH
9512 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9513 || (collisions > 0
9514 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9515 {
26e5b205 9516 if (nparts == 3)
2450a057 9517 {
26e5b205
JH
9518 operands[2] = part[0][2];
9519 operands[3] = part[0][1];
9520 operands[4] = part[0][0];
9521 operands[5] = part[1][2];
9522 operands[6] = part[1][1];
9523 operands[7] = part[1][0];
2450a057
JH
9524 }
9525 else
9526 {
26e5b205
JH
9527 operands[2] = part[0][1];
9528 operands[3] = part[0][0];
9529 operands[5] = part[1][1];
9530 operands[6] = part[1][0];
2450a057
JH
9531 }
9532 }
9533 else
9534 {
26e5b205 9535 if (nparts == 3)
2450a057 9536 {
26e5b205
JH
9537 operands[2] = part[0][0];
9538 operands[3] = part[0][1];
9539 operands[4] = part[0][2];
9540 operands[5] = part[1][0];
9541 operands[6] = part[1][1];
9542 operands[7] = part[1][2];
2450a057
JH
9543 }
9544 else
9545 {
26e5b205
JH
9546 operands[2] = part[0][0];
9547 operands[3] = part[0][1];
9548 operands[5] = part[1][0];
9549 operands[6] = part[1][1];
e075ae69
RH
9550 }
9551 }
26e5b205
JH
9552 emit_move_insn (operands[2], operands[5]);
9553 emit_move_insn (operands[3], operands[6]);
9554 if (nparts == 3)
9555 emit_move_insn (operands[4], operands[7]);
32b5b1aa 9556
26e5b205 9557 return;
32b5b1aa 9558}
32b5b1aa 9559
e075ae69
RH
9560void
9561ix86_split_ashldi (operands, scratch)
9562 rtx *operands, scratch;
32b5b1aa 9563{
e075ae69
RH
9564 rtx low[2], high[2];
9565 int count;
b985a30f 9566
e075ae69
RH
9567 if (GET_CODE (operands[2]) == CONST_INT)
9568 {
9569 split_di (operands, 2, low, high);
9570 count = INTVAL (operands[2]) & 63;
32b5b1aa 9571
e075ae69
RH
9572 if (count >= 32)
9573 {
9574 emit_move_insn (high[0], low[1]);
9575 emit_move_insn (low[0], const0_rtx);
b985a30f 9576
e075ae69
RH
9577 if (count > 32)
9578 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9579 }
9580 else
9581 {
9582 if (!rtx_equal_p (operands[0], operands[1]))
9583 emit_move_insn (operands[0], operands[1]);
9584 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9585 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9586 }
9587 }
9588 else
9589 {
9590 if (!rtx_equal_p (operands[0], operands[1]))
9591 emit_move_insn (operands[0], operands[1]);
b985a30f 9592
e075ae69 9593 split_di (operands, 1, low, high);
b985a30f 9594
e075ae69
RH
9595 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9596 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 9597
fe577e58 9598 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 9599 {
fe577e58 9600 if (! no_new_pseudos)
e075ae69
RH
9601 scratch = force_reg (SImode, const0_rtx);
9602 else
9603 emit_move_insn (scratch, const0_rtx);
9604
9605 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9606 scratch));
9607 }
9608 else
9609 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9610 }
e9a25f70 9611}
32b5b1aa 9612
e075ae69
RH
9613void
9614ix86_split_ashrdi (operands, scratch)
9615 rtx *operands, scratch;
32b5b1aa 9616{
e075ae69
RH
9617 rtx low[2], high[2];
9618 int count;
32b5b1aa 9619
e075ae69
RH
9620 if (GET_CODE (operands[2]) == CONST_INT)
9621 {
9622 split_di (operands, 2, low, high);
9623 count = INTVAL (operands[2]) & 63;
32b5b1aa 9624
e075ae69
RH
9625 if (count >= 32)
9626 {
9627 emit_move_insn (low[0], high[1]);
32b5b1aa 9628
e075ae69
RH
9629 if (! reload_completed)
9630 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9631 else
9632 {
9633 emit_move_insn (high[0], low[0]);
9634 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9635 }
9636
9637 if (count > 32)
9638 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9639 }
9640 else
9641 {
9642 if (!rtx_equal_p (operands[0], operands[1]))
9643 emit_move_insn (operands[0], operands[1]);
9644 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9645 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9646 }
9647 }
9648 else
32b5b1aa 9649 {
e075ae69
RH
9650 if (!rtx_equal_p (operands[0], operands[1]))
9651 emit_move_insn (operands[0], operands[1]);
9652
9653 split_di (operands, 1, low, high);
9654
9655 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9656 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9657
fe577e58 9658 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 9659 {
fe577e58 9660 if (! no_new_pseudos)
e075ae69
RH
9661 scratch = gen_reg_rtx (SImode);
9662 emit_move_insn (scratch, high[0]);
9663 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9664 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9665 scratch));
9666 }
9667 else
9668 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 9669 }
e075ae69 9670}
32b5b1aa 9671
e075ae69
RH
9672void
9673ix86_split_lshrdi (operands, scratch)
9674 rtx *operands, scratch;
9675{
9676 rtx low[2], high[2];
9677 int count;
32b5b1aa 9678
e075ae69 9679 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 9680 {
e075ae69
RH
9681 split_di (operands, 2, low, high);
9682 count = INTVAL (operands[2]) & 63;
9683
9684 if (count >= 32)
c7271385 9685 {
e075ae69
RH
9686 emit_move_insn (low[0], high[1]);
9687 emit_move_insn (high[0], const0_rtx);
32b5b1aa 9688
e075ae69
RH
9689 if (count > 32)
9690 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9691 }
9692 else
9693 {
9694 if (!rtx_equal_p (operands[0], operands[1]))
9695 emit_move_insn (operands[0], operands[1]);
9696 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9697 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9698 }
32b5b1aa 9699 }
e075ae69
RH
9700 else
9701 {
9702 if (!rtx_equal_p (operands[0], operands[1]))
9703 emit_move_insn (operands[0], operands[1]);
32b5b1aa 9704
e075ae69
RH
9705 split_di (operands, 1, low, high);
9706
9707 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9708 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9709
9710 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 9711 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 9712 {
fe577e58 9713 if (! no_new_pseudos)
e075ae69
RH
9714 scratch = force_reg (SImode, const0_rtx);
9715 else
9716 emit_move_insn (scratch, const0_rtx);
9717
9718 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9719 scratch));
9720 }
9721 else
9722 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9723 }
32b5b1aa 9724}
3f803cd9 9725
0407c02b 9726/* Helper function for the string operations below. Dest VARIABLE whether
0945b39d
JH
9727 it is aligned to VALUE bytes. If true, jump to the label. */
9728static rtx
9729ix86_expand_aligntest (variable, value)
9730 rtx variable;
9731 int value;
9732{
9733 rtx label = gen_label_rtx ();
9734 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9735 if (GET_MODE (variable) == DImode)
9736 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9737 else
9738 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9739 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
d43e0b7d 9740 1, label);
0945b39d
JH
9741 return label;
9742}
9743
9744/* Adjust COUNTER by the VALUE. */
9745static void
9746ix86_adjust_counter (countreg, value)
9747 rtx countreg;
9748 HOST_WIDE_INT value;
9749{
9750 if (GET_MODE (countreg) == DImode)
9751 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9752 else
9753 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9754}
9755
9756/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 9757rtx
0945b39d
JH
9758ix86_zero_extend_to_Pmode (exp)
9759 rtx exp;
9760{
9761 rtx r;
9762 if (GET_MODE (exp) == VOIDmode)
9763 return force_reg (Pmode, exp);
9764 if (GET_MODE (exp) == Pmode)
9765 return copy_to_mode_reg (Pmode, exp);
9766 r = gen_reg_rtx (Pmode);
9767 emit_insn (gen_zero_extendsidi2 (r, exp));
9768 return r;
9769}
9770
9771/* Expand string move (memcpy) operation. Use i386 string operations when
9772 profitable. expand_clrstr contains similar code. */
9773int
9774ix86_expand_movstr (dst, src, count_exp, align_exp)
9775 rtx dst, src, count_exp, align_exp;
9776{
9777 rtx srcreg, destreg, countreg;
9778 enum machine_mode counter_mode;
9779 HOST_WIDE_INT align = 0;
9780 unsigned HOST_WIDE_INT count = 0;
9781 rtx insns;
9782
9783 start_sequence ();
9784
9785 if (GET_CODE (align_exp) == CONST_INT)
9786 align = INTVAL (align_exp);
9787
5519a4f9 9788 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
9789 if (!TARGET_ALIGN_STRINGOPS)
9790 align = 64;
9791
9792 if (GET_CODE (count_exp) == CONST_INT)
9793 count = INTVAL (count_exp);
9794
9795 /* Figure out proper mode for counter. For 32bits it is always SImode,
9796 for 64bits use SImode when possible, otherwise DImode.
9797 Set count to number of bytes copied when known at compile time. */
9798 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9799 || x86_64_zero_extended_value (count_exp))
9800 counter_mode = SImode;
9801 else
9802 counter_mode = DImode;
9803
9804 if (counter_mode != SImode && counter_mode != DImode)
9805 abort ();
9806
9807 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9808 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9809
9810 emit_insn (gen_cld ());
9811
9812 /* When optimizing for size emit simple rep ; movsb instruction for
9813 counts not divisible by 4. */
9814
9815 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9816 {
9817 countreg = ix86_zero_extend_to_Pmode (count_exp);
9818 if (TARGET_64BIT)
9819 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9820 destreg, srcreg, countreg));
9821 else
9822 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9823 destreg, srcreg, countreg));
9824 }
9825
9826 /* For constant aligned (or small unaligned) copies use rep movsl
9827 followed by code copying the rest. For PentiumPro ensure 8 byte
9828 alignment to allow rep movsl acceleration. */
9829
9830 else if (count != 0
9831 && (align >= 8
9832 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 9833 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
9834 {
9835 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9836 if (count & ~(size - 1))
9837 {
9838 countreg = copy_to_mode_reg (counter_mode,
9839 GEN_INT ((count >> (size == 4 ? 2 : 3))
9840 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9841 countreg = ix86_zero_extend_to_Pmode (countreg);
9842 if (size == 4)
9843 {
9844 if (TARGET_64BIT)
9845 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9846 destreg, srcreg, countreg));
9847 else
9848 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9849 destreg, srcreg, countreg));
9850 }
9851 else
9852 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9853 destreg, srcreg, countreg));
9854 }
9855 if (size == 8 && (count & 0x04))
9856 emit_insn (gen_strmovsi (destreg, srcreg));
9857 if (count & 0x02)
9858 emit_insn (gen_strmovhi (destreg, srcreg));
9859 if (count & 0x01)
9860 emit_insn (gen_strmovqi (destreg, srcreg));
9861 }
9862 /* The generic code based on the glibc implementation:
9863 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9864 allowing accelerated copying there)
9865 - copy the data using rep movsl
9866 - copy the rest. */
9867 else
9868 {
9869 rtx countreg2;
9870 rtx label = NULL;
37ad04a5
JH
9871 int desired_alignment = (TARGET_PENTIUMPRO
9872 && (count == 0 || count >= (unsigned int) 260)
9873 ? 8 : UNITS_PER_WORD);
0945b39d
JH
9874
9875 /* In case we don't know anything about the alignment, default to
9876 library version, since it is usually equally fast and result in
9877 shorter code. */
9878 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9879 {
9880 end_sequence ();
9881 return 0;
9882 }
9883
9884 if (TARGET_SINGLE_STRINGOP)
9885 emit_insn (gen_cld ());
9886
9887 countreg2 = gen_reg_rtx (Pmode);
9888 countreg = copy_to_mode_reg (counter_mode, count_exp);
9889
9890 /* We don't use loops to align destination and to copy parts smaller
9891 than 4 bytes, because gcc is able to optimize such code better (in
9892 the case the destination or the count really is aligned, gcc is often
9893 able to predict the branches) and also it is friendlier to the
a4f31c00 9894 hardware branch prediction.
0945b39d
JH
9895
9896 Using loops is benefical for generic case, because we can
9897 handle small counts using the loops. Many CPUs (such as Athlon)
9898 have large REP prefix setup costs.
9899
9900 This is quite costy. Maybe we can revisit this decision later or
9901 add some customizability to this code. */
9902
37ad04a5 9903 if (count == 0 && align < desired_alignment)
0945b39d
JH
9904 {
9905 label = gen_label_rtx ();
9906 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
d43e0b7d 9907 LEU, 0, counter_mode, 1, label);
0945b39d
JH
9908 }
9909 if (align <= 1)
9910 {
9911 rtx label = ix86_expand_aligntest (destreg, 1);
9912 emit_insn (gen_strmovqi (destreg, srcreg));
9913 ix86_adjust_counter (countreg, 1);
9914 emit_label (label);
9915 LABEL_NUSES (label) = 1;
9916 }
9917 if (align <= 2)
9918 {
9919 rtx label = ix86_expand_aligntest (destreg, 2);
9920 emit_insn (gen_strmovhi (destreg, srcreg));
9921 ix86_adjust_counter (countreg, 2);
9922 emit_label (label);
9923 LABEL_NUSES (label) = 1;
9924 }
37ad04a5 9925 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
9926 {
9927 rtx label = ix86_expand_aligntest (destreg, 4);
9928 emit_insn (gen_strmovsi (destreg, srcreg));
9929 ix86_adjust_counter (countreg, 4);
9930 emit_label (label);
9931 LABEL_NUSES (label) = 1;
9932 }
9933
37ad04a5
JH
9934 if (label && desired_alignment > 4 && !TARGET_64BIT)
9935 {
9936 emit_label (label);
9937 LABEL_NUSES (label) = 1;
9938 label = NULL_RTX;
9939 }
0945b39d
JH
9940 if (!TARGET_SINGLE_STRINGOP)
9941 emit_insn (gen_cld ());
9942 if (TARGET_64BIT)
9943 {
9944 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9945 GEN_INT (3)));
9946 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9947 destreg, srcreg, countreg2));
9948 }
9949 else
9950 {
9951 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9952 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9953 destreg, srcreg, countreg2));
9954 }
9955
9956 if (label)
9957 {
9958 emit_label (label);
9959 LABEL_NUSES (label) = 1;
9960 }
9961 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9962 emit_insn (gen_strmovsi (destreg, srcreg));
9963 if ((align <= 4 || count == 0) && TARGET_64BIT)
9964 {
9965 rtx label = ix86_expand_aligntest (countreg, 4);
9966 emit_insn (gen_strmovsi (destreg, srcreg));
9967 emit_label (label);
9968 LABEL_NUSES (label) = 1;
9969 }
9970 if (align > 2 && count != 0 && (count & 2))
9971 emit_insn (gen_strmovhi (destreg, srcreg));
9972 if (align <= 2 || count == 0)
9973 {
9974 rtx label = ix86_expand_aligntest (countreg, 2);
9975 emit_insn (gen_strmovhi (destreg, srcreg));
9976 emit_label (label);
9977 LABEL_NUSES (label) = 1;
9978 }
9979 if (align > 1 && count != 0 && (count & 1))
9980 emit_insn (gen_strmovqi (destreg, srcreg));
9981 if (align <= 1 || count == 0)
9982 {
9983 rtx label = ix86_expand_aligntest (countreg, 1);
9984 emit_insn (gen_strmovqi (destreg, srcreg));
9985 emit_label (label);
9986 LABEL_NUSES (label) = 1;
9987 }
9988 }
9989
9990 insns = get_insns ();
9991 end_sequence ();
9992
9993 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9994 emit_insns (insns);
9995 return 1;
9996}
9997
9998/* Expand string clear operation (bzero). Use i386 string operations when
9999 profitable. expand_movstr contains similar code. */
10000int
10001ix86_expand_clrstr (src, count_exp, align_exp)
10002 rtx src, count_exp, align_exp;
10003{
10004 rtx destreg, zeroreg, countreg;
10005 enum machine_mode counter_mode;
10006 HOST_WIDE_INT align = 0;
10007 unsigned HOST_WIDE_INT count = 0;
10008
10009 if (GET_CODE (align_exp) == CONST_INT)
10010 align = INTVAL (align_exp);
10011
5519a4f9 10012 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
10013 if (!TARGET_ALIGN_STRINGOPS)
10014 align = 32;
10015
10016 if (GET_CODE (count_exp) == CONST_INT)
10017 count = INTVAL (count_exp);
10018 /* Figure out proper mode for counter. For 32bits it is always SImode,
10019 for 64bits use SImode when possible, otherwise DImode.
10020 Set count to number of bytes copied when known at compile time. */
10021 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10022 || x86_64_zero_extended_value (count_exp))
10023 counter_mode = SImode;
10024 else
10025 counter_mode = DImode;
10026
10027 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10028
10029 emit_insn (gen_cld ());
10030
10031 /* When optimizing for size emit simple rep ; movsb instruction for
10032 counts not divisible by 4. */
10033
10034 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10035 {
10036 countreg = ix86_zero_extend_to_Pmode (count_exp);
10037 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10038 if (TARGET_64BIT)
10039 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10040 destreg, countreg));
10041 else
10042 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10043 destreg, countreg));
10044 }
10045 else if (count != 0
10046 && (align >= 8
10047 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 10048 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
10049 {
10050 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10051 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10052 if (count & ~(size - 1))
10053 {
10054 countreg = copy_to_mode_reg (counter_mode,
10055 GEN_INT ((count >> (size == 4 ? 2 : 3))
10056 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10057 countreg = ix86_zero_extend_to_Pmode (countreg);
10058 if (size == 4)
10059 {
10060 if (TARGET_64BIT)
10061 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10062 destreg, countreg));
10063 else
10064 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10065 destreg, countreg));
10066 }
10067 else
10068 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10069 destreg, countreg));
10070 }
10071 if (size == 8 && (count & 0x04))
10072 emit_insn (gen_strsetsi (destreg,
10073 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10074 if (count & 0x02)
10075 emit_insn (gen_strsethi (destreg,
10076 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10077 if (count & 0x01)
10078 emit_insn (gen_strsetqi (destreg,
10079 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10080 }
10081 else
10082 {
10083 rtx countreg2;
10084 rtx label = NULL;
37ad04a5
JH
10085 /* Compute desired alignment of the string operation. */
10086 int desired_alignment = (TARGET_PENTIUMPRO
10087 && (count == 0 || count >= (unsigned int) 260)
10088 ? 8 : UNITS_PER_WORD);
0945b39d
JH
10089
10090 /* In case we don't know anything about the alignment, default to
10091 library version, since it is usually equally fast and result in
10092 shorter code. */
10093 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10094 return 0;
10095
10096 if (TARGET_SINGLE_STRINGOP)
10097 emit_insn (gen_cld ());
10098
10099 countreg2 = gen_reg_rtx (Pmode);
10100 countreg = copy_to_mode_reg (counter_mode, count_exp);
10101 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10102
37ad04a5 10103 if (count == 0 && align < desired_alignment)
0945b39d
JH
10104 {
10105 label = gen_label_rtx ();
37ad04a5 10106 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 10107 LEU, 0, counter_mode, 1, label);
0945b39d
JH
10108 }
10109 if (align <= 1)
10110 {
10111 rtx label = ix86_expand_aligntest (destreg, 1);
10112 emit_insn (gen_strsetqi (destreg,
10113 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10114 ix86_adjust_counter (countreg, 1);
10115 emit_label (label);
10116 LABEL_NUSES (label) = 1;
10117 }
10118 if (align <= 2)
10119 {
10120 rtx label = ix86_expand_aligntest (destreg, 2);
10121 emit_insn (gen_strsethi (destreg,
10122 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10123 ix86_adjust_counter (countreg, 2);
10124 emit_label (label);
10125 LABEL_NUSES (label) = 1;
10126 }
37ad04a5 10127 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
10128 {
10129 rtx label = ix86_expand_aligntest (destreg, 4);
10130 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10131 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10132 : zeroreg)));
10133 ix86_adjust_counter (countreg, 4);
10134 emit_label (label);
10135 LABEL_NUSES (label) = 1;
10136 }
10137
37ad04a5
JH
10138 if (label && desired_alignment > 4 && !TARGET_64BIT)
10139 {
10140 emit_label (label);
10141 LABEL_NUSES (label) = 1;
10142 label = NULL_RTX;
10143 }
10144
0945b39d
JH
10145 if (!TARGET_SINGLE_STRINGOP)
10146 emit_insn (gen_cld ());
10147 if (TARGET_64BIT)
10148 {
10149 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10150 GEN_INT (3)));
10151 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10152 destreg, countreg2));
10153 }
10154 else
10155 {
10156 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10157 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10158 destreg, countreg2));
10159 }
0945b39d
JH
10160 if (label)
10161 {
10162 emit_label (label);
10163 LABEL_NUSES (label) = 1;
10164 }
37ad04a5 10165
0945b39d
JH
10166 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10167 emit_insn (gen_strsetsi (destreg,
10168 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10169 if (TARGET_64BIT && (align <= 4 || count == 0))
10170 {
74411039 10171 rtx label = ix86_expand_aligntest (countreg, 2);
0945b39d
JH
10172 emit_insn (gen_strsetsi (destreg,
10173 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10174 emit_label (label);
10175 LABEL_NUSES (label) = 1;
10176 }
10177 if (align > 2 && count != 0 && (count & 2))
10178 emit_insn (gen_strsethi (destreg,
10179 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10180 if (align <= 2 || count == 0)
10181 {
74411039 10182 rtx label = ix86_expand_aligntest (countreg, 2);
0945b39d
JH
10183 emit_insn (gen_strsethi (destreg,
10184 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10185 emit_label (label);
10186 LABEL_NUSES (label) = 1;
10187 }
10188 if (align > 1 && count != 0 && (count & 1))
10189 emit_insn (gen_strsetqi (destreg,
10190 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10191 if (align <= 1 || count == 0)
10192 {
74411039 10193 rtx label = ix86_expand_aligntest (countreg, 1);
0945b39d
JH
10194 emit_insn (gen_strsetqi (destreg,
10195 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10196 emit_label (label);
10197 LABEL_NUSES (label) = 1;
10198 }
10199 }
10200 return 1;
10201}
10202/* Expand strlen. */
10203int
10204ix86_expand_strlen (out, src, eoschar, align)
10205 rtx out, src, eoschar, align;
10206{
10207 rtx addr, scratch1, scratch2, scratch3, scratch4;
10208
10209 /* The generic case of strlen expander is long. Avoid it's
10210 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
10211
10212 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10213 && !TARGET_INLINE_ALL_STRINGOPS
10214 && !optimize_size
10215 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10216 return 0;
10217
10218 addr = force_reg (Pmode, XEXP (src, 0));
10219 scratch1 = gen_reg_rtx (Pmode);
10220
10221 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10222 && !optimize_size)
10223 {
10224 /* Well it seems that some optimizer does not combine a call like
10225 foo(strlen(bar), strlen(bar));
10226 when the move and the subtraction is done here. It does calculate
10227 the length just once when these instructions are done inside of
10228 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10229 often used and I use one fewer register for the lifetime of
10230 output_strlen_unroll() this is better. */
10231
10232 emit_move_insn (out, addr);
10233
10234 ix86_expand_strlensi_unroll_1 (out, align);
10235
10236 /* strlensi_unroll_1 returns the address of the zero at the end of
10237 the string, like memchr(), so compute the length by subtracting
10238 the start address. */
10239 if (TARGET_64BIT)
10240 emit_insn (gen_subdi3 (out, out, addr));
10241 else
10242 emit_insn (gen_subsi3 (out, out, addr));
10243 }
10244 else
10245 {
10246 scratch2 = gen_reg_rtx (Pmode);
10247 scratch3 = gen_reg_rtx (Pmode);
10248 scratch4 = force_reg (Pmode, constm1_rtx);
10249
10250 emit_move_insn (scratch3, addr);
10251 eoschar = force_reg (QImode, eoschar);
10252
10253 emit_insn (gen_cld ());
10254 if (TARGET_64BIT)
10255 {
10256 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10257 align, scratch4, scratch3));
10258 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10259 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10260 }
10261 else
10262 {
10263 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10264 align, scratch4, scratch3));
10265 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10266 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10267 }
10268 }
10269 return 1;
10270}
10271
e075ae69
RH
10272/* Expand the appropriate insns for doing strlen if not just doing
10273 repnz; scasb
10274
10275 out = result, initialized with the start address
10276 align_rtx = alignment of the address.
10277 scratch = scratch register, initialized with the startaddress when
77ebd435 10278 not aligned, otherwise undefined
3f803cd9
SC
10279
10280 This is just the body. It needs the initialisations mentioned above and
10281 some address computing at the end. These things are done in i386.md. */
10282
0945b39d
JH
10283static void
10284ix86_expand_strlensi_unroll_1 (out, align_rtx)
10285 rtx out, align_rtx;
3f803cd9 10286{
e075ae69
RH
10287 int align;
10288 rtx tmp;
10289 rtx align_2_label = NULL_RTX;
10290 rtx align_3_label = NULL_RTX;
10291 rtx align_4_label = gen_label_rtx ();
10292 rtx end_0_label = gen_label_rtx ();
e075ae69 10293 rtx mem;
e2e52e1b 10294 rtx tmpreg = gen_reg_rtx (SImode);
0945b39d 10295 rtx scratch = gen_reg_rtx (SImode);
e075ae69
RH
10296
10297 align = 0;
10298 if (GET_CODE (align_rtx) == CONST_INT)
10299 align = INTVAL (align_rtx);
3f803cd9 10300
e9a25f70 10301 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 10302
e9a25f70 10303 /* Is there a known alignment and is it less than 4? */
e075ae69 10304 if (align < 4)
3f803cd9 10305 {
0945b39d
JH
10306 rtx scratch1 = gen_reg_rtx (Pmode);
10307 emit_move_insn (scratch1, out);
e9a25f70 10308 /* Is there a known alignment and is it not 2? */
e075ae69 10309 if (align != 2)
3f803cd9 10310 {
e075ae69
RH
10311 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10312 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10313
10314 /* Leave just the 3 lower bits. */
0945b39d 10315 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
e075ae69
RH
10316 NULL_RTX, 0, OPTAB_WIDEN);
10317
9076b9c1 10318 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 10319 Pmode, 1, align_4_label);
9076b9c1 10320 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
d43e0b7d 10321 Pmode, 1, align_2_label);
9076b9c1 10322 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
d43e0b7d 10323 Pmode, 1, align_3_label);
3f803cd9
SC
10324 }
10325 else
10326 {
e9a25f70
JL
10327 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10328 check if is aligned to 4 - byte. */
e9a25f70 10329
0945b39d 10330 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
e075ae69
RH
10331 NULL_RTX, 0, OPTAB_WIDEN);
10332
9076b9c1 10333 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 10334 Pmode, 1, align_4_label);
3f803cd9
SC
10335 }
10336
e075ae69 10337 mem = gen_rtx_MEM (QImode, out);
e9a25f70 10338
e075ae69 10339 /* Now compare the bytes. */
e9a25f70 10340
0f290768 10341 /* Compare the first n unaligned byte on a byte per byte basis. */
9076b9c1 10342 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
d43e0b7d 10343 QImode, 1, end_0_label);
3f803cd9 10344
0f290768 10345 /* Increment the address. */
0945b39d
JH
10346 if (TARGET_64BIT)
10347 emit_insn (gen_adddi3 (out, out, const1_rtx));
10348 else
10349 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 10350
e075ae69
RH
10351 /* Not needed with an alignment of 2 */
10352 if (align != 2)
10353 {
10354 emit_label (align_2_label);
3f803cd9 10355
d43e0b7d
RK
10356 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10357 end_0_label);
e075ae69 10358
0945b39d
JH
10359 if (TARGET_64BIT)
10360 emit_insn (gen_adddi3 (out, out, const1_rtx));
10361 else
10362 emit_insn (gen_addsi3 (out, out, const1_rtx));
e075ae69
RH
10363
10364 emit_label (align_3_label);
10365 }
10366
d43e0b7d
RK
10367 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10368 end_0_label);
e075ae69 10369
0945b39d
JH
10370 if (TARGET_64BIT)
10371 emit_insn (gen_adddi3 (out, out, const1_rtx));
10372 else
10373 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
10374 }
10375
e075ae69
RH
10376 /* Generate loop to check 4 bytes at a time. It is not a good idea to
10377 align this loop. It gives only huge programs, but does not help to
10378 speed up. */
10379 emit_label (align_4_label);
3f803cd9 10380
e075ae69
RH
10381 mem = gen_rtx_MEM (SImode, out);
10382 emit_move_insn (scratch, mem);
0945b39d
JH
10383 if (TARGET_64BIT)
10384 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10385 else
10386 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 10387
e2e52e1b
JH
10388 /* This formula yields a nonzero result iff one of the bytes is zero.
10389 This saves three branches inside loop and many cycles. */
10390
10391 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10392 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10393 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
7471a1f0 10394 emit_insn (gen_andsi3 (tmpreg, tmpreg,
d8bf17f9 10395 gen_int_mode (0x80808080, SImode)));
d43e0b7d
RK
10396 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10397 align_4_label);
e2e52e1b
JH
10398
10399 if (TARGET_CMOVE)
10400 {
10401 rtx reg = gen_reg_rtx (SImode);
0945b39d 10402 rtx reg2 = gen_reg_rtx (Pmode);
e2e52e1b
JH
10403 emit_move_insn (reg, tmpreg);
10404 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10405
0f290768 10406 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 10407 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
10408 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10409 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10410 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10411 gen_rtx_IF_THEN_ELSE (SImode, tmp,
77ebd435
AJ
10412 reg,
10413 tmpreg)));
e2e52e1b 10414 /* Emit lea manually to avoid clobbering of flags. */
0945b39d
JH
10415 emit_insn (gen_rtx_SET (SImode, reg2,
10416 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
e2e52e1b
JH
10417
10418 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10419 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10420 emit_insn (gen_rtx_SET (VOIDmode, out,
0945b39d 10421 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
a4f31c00
AJ
10422 reg2,
10423 out)));
e2e52e1b
JH
10424
10425 }
10426 else
10427 {
10428 rtx end_2_label = gen_label_rtx ();
10429 /* Is zero in the first two bytes? */
10430
16189740 10431 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
10432 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10433 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10434 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10435 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10436 pc_rtx);
10437 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10438 JUMP_LABEL (tmp) = end_2_label;
10439
0f290768 10440 /* Not in the first two. Move two bytes forward. */
e2e52e1b 10441 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
0945b39d
JH
10442 if (TARGET_64BIT)
10443 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10444 else
10445 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
e2e52e1b
JH
10446
10447 emit_label (end_2_label);
10448
10449 }
10450
0f290768 10451 /* Avoid branch in fixing the byte. */
e2e52e1b 10452 tmpreg = gen_lowpart (QImode, tmpreg);
7e08e190 10453 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
0945b39d
JH
10454 if (TARGET_64BIT)
10455 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10456 else
10457 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
e075ae69
RH
10458
10459 emit_label (end_0_label);
10460}
0e07aff3
RH
10461
10462void
10463ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10464 rtx retval, fnaddr, callarg1, callarg2, pop;
10465{
10466 rtx use = NULL, call;
10467
10468 if (pop == const0_rtx)
10469 pop = NULL;
10470 if (TARGET_64BIT && pop)
10471 abort ();
10472
10473 /* Static functions and indirect calls don't need the pic register. */
10474 if (! TARGET_64BIT && flag_pic
10475 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10476 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
66edd3b4 10477 use_reg (&use, pic_offset_table_rtx);
0e07aff3
RH
10478
10479 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10480 {
10481 rtx al = gen_rtx_REG (QImode, 0);
10482 emit_move_insn (al, callarg2);
10483 use_reg (&use, al);
10484 }
10485
10486 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10487 {
10488 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10489 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10490 }
10491
10492 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10493 if (retval)
10494 call = gen_rtx_SET (VOIDmode, retval, call);
10495 if (pop)
10496 {
10497 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10498 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10499 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10500 }
10501
10502 call = emit_call_insn (call);
10503 if (use)
10504 CALL_INSN_FUNCTION_USAGE (call) = use;
10505}
10506
e075ae69 10507\f
e075ae69
RH
10508/* Clear stack slot assignments remembered from previous functions.
10509 This is called from INIT_EXPANDERS once before RTL is emitted for each
10510 function. */
10511
36edd3cc
BS
10512static void
10513ix86_init_machine_status (p)
1526a060 10514 struct function *p;
e075ae69 10515{
37b15744
RH
10516 p->machine = (struct machine_function *)
10517 xcalloc (1, sizeof (struct machine_function));
e075ae69
RH
10518}
10519
1526a060
BS
10520/* Mark machine specific bits of P for GC. */
10521static void
10522ix86_mark_machine_status (p)
10523 struct function *p;
10524{
37b15744 10525 struct machine_function *machine = p->machine;
1526a060
BS
10526 enum machine_mode mode;
10527 int n;
10528
37b15744
RH
10529 if (! machine)
10530 return;
10531
1526a060
BS
10532 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
10533 mode = (enum machine_mode) ((int) mode + 1))
10534 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
37b15744
RH
10535 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
10536}
10537
10538static void
10539ix86_free_machine_status (p)
10540 struct function *p;
10541{
10542 free (p->machine);
10543 p->machine = NULL;
1526a060
BS
10544}
10545
e075ae69
RH
10546/* Return a MEM corresponding to a stack slot with mode MODE.
10547 Allocate a new slot if necessary.
10548
10549 The RTL for a function can have several slots available: N is
10550 which slot to use. */
10551
10552rtx
10553assign_386_stack_local (mode, n)
10554 enum machine_mode mode;
10555 int n;
10556{
10557 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10558 abort ();
10559
10560 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10561 ix86_stack_locals[(int) mode][n]
10562 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10563
10564 return ix86_stack_locals[(int) mode][n];
10565}
f996902d
RH
10566
10567/* Construct the SYMBOL_REF for the tls_get_addr function. */
10568
10569rtx
10570ix86_tls_get_addr ()
10571{
10572 static rtx symbol;
10573
10574 if (!symbol)
10575 {
10576 symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
10577 ? "___tls_get_addr"
10578 : "__tls_get_addr"));
10579 ggc_add_rtx_root (&symbol, 1);
10580 }
10581
10582 return symbol;
10583}
e075ae69
RH
10584\f
10585/* Calculate the length of the memory address in the instruction
10586 encoding. Does not include the one-byte modrm, opcode, or prefix. */
10587
10588static int
10589memory_address_length (addr)
10590 rtx addr;
10591{
10592 struct ix86_address parts;
10593 rtx base, index, disp;
10594 int len;
10595
10596 if (GET_CODE (addr) == PRE_DEC
0d7d98ee
JH
10597 || GET_CODE (addr) == POST_INC
10598 || GET_CODE (addr) == PRE_MODIFY
10599 || GET_CODE (addr) == POST_MODIFY)
e075ae69 10600 return 0;
3f803cd9 10601
e075ae69
RH
10602 if (! ix86_decompose_address (addr, &parts))
10603 abort ();
3f803cd9 10604
e075ae69
RH
10605 base = parts.base;
10606 index = parts.index;
10607 disp = parts.disp;
10608 len = 0;
3f803cd9 10609
e075ae69
RH
10610 /* Register Indirect. */
10611 if (base && !index && !disp)
10612 {
10613 /* Special cases: ebp and esp need the two-byte modrm form. */
10614 if (addr == stack_pointer_rtx
10615 || addr == arg_pointer_rtx
564d80f4
JH
10616 || addr == frame_pointer_rtx
10617 || addr == hard_frame_pointer_rtx)
e075ae69 10618 len = 1;
3f803cd9 10619 }
e9a25f70 10620
e075ae69
RH
10621 /* Direct Addressing. */
10622 else if (disp && !base && !index)
10623 len = 4;
10624
3f803cd9
SC
10625 else
10626 {
e075ae69
RH
10627 /* Find the length of the displacement constant. */
10628 if (disp)
10629 {
10630 if (GET_CODE (disp) == CONST_INT
10631 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10632 len = 1;
10633 else
10634 len = 4;
10635 }
3f803cd9 10636
e075ae69
RH
10637 /* An index requires the two-byte modrm form. */
10638 if (index)
10639 len += 1;
3f803cd9
SC
10640 }
10641
e075ae69
RH
10642 return len;
10643}
79325812 10644
5bf0ebab
RH
10645/* Compute default value for "length_immediate" attribute. When SHORTFORM
10646 is set, expect that insn have 8bit immediate alternative. */
e075ae69 10647int
6ef67412 10648ix86_attr_length_immediate_default (insn, shortform)
e075ae69 10649 rtx insn;
6ef67412 10650 int shortform;
e075ae69 10651{
6ef67412
JH
10652 int len = 0;
10653 int i;
6c698a6d 10654 extract_insn_cached (insn);
6ef67412
JH
10655 for (i = recog_data.n_operands - 1; i >= 0; --i)
10656 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 10657 {
6ef67412 10658 if (len)
3071fab5 10659 abort ();
6ef67412
JH
10660 if (shortform
10661 && GET_CODE (recog_data.operand[i]) == CONST_INT
10662 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10663 len = 1;
10664 else
10665 {
10666 switch (get_attr_mode (insn))
10667 {
10668 case MODE_QI:
10669 len+=1;
10670 break;
10671 case MODE_HI:
10672 len+=2;
10673 break;
10674 case MODE_SI:
10675 len+=4;
10676 break;
14f73b5a
JH
10677 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
10678 case MODE_DI:
10679 len+=4;
10680 break;
6ef67412 10681 default:
c725bd79 10682 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
10683 }
10684 }
3071fab5 10685 }
6ef67412
JH
10686 return len;
10687}
10688/* Compute default value for "length_address" attribute. */
10689int
10690ix86_attr_length_address_default (insn)
10691 rtx insn;
10692{
10693 int i;
6c698a6d 10694 extract_insn_cached (insn);
1ccbefce
RH
10695 for (i = recog_data.n_operands - 1; i >= 0; --i)
10696 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 10697 {
6ef67412 10698 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
10699 break;
10700 }
6ef67412 10701 return 0;
3f803cd9 10702}
e075ae69
RH
10703\f
10704/* Return the maximum number of instructions a cpu can issue. */
b657fc39 10705
c237e94a 10706static int
e075ae69 10707ix86_issue_rate ()
b657fc39 10708{
e075ae69 10709 switch (ix86_cpu)
b657fc39 10710 {
e075ae69
RH
10711 case PROCESSOR_PENTIUM:
10712 case PROCESSOR_K6:
10713 return 2;
79325812 10714
e075ae69 10715 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
10716 case PROCESSOR_PENTIUM4:
10717 case PROCESSOR_ATHLON:
e075ae69 10718 return 3;
b657fc39 10719
b657fc39 10720 default:
e075ae69 10721 return 1;
b657fc39 10722 }
b657fc39
L
10723}
10724
e075ae69
RH
10725/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10726 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 10727
e075ae69
RH
10728static int
10729ix86_flags_dependant (insn, dep_insn, insn_type)
10730 rtx insn, dep_insn;
10731 enum attr_type insn_type;
10732{
10733 rtx set, set2;
b657fc39 10734
e075ae69
RH
10735 /* Simplify the test for uninteresting insns. */
10736 if (insn_type != TYPE_SETCC
10737 && insn_type != TYPE_ICMOV
10738 && insn_type != TYPE_FCMOV
10739 && insn_type != TYPE_IBR)
10740 return 0;
b657fc39 10741
e075ae69
RH
10742 if ((set = single_set (dep_insn)) != 0)
10743 {
10744 set = SET_DEST (set);
10745 set2 = NULL_RTX;
10746 }
10747 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10748 && XVECLEN (PATTERN (dep_insn), 0) == 2
10749 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10750 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10751 {
10752 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10753 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10754 }
78a0d70c
ZW
10755 else
10756 return 0;
b657fc39 10757
78a0d70c
ZW
10758 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10759 return 0;
b657fc39 10760
f5143c46 10761 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
10762 not any other potentially set register. */
10763 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10764 return 0;
10765
10766 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10767 return 0;
10768
10769 return 1;
e075ae69 10770}
b657fc39 10771
e075ae69
RH
10772/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10773 address with operands set by DEP_INSN. */
10774
10775static int
10776ix86_agi_dependant (insn, dep_insn, insn_type)
10777 rtx insn, dep_insn;
10778 enum attr_type insn_type;
10779{
10780 rtx addr;
10781
6ad48e84
JH
10782 if (insn_type == TYPE_LEA
10783 && TARGET_PENTIUM)
5fbdde42
RH
10784 {
10785 addr = PATTERN (insn);
10786 if (GET_CODE (addr) == SET)
10787 ;
10788 else if (GET_CODE (addr) == PARALLEL
10789 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10790 addr = XVECEXP (addr, 0, 0);
10791 else
10792 abort ();
10793 addr = SET_SRC (addr);
10794 }
e075ae69
RH
10795 else
10796 {
10797 int i;
6c698a6d 10798 extract_insn_cached (insn);
1ccbefce
RH
10799 for (i = recog_data.n_operands - 1; i >= 0; --i)
10800 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 10801 {
1ccbefce 10802 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
10803 goto found;
10804 }
10805 return 0;
10806 found:;
b657fc39
L
10807 }
10808
e075ae69 10809 return modified_in_p (addr, dep_insn);
b657fc39 10810}
a269a03c 10811
c237e94a 10812static int
e075ae69 10813ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
10814 rtx insn, link, dep_insn;
10815 int cost;
10816{
e075ae69 10817 enum attr_type insn_type, dep_insn_type;
6ad48e84 10818 enum attr_memory memory, dep_memory;
e075ae69 10819 rtx set, set2;
9b00189f 10820 int dep_insn_code_number;
a269a03c 10821
309ada50 10822 /* Anti and output depenancies have zero cost on all CPUs. */
e075ae69 10823 if (REG_NOTE_KIND (link) != 0)
309ada50 10824 return 0;
a269a03c 10825
9b00189f
JH
10826 dep_insn_code_number = recog_memoized (dep_insn);
10827
e075ae69 10828 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 10829 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 10830 return cost;
a269a03c 10831
1c71e60e
JH
10832 insn_type = get_attr_type (insn);
10833 dep_insn_type = get_attr_type (dep_insn);
9b00189f 10834
a269a03c
JC
10835 switch (ix86_cpu)
10836 {
10837 case PROCESSOR_PENTIUM:
e075ae69
RH
10838 /* Address Generation Interlock adds a cycle of latency. */
10839 if (ix86_agi_dependant (insn, dep_insn, insn_type))
10840 cost += 1;
10841
10842 /* ??? Compares pair with jump/setcc. */
10843 if (ix86_flags_dependant (insn, dep_insn, insn_type))
10844 cost = 0;
10845
10846 /* Floating point stores require value to be ready one cycle ealier. */
0f290768 10847 if (insn_type == TYPE_FMOV
e075ae69
RH
10848 && get_attr_memory (insn) == MEMORY_STORE
10849 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10850 cost += 1;
10851 break;
a269a03c 10852
e075ae69 10853 case PROCESSOR_PENTIUMPRO:
6ad48e84
JH
10854 memory = get_attr_memory (insn);
10855 dep_memory = get_attr_memory (dep_insn);
10856
0f290768 10857 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
10858 increase the cost here for non-imov insns. */
10859 if (dep_insn_type != TYPE_IMOV
6ad48e84
JH
10860 && dep_insn_type != TYPE_FMOV
10861 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
e075ae69
RH
10862 cost += 1;
10863
10864 /* INT->FP conversion is expensive. */
10865 if (get_attr_fp_int_src (dep_insn))
10866 cost += 5;
10867
10868 /* There is one cycle extra latency between an FP op and a store. */
10869 if (insn_type == TYPE_FMOV
10870 && (set = single_set (dep_insn)) != NULL_RTX
10871 && (set2 = single_set (insn)) != NULL_RTX
10872 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10873 && GET_CODE (SET_DEST (set2)) == MEM)
10874 cost += 1;
6ad48e84
JH
10875
10876 /* Show ability of reorder buffer to hide latency of load by executing
10877 in parallel with previous instruction in case
10878 previous instruction is not needed to compute the address. */
10879 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10880 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10881 {
10882 /* Claim moves to take one cycle, as core can issue one load
10883 at time and the next load can start cycle later. */
10884 if (dep_insn_type == TYPE_IMOV
10885 || dep_insn_type == TYPE_FMOV)
10886 cost = 1;
10887 else if (cost > 1)
10888 cost--;
10889 }
e075ae69 10890 break;
a269a03c 10891
e075ae69 10892 case PROCESSOR_K6:
6ad48e84
JH
10893 memory = get_attr_memory (insn);
10894 dep_memory = get_attr_memory (dep_insn);
e075ae69
RH
10895 /* The esp dependency is resolved before the instruction is really
10896 finished. */
10897 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10898 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10899 return 1;
a269a03c 10900
0f290768 10901 /* Since we can't represent delayed latencies of load+operation,
e075ae69 10902 increase the cost here for non-imov insns. */
6ad48e84 10903 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
e075ae69
RH
10904 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10905
10906 /* INT->FP conversion is expensive. */
10907 if (get_attr_fp_int_src (dep_insn))
10908 cost += 5;
6ad48e84
JH
10909
10910 /* Show ability of reorder buffer to hide latency of load by executing
10911 in parallel with previous instruction in case
10912 previous instruction is not needed to compute the address. */
10913 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10914 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10915 {
10916 /* Claim moves to take one cycle, as core can issue one load
10917 at time and the next load can start cycle later. */
10918 if (dep_insn_type == TYPE_IMOV
10919 || dep_insn_type == TYPE_FMOV)
10920 cost = 1;
10921 else if (cost > 2)
10922 cost -= 2;
10923 else
10924 cost = 1;
10925 }
a14003ee 10926 break;
e075ae69 10927
309ada50 10928 case PROCESSOR_ATHLON:
6ad48e84
JH
10929 memory = get_attr_memory (insn);
10930 dep_memory = get_attr_memory (dep_insn);
10931
10932 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
0b5107cf
JH
10933 {
10934 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10935 cost += 2;
10936 else
10937 cost += 3;
10938 }
6ad48e84
JH
10939 /* Show ability of reorder buffer to hide latency of load by executing
10940 in parallel with previous instruction in case
10941 previous instruction is not needed to compute the address. */
10942 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10943 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10944 {
10945 /* Claim moves to take one cycle, as core can issue one load
10946 at time and the next load can start cycle later. */
10947 if (dep_insn_type == TYPE_IMOV
10948 || dep_insn_type == TYPE_FMOV)
10949 cost = 0;
10950 else if (cost >= 3)
10951 cost -= 3;
10952 else
10953 cost = 0;
10954 }
309ada50 10955
a269a03c 10956 default:
a269a03c
JC
10957 break;
10958 }
10959
10960 return cost;
10961}
0a726ef1 10962
e075ae69
RH
10963static union
10964{
10965 struct ppro_sched_data
10966 {
10967 rtx decode[3];
10968 int issued_this_cycle;
10969 } ppro;
10970} ix86_sched_data;
0a726ef1 10971
e075ae69
RH
10972static enum attr_ppro_uops
10973ix86_safe_ppro_uops (insn)
10974 rtx insn;
10975{
10976 if (recog_memoized (insn) >= 0)
10977 return get_attr_ppro_uops (insn);
10978 else
10979 return PPRO_UOPS_MANY;
10980}
0a726ef1 10981
e075ae69
RH
10982static void
10983ix86_dump_ppro_packet (dump)
10984 FILE *dump;
0a726ef1 10985{
e075ae69 10986 if (ix86_sched_data.ppro.decode[0])
0a726ef1 10987 {
e075ae69
RH
10988 fprintf (dump, "PPRO packet: %d",
10989 INSN_UID (ix86_sched_data.ppro.decode[0]));
10990 if (ix86_sched_data.ppro.decode[1])
10991 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10992 if (ix86_sched_data.ppro.decode[2])
10993 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10994 fputc ('\n', dump);
10995 }
10996}
0a726ef1 10997
e075ae69 10998/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 10999
c237e94a
ZW
11000static void
11001ix86_sched_init (dump, sched_verbose, veclen)
e075ae69
RH
11002 FILE *dump ATTRIBUTE_UNUSED;
11003 int sched_verbose ATTRIBUTE_UNUSED;
c237e94a 11004 int veclen ATTRIBUTE_UNUSED;
e075ae69
RH
11005{
11006 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11007}
11008
11009/* Shift INSN to SLOT, and shift everything else down. */
11010
11011static void
11012ix86_reorder_insn (insnp, slot)
11013 rtx *insnp, *slot;
11014{
11015 if (insnp != slot)
11016 {
11017 rtx insn = *insnp;
0f290768 11018 do
e075ae69
RH
11019 insnp[0] = insnp[1];
11020 while (++insnp != slot);
11021 *insnp = insn;
0a726ef1 11022 }
e075ae69
RH
11023}
11024
c6991660 11025static void
78a0d70c
ZW
11026ix86_sched_reorder_ppro (ready, e_ready)
11027 rtx *ready;
11028 rtx *e_ready;
11029{
11030 rtx decode[3];
11031 enum attr_ppro_uops cur_uops;
11032 int issued_this_cycle;
11033 rtx *insnp;
11034 int i;
e075ae69 11035
0f290768 11036 /* At this point .ppro.decode contains the state of the three
78a0d70c 11037 decoders from last "cycle". That is, those insns that were
0f290768 11038 actually independent. But here we're scheduling for the
78a0d70c
ZW
11039 decoder, and we may find things that are decodable in the
11040 same cycle. */
e075ae69 11041
0f290768 11042 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
78a0d70c 11043 issued_this_cycle = 0;
e075ae69 11044
78a0d70c
ZW
11045 insnp = e_ready;
11046 cur_uops = ix86_safe_ppro_uops (*insnp);
0a726ef1 11047
78a0d70c
ZW
11048 /* If the decoders are empty, and we've a complex insn at the
11049 head of the priority queue, let it issue without complaint. */
11050 if (decode[0] == NULL)
11051 {
11052 if (cur_uops == PPRO_UOPS_MANY)
11053 {
11054 decode[0] = *insnp;
11055 goto ppro_done;
11056 }
11057
11058 /* Otherwise, search for a 2-4 uop unsn to issue. */
11059 while (cur_uops != PPRO_UOPS_FEW)
11060 {
11061 if (insnp == ready)
11062 break;
11063 cur_uops = ix86_safe_ppro_uops (*--insnp);
11064 }
11065
11066 /* If so, move it to the head of the line. */
11067 if (cur_uops == PPRO_UOPS_FEW)
11068 ix86_reorder_insn (insnp, e_ready);
0a726ef1 11069
78a0d70c
ZW
11070 /* Issue the head of the queue. */
11071 issued_this_cycle = 1;
11072 decode[0] = *e_ready--;
11073 }
fb693d44 11074
78a0d70c
ZW
11075 /* Look for simple insns to fill in the other two slots. */
11076 for (i = 1; i < 3; ++i)
11077 if (decode[i] == NULL)
11078 {
a151daf0 11079 if (ready > e_ready)
78a0d70c 11080 goto ppro_done;
fb693d44 11081
e075ae69
RH
11082 insnp = e_ready;
11083 cur_uops = ix86_safe_ppro_uops (*insnp);
78a0d70c
ZW
11084 while (cur_uops != PPRO_UOPS_ONE)
11085 {
11086 if (insnp == ready)
11087 break;
11088 cur_uops = ix86_safe_ppro_uops (*--insnp);
11089 }
fb693d44 11090
78a0d70c
ZW
11091 /* Found one. Move it to the head of the queue and issue it. */
11092 if (cur_uops == PPRO_UOPS_ONE)
e075ae69 11093 {
78a0d70c
ZW
11094 ix86_reorder_insn (insnp, e_ready);
11095 decode[i] = *e_ready--;
11096 issued_this_cycle++;
11097 continue;
11098 }
fb693d44 11099
78a0d70c
ZW
11100 /* ??? Didn't find one. Ideally, here we would do a lazy split
11101 of 2-uop insns, issue one and queue the other. */
11102 }
fb693d44 11103
78a0d70c
ZW
11104 ppro_done:
11105 if (issued_this_cycle == 0)
11106 issued_this_cycle = 1;
11107 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11108}
fb693d44 11109
0f290768 11110/* We are about to being issuing insns for this clock cycle.
78a0d70c 11111 Override the default sort algorithm to better slot instructions. */
c237e94a
ZW
11112static int
11113ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
78a0d70c
ZW
11114 FILE *dump ATTRIBUTE_UNUSED;
11115 int sched_verbose ATTRIBUTE_UNUSED;
11116 rtx *ready;
c237e94a 11117 int *n_readyp;
78a0d70c
ZW
11118 int clock_var ATTRIBUTE_UNUSED;
11119{
c237e94a 11120 int n_ready = *n_readyp;
78a0d70c 11121 rtx *e_ready = ready + n_ready - 1;
fb693d44 11122
a151daf0
JL
11123 /* Make sure to go ahead and initialize key items in
11124 ix86_sched_data if we are not going to bother trying to
11125 reorder the ready queue. */
78a0d70c 11126 if (n_ready < 2)
a151daf0
JL
11127 {
11128 ix86_sched_data.ppro.issued_this_cycle = 1;
11129 goto out;
11130 }
e075ae69 11131
78a0d70c
ZW
11132 switch (ix86_cpu)
11133 {
11134 default:
11135 break;
e075ae69 11136
78a0d70c
ZW
11137 case PROCESSOR_PENTIUMPRO:
11138 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 11139 break;
fb693d44
RH
11140 }
11141
e075ae69
RH
11142out:
11143 return ix86_issue_rate ();
11144}
fb693d44 11145
e075ae69
RH
11146/* We are about to issue INSN. Return the number of insns left on the
11147 ready queue that can be issued this cycle. */
b222082e 11148
c237e94a 11149static int
e075ae69
RH
11150ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11151 FILE *dump;
11152 int sched_verbose;
11153 rtx insn;
11154 int can_issue_more;
11155{
11156 int i;
11157 switch (ix86_cpu)
fb693d44 11158 {
e075ae69
RH
11159 default:
11160 return can_issue_more - 1;
fb693d44 11161
e075ae69
RH
11162 case PROCESSOR_PENTIUMPRO:
11163 {
11164 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
fb693d44 11165
e075ae69
RH
11166 if (uops == PPRO_UOPS_MANY)
11167 {
11168 if (sched_verbose)
11169 ix86_dump_ppro_packet (dump);
11170 ix86_sched_data.ppro.decode[0] = insn;
11171 ix86_sched_data.ppro.decode[1] = NULL;
11172 ix86_sched_data.ppro.decode[2] = NULL;
11173 if (sched_verbose)
11174 ix86_dump_ppro_packet (dump);
11175 ix86_sched_data.ppro.decode[0] = NULL;
11176 }
11177 else if (uops == PPRO_UOPS_FEW)
11178 {
11179 if (sched_verbose)
11180 ix86_dump_ppro_packet (dump);
11181 ix86_sched_data.ppro.decode[0] = insn;
11182 ix86_sched_data.ppro.decode[1] = NULL;
11183 ix86_sched_data.ppro.decode[2] = NULL;
11184 }
11185 else
11186 {
11187 for (i = 0; i < 3; ++i)
11188 if (ix86_sched_data.ppro.decode[i] == NULL)
11189 {
11190 ix86_sched_data.ppro.decode[i] = insn;
11191 break;
11192 }
11193 if (i == 3)
11194 abort ();
11195 if (i == 2)
11196 {
11197 if (sched_verbose)
11198 ix86_dump_ppro_packet (dump);
11199 ix86_sched_data.ppro.decode[0] = NULL;
11200 ix86_sched_data.ppro.decode[1] = NULL;
11201 ix86_sched_data.ppro.decode[2] = NULL;
11202 }
11203 }
11204 }
11205 return --ix86_sched_data.ppro.issued_this_cycle;
11206 }
fb693d44 11207}
9b690711
RH
11208
11209static int
11210ia32_use_dfa_pipeline_interface ()
11211{
11212 if (ix86_cpu == PROCESSOR_PENTIUM)
11213 return 1;
11214 return 0;
11215}
11216
11217/* How many alternative schedules to try. This should be as wide as the
11218 scheduling freedom in the DFA, but no wider. Making this value too
11219 large results extra work for the scheduler. */
11220
11221static int
11222ia32_multipass_dfa_lookahead ()
11223{
11224 if (ix86_cpu == PROCESSOR_PENTIUM)
11225 return 2;
11226 else
11227 return 0;
11228}
11229
a7180f70 11230\f
0e4970d7
RK
11231/* Walk through INSNS and look for MEM references whose address is DSTREG or
11232 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11233 appropriate. */
11234
11235void
11236ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11237 rtx insns;
11238 rtx dstref, srcref, dstreg, srcreg;
11239{
11240 rtx insn;
11241
11242 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11243 if (INSN_P (insn))
11244 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11245 dstreg, srcreg);
11246}
11247
11248/* Subroutine of above to actually do the updating by recursively walking
11249 the rtx. */
11250
11251static void
11252ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11253 rtx x;
11254 rtx dstref, srcref, dstreg, srcreg;
11255{
11256 enum rtx_code code = GET_CODE (x);
11257 const char *format_ptr = GET_RTX_FORMAT (code);
11258 int i, j;
11259
11260 if (code == MEM && XEXP (x, 0) == dstreg)
11261 MEM_COPY_ATTRIBUTES (x, dstref);
11262 else if (code == MEM && XEXP (x, 0) == srcreg)
11263 MEM_COPY_ATTRIBUTES (x, srcref);
11264
11265 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11266 {
11267 if (*format_ptr == 'e')
11268 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11269 dstreg, srcreg);
11270 else if (*format_ptr == 'E')
11271 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
397be6cf 11272 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
0e4970d7
RK
11273 dstreg, srcreg);
11274 }
11275}
11276\f
a7180f70
BS
11277/* Compute the alignment given to a constant that is being placed in memory.
11278 EXP is the constant and ALIGN is the alignment that the object would
11279 ordinarily have.
11280 The value of this function is used instead of that alignment to align
11281 the object. */
11282
11283int
11284ix86_constant_alignment (exp, align)
11285 tree exp;
11286 int align;
11287{
11288 if (TREE_CODE (exp) == REAL_CST)
11289 {
11290 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11291 return 64;
11292 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11293 return 128;
11294 }
11295 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11296 && align < 256)
11297 return 256;
11298
11299 return align;
11300}
11301
11302/* Compute the alignment for a static variable.
11303 TYPE is the data type, and ALIGN is the alignment that
11304 the object would ordinarily have. The value of this function is used
11305 instead of that alignment to align the object. */
11306
11307int
11308ix86_data_alignment (type, align)
11309 tree type;
11310 int align;
11311{
11312 if (AGGREGATE_TYPE_P (type)
11313 && TYPE_SIZE (type)
11314 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11315 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11316 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11317 return 256;
11318
0d7d98ee
JH
11319 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11320 to 16byte boundary. */
11321 if (TARGET_64BIT)
11322 {
11323 if (AGGREGATE_TYPE_P (type)
11324 && TYPE_SIZE (type)
11325 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11326 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11327 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11328 return 128;
11329 }
11330
a7180f70
BS
11331 if (TREE_CODE (type) == ARRAY_TYPE)
11332 {
11333 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11334 return 64;
11335 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11336 return 128;
11337 }
11338 else if (TREE_CODE (type) == COMPLEX_TYPE)
11339 {
0f290768 11340
a7180f70
BS
11341 if (TYPE_MODE (type) == DCmode && align < 64)
11342 return 64;
11343 if (TYPE_MODE (type) == XCmode && align < 128)
11344 return 128;
11345 }
11346 else if ((TREE_CODE (type) == RECORD_TYPE
11347 || TREE_CODE (type) == UNION_TYPE
11348 || TREE_CODE (type) == QUAL_UNION_TYPE)
11349 && TYPE_FIELDS (type))
11350 {
11351 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11352 return 64;
11353 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11354 return 128;
11355 }
11356 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11357 || TREE_CODE (type) == INTEGER_TYPE)
11358 {
11359 if (TYPE_MODE (type) == DFmode && align < 64)
11360 return 64;
11361 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11362 return 128;
11363 }
11364
11365 return align;
11366}
11367
11368/* Compute the alignment for a local variable.
11369 TYPE is the data type, and ALIGN is the alignment that
11370 the object would ordinarily have. The value of this macro is used
11371 instead of that alignment to align the object. */
11372
11373int
11374ix86_local_alignment (type, align)
11375 tree type;
11376 int align;
11377{
0d7d98ee
JH
11378 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11379 to 16byte boundary. */
11380 if (TARGET_64BIT)
11381 {
11382 if (AGGREGATE_TYPE_P (type)
11383 && TYPE_SIZE (type)
11384 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11385 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11386 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11387 return 128;
11388 }
a7180f70
BS
11389 if (TREE_CODE (type) == ARRAY_TYPE)
11390 {
11391 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11392 return 64;
11393 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11394 return 128;
11395 }
11396 else if (TREE_CODE (type) == COMPLEX_TYPE)
11397 {
11398 if (TYPE_MODE (type) == DCmode && align < 64)
11399 return 64;
11400 if (TYPE_MODE (type) == XCmode && align < 128)
11401 return 128;
11402 }
11403 else if ((TREE_CODE (type) == RECORD_TYPE
11404 || TREE_CODE (type) == UNION_TYPE
11405 || TREE_CODE (type) == QUAL_UNION_TYPE)
11406 && TYPE_FIELDS (type))
11407 {
11408 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11409 return 64;
11410 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11411 return 128;
11412 }
11413 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11414 || TREE_CODE (type) == INTEGER_TYPE)
11415 {
0f290768 11416
a7180f70
BS
11417 if (TYPE_MODE (type) == DFmode && align < 64)
11418 return 64;
11419 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11420 return 128;
11421 }
11422 return align;
11423}
0ed08620
JH
11424\f
11425/* Emit RTL insns to initialize the variable parts of a trampoline.
11426 FNADDR is an RTX for the address of the function's pure code.
11427 CXT is an RTX for the static chain value for the function. */
11428void
11429x86_initialize_trampoline (tramp, fnaddr, cxt)
11430 rtx tramp, fnaddr, cxt;
11431{
11432 if (!TARGET_64BIT)
11433 {
11434 /* Compute offset from the end of the jmp to the target function. */
11435 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11436 plus_constant (tramp, 10),
11437 NULL_RTX, 1, OPTAB_DIRECT);
11438 emit_move_insn (gen_rtx_MEM (QImode, tramp),
d8bf17f9 11439 gen_int_mode (0xb9, QImode));
0ed08620
JH
11440 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11441 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
d8bf17f9 11442 gen_int_mode (0xe9, QImode));
0ed08620
JH
11443 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11444 }
11445 else
11446 {
11447 int offset = 0;
11448 /* Try to load address using shorter movl instead of movabs.
11449 We may want to support movq for kernel mode, but kernel does not use
11450 trampolines at the moment. */
11451 if (x86_64_zero_extended_value (fnaddr))
11452 {
11453 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11454 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11455 gen_int_mode (0xbb41, HImode));
0ed08620
JH
11456 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11457 gen_lowpart (SImode, fnaddr));
11458 offset += 6;
11459 }
11460 else
11461 {
11462 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11463 gen_int_mode (0xbb49, HImode));
0ed08620
JH
11464 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11465 fnaddr);
11466 offset += 10;
11467 }
11468 /* Load static chain using movabs to r10. */
11469 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11470 gen_int_mode (0xba49, HImode));
0ed08620
JH
11471 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11472 cxt);
11473 offset += 10;
11474 /* Jump to the r11 */
11475 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11476 gen_int_mode (0xff49, HImode));
0ed08620 11477 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
d8bf17f9 11478 gen_int_mode (0xe3, QImode));
0ed08620
JH
11479 offset += 3;
11480 if (offset > TRAMPOLINE_SIZE)
b531087a 11481 abort ();
0ed08620
JH
11482 }
11483}
eeb06b1b
BS
11484\f
11485#define def_builtin(MASK, NAME, TYPE, CODE) \
11486do { \
11487 if ((MASK) & target_flags) \
11488 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
11489} while (0)
bd793c65 11490
bd793c65
BS
11491struct builtin_description
11492{
8b60264b
KG
11493 const unsigned int mask;
11494 const enum insn_code icode;
11495 const char *const name;
11496 const enum ix86_builtins code;
11497 const enum rtx_code comparison;
11498 const unsigned int flag;
bd793c65
BS
11499};
11500
fbe5eb6d
BS
11501/* Used for builtins that are enabled both by -msse and -msse2. */
11502#define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11503
8b60264b 11504static const struct builtin_description bdesc_comi[] =
bd793c65 11505{
fbe5eb6d
BS
11506 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
11507 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
11508 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
11509 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
11510 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
11511 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
11512 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
11513 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
11514 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
11515 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
11516 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
11517 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
11518 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
11519 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
11520 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
11521 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
11522 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
11523 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
11524 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
11525 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
11526 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
11527 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
11528 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
11529 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
bd793c65
BS
11530};
11531
8b60264b 11532static const struct builtin_description bdesc_2arg[] =
bd793c65
BS
11533{
11534 /* SSE */
fbe5eb6d
BS
11535 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11536 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11537 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11538 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11539 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11540 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11541 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11542 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11543
11544 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11545 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11546 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11547 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11548 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11549 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11550 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11551 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11552 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11553 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11554 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11555 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11556 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11557 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11558 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11559 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
11560 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
11561 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11562 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11563 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11564 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11565 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
11566 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
11567 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11568
11569 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11570 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11571 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11572 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11573
11574 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11575 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11576 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11577 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11578 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
bd793c65
BS
11579
11580 /* MMX */
eeb06b1b
BS
11581 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11582 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11583 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11584 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11585 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11586 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11587
11588 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11589 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11590 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11591 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11592 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11593 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11594 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11595 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11596
11597 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11598 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
fbe5eb6d 11599 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
eeb06b1b
BS
11600
11601 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11602 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11603 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11604 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11605
fbe5eb6d
BS
11606 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11607 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
eeb06b1b
BS
11608
11609 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11610 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11611 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11612 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11613 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11614 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11615
fbe5eb6d
BS
11616 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11617 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11618 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11619 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
eeb06b1b
BS
11620
11621 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11622 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11623 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11624 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11625 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11626 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
bd793c65
BS
11627
11628 /* Special. */
eeb06b1b
BS
11629 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11630 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11631 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11632
fbe5eb6d
BS
11633 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11634 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
eeb06b1b
BS
11635
11636 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11637 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11638 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11639 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11640 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11641 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11642
11643 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11644 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11645 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11646 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11647 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11648 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11649
11650 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11651 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11652 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11653 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11654
fbe5eb6d
BS
11655 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11656 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11657
11658 /* SSE2 */
11659 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11660 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11661 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11662 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11663 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11664 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11665 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11666 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11667
11668 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11669 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11670 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11671 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11672 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11673 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11674 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11675 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11676 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11677 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11678 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11679 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11680 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11681 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11682 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11683 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11684 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11685 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11686 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11687 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11688 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11689 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11690 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11691 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11692
11693 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11694 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11695 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11696 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11697
11698 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11699 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11700 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11701 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11702
11703 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11704 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11705 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11706
11707 /* SSE2 MMX */
11708 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11709 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11710 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11711 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11712 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11713 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11714 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11715 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11716
11717 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11718 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11719 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11720 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11721 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11722 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11723 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11724 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11725
11726 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11727 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11728 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11729 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11730
916b60b7
BS
11731 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11732 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11733 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11734 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
fbe5eb6d
BS
11735
11736 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11737 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11738
11739 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11740 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11741 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11742 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11743 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11744 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11745
11746 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11747 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11748 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11749 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11750
11751 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11752 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11753 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11754 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11755 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11756 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11757
916b60b7
BS
11758 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11759 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11760 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11761
11762 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11763 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11764
11765 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11766 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11767 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11768 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11769 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11770 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11771
11772 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11773 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11774 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11775 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11776 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11777 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11778
11779 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11780 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11781 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11782 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11783
11784 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11785
fbe5eb6d
BS
11786 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11787 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11788 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
bd793c65
BS
11789};
11790
8b60264b 11791static const struct builtin_description bdesc_1arg[] =
bd793c65 11792{
fbe5eb6d
BS
11793 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11794 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11795
11796 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11797 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11798 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
11799
11800 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11801 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11802 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11803 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
11804
11805 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
11806 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
11807 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
11808
11809 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
11810
11811 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
11812 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
bd793c65 11813
fbe5eb6d
BS
11814 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
11815 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
11816 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
11817 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
11818 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
bd793c65 11819
fbe5eb6d 11820 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
bd793c65 11821
fbe5eb6d
BS
11822 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
11823 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
11824
11825 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
11826 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
11827 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
bd793c65
BS
11828};
11829
f6155fda
SS
11830void
11831ix86_init_builtins ()
11832{
11833 if (TARGET_MMX)
11834 ix86_init_mmx_sse_builtins ();
11835}
11836
11837/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
bd793c65
BS
11838 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
11839 builtins. */
e37af218 11840static void
f6155fda 11841ix86_init_mmx_sse_builtins ()
bd793c65 11842{
8b60264b 11843 const struct builtin_description * d;
77ebd435 11844 size_t i;
cbd5937a 11845 tree endlink = void_list_node;
bd793c65
BS
11846
11847 tree pchar_type_node = build_pointer_type (char_type_node);
11848 tree pfloat_type_node = build_pointer_type (float_type_node);
11849 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
916b60b7 11850 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
bd793c65
BS
11851 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
11852
11853 /* Comparisons. */
11854 tree int_ftype_v4sf_v4sf
11855 = build_function_type (integer_type_node,
11856 tree_cons (NULL_TREE, V4SF_type_node,
11857 tree_cons (NULL_TREE,
11858 V4SF_type_node,
11859 endlink)));
11860 tree v4si_ftype_v4sf_v4sf
11861 = build_function_type (V4SI_type_node,
11862 tree_cons (NULL_TREE, V4SF_type_node,
11863 tree_cons (NULL_TREE,
11864 V4SF_type_node,
11865 endlink)));
11866 /* MMX/SSE/integer conversions. */
bd793c65
BS
11867 tree int_ftype_v4sf
11868 = build_function_type (integer_type_node,
11869 tree_cons (NULL_TREE, V4SF_type_node,
11870 endlink));
11871 tree int_ftype_v8qi
11872 = build_function_type (integer_type_node,
11873 tree_cons (NULL_TREE, V8QI_type_node,
11874 endlink));
bd793c65 11875 tree v4sf_ftype_v4sf_int
21e1b5f1 11876 = build_function_type (V4SF_type_node,
bd793c65
BS
11877 tree_cons (NULL_TREE, V4SF_type_node,
11878 tree_cons (NULL_TREE, integer_type_node,
11879 endlink)));
11880 tree v4sf_ftype_v4sf_v2si
11881 = build_function_type (V4SF_type_node,
11882 tree_cons (NULL_TREE, V4SF_type_node,
11883 tree_cons (NULL_TREE, V2SI_type_node,
11884 endlink)));
11885 tree int_ftype_v4hi_int
11886 = build_function_type (integer_type_node,
11887 tree_cons (NULL_TREE, V4HI_type_node,
11888 tree_cons (NULL_TREE, integer_type_node,
11889 endlink)));
11890 tree v4hi_ftype_v4hi_int_int
332316cd 11891 = build_function_type (V4HI_type_node,
bd793c65
BS
11892 tree_cons (NULL_TREE, V4HI_type_node,
11893 tree_cons (NULL_TREE, integer_type_node,
11894 tree_cons (NULL_TREE,
11895 integer_type_node,
11896 endlink))));
11897 /* Miscellaneous. */
11898 tree v8qi_ftype_v4hi_v4hi
11899 = build_function_type (V8QI_type_node,
11900 tree_cons (NULL_TREE, V4HI_type_node,
11901 tree_cons (NULL_TREE, V4HI_type_node,
11902 endlink)));
11903 tree v4hi_ftype_v2si_v2si
11904 = build_function_type (V4HI_type_node,
11905 tree_cons (NULL_TREE, V2SI_type_node,
11906 tree_cons (NULL_TREE, V2SI_type_node,
11907 endlink)));
11908 tree v4sf_ftype_v4sf_v4sf_int
11909 = build_function_type (V4SF_type_node,
11910 tree_cons (NULL_TREE, V4SF_type_node,
11911 tree_cons (NULL_TREE, V4SF_type_node,
11912 tree_cons (NULL_TREE,
11913 integer_type_node,
11914 endlink))));
bd793c65
BS
11915 tree v2si_ftype_v4hi_v4hi
11916 = build_function_type (V2SI_type_node,
11917 tree_cons (NULL_TREE, V4HI_type_node,
11918 tree_cons (NULL_TREE, V4HI_type_node,
11919 endlink)));
11920 tree v4hi_ftype_v4hi_int
11921 = build_function_type (V4HI_type_node,
11922 tree_cons (NULL_TREE, V4HI_type_node,
11923 tree_cons (NULL_TREE, integer_type_node,
11924 endlink)));
bd793c65
BS
11925 tree v4hi_ftype_v4hi_di
11926 = build_function_type (V4HI_type_node,
11927 tree_cons (NULL_TREE, V4HI_type_node,
11928 tree_cons (NULL_TREE,
11929 long_long_integer_type_node,
11930 endlink)));
11931 tree v2si_ftype_v2si_di
11932 = build_function_type (V2SI_type_node,
11933 tree_cons (NULL_TREE, V2SI_type_node,
11934 tree_cons (NULL_TREE,
11935 long_long_integer_type_node,
11936 endlink)));
11937 tree void_ftype_void
11938 = build_function_type (void_type_node, endlink);
bd793c65
BS
11939 tree void_ftype_unsigned
11940 = build_function_type (void_type_node,
11941 tree_cons (NULL_TREE, unsigned_type_node,
11942 endlink));
11943 tree unsigned_ftype_void
11944 = build_function_type (unsigned_type_node, endlink);
11945 tree di_ftype_void
11946 = build_function_type (long_long_unsigned_type_node, endlink);
e37af218
RH
11947 tree v4sf_ftype_void
11948 = build_function_type (V4SF_type_node, endlink);
bd793c65
BS
11949 tree v2si_ftype_v4sf
11950 = build_function_type (V2SI_type_node,
11951 tree_cons (NULL_TREE, V4SF_type_node,
11952 endlink));
11953 /* Loads/stores. */
11954 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
11955 tree_cons (NULL_TREE, V8QI_type_node,
11956 tree_cons (NULL_TREE,
11957 pchar_type_node,
11958 endlink)));
11959 tree void_ftype_v8qi_v8qi_pchar
11960 = build_function_type (void_type_node, maskmovq_args);
11961 tree v4sf_ftype_pfloat
11962 = build_function_type (V4SF_type_node,
11963 tree_cons (NULL_TREE, pfloat_type_node,
11964 endlink));
bd793c65
BS
11965 /* @@@ the type is bogus */
11966 tree v4sf_ftype_v4sf_pv2si
11967 = build_function_type (V4SF_type_node,
11968 tree_cons (NULL_TREE, V4SF_type_node,
11969 tree_cons (NULL_TREE, pv2si_type_node,
11970 endlink)));
1255c85c
BS
11971 tree void_ftype_pv2si_v4sf
11972 = build_function_type (void_type_node,
11973 tree_cons (NULL_TREE, pv2si_type_node,
11974 tree_cons (NULL_TREE, V4SF_type_node,
bd793c65
BS
11975 endlink)));
11976 tree void_ftype_pfloat_v4sf
11977 = build_function_type (void_type_node,
11978 tree_cons (NULL_TREE, pfloat_type_node,
11979 tree_cons (NULL_TREE, V4SF_type_node,
11980 endlink)));
11981 tree void_ftype_pdi_di
11982 = build_function_type (void_type_node,
11983 tree_cons (NULL_TREE, pdi_type_node,
11984 tree_cons (NULL_TREE,
11985 long_long_unsigned_type_node,
11986 endlink)));
916b60b7
BS
11987 tree void_ftype_pv2di_v2di
11988 = build_function_type (void_type_node,
11989 tree_cons (NULL_TREE, pv2di_type_node,
11990 tree_cons (NULL_TREE,
11991 V2DI_type_node,
11992 endlink)));
bd793c65
BS
11993 /* Normal vector unops. */
11994 tree v4sf_ftype_v4sf
11995 = build_function_type (V4SF_type_node,
11996 tree_cons (NULL_TREE, V4SF_type_node,
11997 endlink));
0f290768 11998
bd793c65
BS
11999 /* Normal vector binops. */
12000 tree v4sf_ftype_v4sf_v4sf
12001 = build_function_type (V4SF_type_node,
12002 tree_cons (NULL_TREE, V4SF_type_node,
12003 tree_cons (NULL_TREE, V4SF_type_node,
12004 endlink)));
12005 tree v8qi_ftype_v8qi_v8qi
12006 = build_function_type (V8QI_type_node,
12007 tree_cons (NULL_TREE, V8QI_type_node,
12008 tree_cons (NULL_TREE, V8QI_type_node,
12009 endlink)));
12010 tree v4hi_ftype_v4hi_v4hi
12011 = build_function_type (V4HI_type_node,
12012 tree_cons (NULL_TREE, V4HI_type_node,
12013 tree_cons (NULL_TREE, V4HI_type_node,
12014 endlink)));
12015 tree v2si_ftype_v2si_v2si
12016 = build_function_type (V2SI_type_node,
12017 tree_cons (NULL_TREE, V2SI_type_node,
12018 tree_cons (NULL_TREE, V2SI_type_node,
12019 endlink)));
bd793c65
BS
12020 tree di_ftype_di_di
12021 = build_function_type (long_long_unsigned_type_node,
12022 tree_cons (NULL_TREE, long_long_unsigned_type_node,
12023 tree_cons (NULL_TREE,
12024 long_long_unsigned_type_node,
12025 endlink)));
12026
47f339cf
BS
12027 tree v2si_ftype_v2sf
12028 = build_function_type (V2SI_type_node,
12029 tree_cons (NULL_TREE, V2SF_type_node,
12030 endlink));
12031 tree v2sf_ftype_v2si
12032 = build_function_type (V2SF_type_node,
12033 tree_cons (NULL_TREE, V2SI_type_node,
12034 endlink));
12035 tree v2si_ftype_v2si
12036 = build_function_type (V2SI_type_node,
12037 tree_cons (NULL_TREE, V2SI_type_node,
12038 endlink));
12039 tree v2sf_ftype_v2sf
12040 = build_function_type (V2SF_type_node,
12041 tree_cons (NULL_TREE, V2SF_type_node,
12042 endlink));
12043 tree v2sf_ftype_v2sf_v2sf
12044 = build_function_type (V2SF_type_node,
12045 tree_cons (NULL_TREE, V2SF_type_node,
12046 tree_cons (NULL_TREE,
12047 V2SF_type_node,
12048 endlink)));
12049 tree v2si_ftype_v2sf_v2sf
12050 = build_function_type (V2SI_type_node,
12051 tree_cons (NULL_TREE, V2SF_type_node,
12052 tree_cons (NULL_TREE,
12053 V2SF_type_node,
12054 endlink)));
fbe5eb6d
BS
12055 tree pint_type_node = build_pointer_type (integer_type_node);
12056 tree pdouble_type_node = build_pointer_type (double_type_node);
12057 tree int_ftype_v2df_v2df
12058 = build_function_type (integer_type_node,
12059 tree_cons (NULL_TREE, V2DF_type_node,
12060 tree_cons (NULL_TREE, V2DF_type_node, endlink)));
12061
12062 tree ti_ftype_void
12063 = build_function_type (intTI_type_node, endlink);
12064 tree ti_ftype_ti_ti
12065 = build_function_type (intTI_type_node,
12066 tree_cons (NULL_TREE, intTI_type_node,
12067 tree_cons (NULL_TREE, intTI_type_node,
12068 endlink)));
12069 tree void_ftype_pvoid
12070 = build_function_type (void_type_node,
12071 tree_cons (NULL_TREE, ptr_type_node, endlink));
12072 tree v2di_ftype_di
12073 = build_function_type (V2DI_type_node,
12074 tree_cons (NULL_TREE, long_long_unsigned_type_node,
12075 endlink));
12076 tree v4sf_ftype_v4si
12077 = build_function_type (V4SF_type_node,
12078 tree_cons (NULL_TREE, V4SI_type_node, endlink));
12079 tree v4si_ftype_v4sf
12080 = build_function_type (V4SI_type_node,
12081 tree_cons (NULL_TREE, V4SF_type_node, endlink));
12082 tree v2df_ftype_v4si
12083 = build_function_type (V2DF_type_node,
12084 tree_cons (NULL_TREE, V4SI_type_node, endlink));
12085 tree v4si_ftype_v2df
12086 = build_function_type (V4SI_type_node,
12087 tree_cons (NULL_TREE, V2DF_type_node, endlink));
12088 tree v2si_ftype_v2df
12089 = build_function_type (V2SI_type_node,
12090 tree_cons (NULL_TREE, V2DF_type_node, endlink));
12091 tree v4sf_ftype_v2df
12092 = build_function_type (V4SF_type_node,
12093 tree_cons (NULL_TREE, V2DF_type_node, endlink));
12094 tree v2df_ftype_v2si
12095 = build_function_type (V2DF_type_node,
12096 tree_cons (NULL_TREE, V2SI_type_node, endlink));
12097 tree v2df_ftype_v4sf
12098 = build_function_type (V2DF_type_node,
12099 tree_cons (NULL_TREE, V4SF_type_node, endlink));
12100 tree int_ftype_v2df
12101 = build_function_type (integer_type_node,
12102 tree_cons (NULL_TREE, V2DF_type_node, endlink));
12103 tree v2df_ftype_v2df_int
12104 = build_function_type (V2DF_type_node,
12105 tree_cons (NULL_TREE, V2DF_type_node,
12106 tree_cons (NULL_TREE, integer_type_node,
12107 endlink)));
12108 tree v4sf_ftype_v4sf_v2df
12109 = build_function_type (V4SF_type_node,
12110 tree_cons (NULL_TREE, V4SF_type_node,
12111 tree_cons (NULL_TREE, V2DF_type_node,
12112 endlink)));
12113 tree v2df_ftype_v2df_v4sf
12114 = build_function_type (V2DF_type_node,
12115 tree_cons (NULL_TREE, V2DF_type_node,
12116 tree_cons (NULL_TREE, V4SF_type_node,
12117 endlink)));
12118 tree v2df_ftype_v2df_v2df_int
12119 = build_function_type (V2DF_type_node,
12120 tree_cons (NULL_TREE, V2DF_type_node,
12121 tree_cons (NULL_TREE, V2DF_type_node,
12122 tree_cons (NULL_TREE,
12123 integer_type_node,
12124 endlink))));
12125 tree v2df_ftype_v2df_pv2si
12126 = build_function_type (V2DF_type_node,
12127 tree_cons (NULL_TREE, V2DF_type_node,
12128 tree_cons (NULL_TREE, pv2si_type_node,
12129 endlink)));
12130 tree void_ftype_pv2si_v2df
12131 = build_function_type (void_type_node,
12132 tree_cons (NULL_TREE, pv2si_type_node,
12133 tree_cons (NULL_TREE, V2DF_type_node,
12134 endlink)));
12135 tree void_ftype_pdouble_v2df
12136 = build_function_type (void_type_node,
12137 tree_cons (NULL_TREE, pdouble_type_node,
12138 tree_cons (NULL_TREE, V2DF_type_node,
12139 endlink)));
12140 tree void_ftype_pint_int
12141 = build_function_type (void_type_node,
12142 tree_cons (NULL_TREE, pint_type_node,
12143 tree_cons (NULL_TREE, integer_type_node,
12144 endlink)));
12145 tree maskmovdqu_args = tree_cons (NULL_TREE, V16QI_type_node,
12146 tree_cons (NULL_TREE, V16QI_type_node,
12147 tree_cons (NULL_TREE,
12148 pchar_type_node,
12149 endlink)));
12150 tree void_ftype_v16qi_v16qi_pchar
12151 = build_function_type (void_type_node, maskmovdqu_args);
12152 tree v2df_ftype_pdouble
12153 = build_function_type (V2DF_type_node,
12154 tree_cons (NULL_TREE, pdouble_type_node,
12155 endlink));
12156 tree v2df_ftype_v2df_v2df
12157 = build_function_type (V2DF_type_node,
12158 tree_cons (NULL_TREE, V2DF_type_node,
12159 tree_cons (NULL_TREE, V2DF_type_node,
12160 endlink)));
12161 tree v16qi_ftype_v16qi_v16qi
12162 = build_function_type (V16QI_type_node,
12163 tree_cons (NULL_TREE, V16QI_type_node,
12164 tree_cons (NULL_TREE, V16QI_type_node,
12165 endlink)));
12166 tree v8hi_ftype_v8hi_v8hi
12167 = build_function_type (V8HI_type_node,
12168 tree_cons (NULL_TREE, V8HI_type_node,
12169 tree_cons (NULL_TREE, V8HI_type_node,
12170 endlink)));
12171 tree v4si_ftype_v4si_v4si
12172 = build_function_type (V4SI_type_node,
12173 tree_cons (NULL_TREE, V4SI_type_node,
12174 tree_cons (NULL_TREE, V4SI_type_node,
12175 endlink)));
12176 tree v2di_ftype_v2di_v2di
12177 = build_function_type (V2DI_type_node,
12178 tree_cons (NULL_TREE, V2DI_type_node,
12179 tree_cons (NULL_TREE, V2DI_type_node,
12180 endlink)));
12181 tree v2di_ftype_v2df_v2df
12182 = build_function_type (V2DI_type_node,
12183 tree_cons (NULL_TREE, V2DF_type_node,
12184 tree_cons (NULL_TREE, V2DF_type_node,
12185 endlink)));
12186 tree v2df_ftype_v2df
12187 = build_function_type (V2DF_type_node,
12188 tree_cons (NULL_TREE, V2DF_type_node,
12189 endlink));
12190 tree v2df_ftype_double
12191 = build_function_type (V2DF_type_node,
12192 tree_cons (NULL_TREE, double_type_node,
12193 endlink));
12194 tree v2df_ftype_double_double
12195 = build_function_type (V2DF_type_node,
12196 tree_cons (NULL_TREE, double_type_node,
12197 tree_cons (NULL_TREE, double_type_node,
12198 endlink)));
12199 tree int_ftype_v8hi_int
12200 = build_function_type (integer_type_node,
12201 tree_cons (NULL_TREE, V8HI_type_node,
12202 tree_cons (NULL_TREE, integer_type_node,
12203 endlink)));
12204 tree v8hi_ftype_v8hi_int_int
12205 = build_function_type (V8HI_type_node,
12206 tree_cons (NULL_TREE, V8HI_type_node,
12207 tree_cons (NULL_TREE, integer_type_node,
12208 tree_cons (NULL_TREE,
12209 integer_type_node,
12210 endlink))));
916b60b7
BS
12211 tree v2di_ftype_v2di_int
12212 = build_function_type (V2DI_type_node,
12213 tree_cons (NULL_TREE, V2DI_type_node,
12214 tree_cons (NULL_TREE, integer_type_node,
12215 endlink)));
fbe5eb6d
BS
12216 tree v4si_ftype_v4si_int
12217 = build_function_type (V4SI_type_node,
12218 tree_cons (NULL_TREE, V4SI_type_node,
12219 tree_cons (NULL_TREE, integer_type_node,
12220 endlink)));
12221 tree v8hi_ftype_v8hi_int
12222 = build_function_type (V8HI_type_node,
12223 tree_cons (NULL_TREE, V8HI_type_node,
12224 tree_cons (NULL_TREE, integer_type_node,
12225 endlink)));
916b60b7
BS
12226 tree v8hi_ftype_v8hi_v2di
12227 = build_function_type (V8HI_type_node,
12228 tree_cons (NULL_TREE, V8HI_type_node,
12229 tree_cons (NULL_TREE, V2DI_type_node,
12230 endlink)));
12231 tree v4si_ftype_v4si_v2di
12232 = build_function_type (V4SI_type_node,
12233 tree_cons (NULL_TREE, V4SI_type_node,
12234 tree_cons (NULL_TREE, V2DI_type_node,
12235 endlink)));
12236 tree v4si_ftype_v8hi_v8hi
12237 = build_function_type (V4SI_type_node,
12238 tree_cons (NULL_TREE, V8HI_type_node,
12239 tree_cons (NULL_TREE, V8HI_type_node,
12240 endlink)));
12241 tree di_ftype_v8qi_v8qi
12242 = build_function_type (long_long_unsigned_type_node,
12243 tree_cons (NULL_TREE, V8QI_type_node,
12244 tree_cons (NULL_TREE, V8QI_type_node,
12245 endlink)));
12246 tree v2di_ftype_v16qi_v16qi
12247 = build_function_type (V2DI_type_node,
12248 tree_cons (NULL_TREE, V16QI_type_node,
12249 tree_cons (NULL_TREE, V16QI_type_node,
12250 endlink)));
12251 tree int_ftype_v16qi
12252 = build_function_type (integer_type_node,
12253 tree_cons (NULL_TREE, V16QI_type_node, endlink));
47f339cf 12254
bd793c65
BS
12255 /* Add all builtins that are more or less simple operations on two
12256 operands. */
ca7558fc 12257 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
12258 {
12259 /* Use one of the operands; the target can have a different mode for
12260 mask-generating compares. */
12261 enum machine_mode mode;
12262 tree type;
12263
12264 if (d->name == 0)
12265 continue;
12266 mode = insn_data[d->icode].operand[1].mode;
12267
bd793c65
BS
12268 switch (mode)
12269 {
fbe5eb6d
BS
12270 case V16QImode:
12271 type = v16qi_ftype_v16qi_v16qi;
12272 break;
12273 case V8HImode:
12274 type = v8hi_ftype_v8hi_v8hi;
12275 break;
12276 case V4SImode:
12277 type = v4si_ftype_v4si_v4si;
12278 break;
12279 case V2DImode:
12280 type = v2di_ftype_v2di_v2di;
12281 break;
12282 case V2DFmode:
12283 type = v2df_ftype_v2df_v2df;
12284 break;
12285 case TImode:
12286 type = ti_ftype_ti_ti;
12287 break;
bd793c65
BS
12288 case V4SFmode:
12289 type = v4sf_ftype_v4sf_v4sf;
12290 break;
12291 case V8QImode:
12292 type = v8qi_ftype_v8qi_v8qi;
12293 break;
12294 case V4HImode:
12295 type = v4hi_ftype_v4hi_v4hi;
12296 break;
12297 case V2SImode:
12298 type = v2si_ftype_v2si_v2si;
12299 break;
bd793c65
BS
12300 case DImode:
12301 type = di_ftype_di_di;
12302 break;
12303
12304 default:
12305 abort ();
12306 }
0f290768 12307
bd793c65
BS
12308 /* Override for comparisons. */
12309 if (d->icode == CODE_FOR_maskcmpv4sf3
12310 || d->icode == CODE_FOR_maskncmpv4sf3
12311 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12312 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12313 type = v4si_ftype_v4sf_v4sf;
12314
fbe5eb6d
BS
12315 if (d->icode == CODE_FOR_maskcmpv2df3
12316 || d->icode == CODE_FOR_maskncmpv2df3
12317 || d->icode == CODE_FOR_vmmaskcmpv2df3
12318 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12319 type = v2di_ftype_v2df_v2df;
12320
eeb06b1b 12321 def_builtin (d->mask, d->name, type, d->code);
bd793c65
BS
12322 }
12323
12324 /* Add the remaining MMX insns with somewhat more complicated types. */
eeb06b1b
BS
12325 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12326 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12327 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12328 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12329 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12330 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12331 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12332
12333 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12334 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12335 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12336
12337 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12338 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12339
12340 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12341 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 12342
bd793c65 12343 /* comi/ucomi insns. */
ca7558fc 12344 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
fbe5eb6d
BS
12345 if (d->mask == MASK_SSE2)
12346 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12347 else
12348 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 12349
1255c85c
BS
12350 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12351 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12352 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 12353
fbe5eb6d
BS
12354 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12355 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12356 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12357 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12358 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12359 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
bd793c65 12360
fbe5eb6d
BS
12361 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
12362 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
12363 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
12364 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
e37af218 12365
fbe5eb6d
BS
12366 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12367 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
bd793c65 12368
fbe5eb6d 12369 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
bd793c65 12370
fbe5eb6d
BS
12371 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12372 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12373 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12374 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12375 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12376 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
bd793c65 12377
fbe5eb6d
BS
12378 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12379 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12380 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12381 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
bd793c65 12382
fbe5eb6d
BS
12383 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12384 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12385 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12386 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
bd793c65 12387
fbe5eb6d 12388 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
bd793c65 12389
916b60b7 12390 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
bd793c65 12391
fbe5eb6d
BS
12392 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12393 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12394 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12395 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12396 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12397 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
bd793c65 12398
fbe5eb6d 12399 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
bd793c65 12400
47f339cf
BS
12401 /* Original 3DNow! */
12402 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12403 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12404 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12405 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12406 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12407 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12408 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12409 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12410 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12411 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12412 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12413 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12414 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12415 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12416 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12417 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12418 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12419 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12420 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12421 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
47f339cf
BS
12422
12423 /* 3DNow! extension as used in the Athlon CPU. */
12424 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12425 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12426 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12427 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12428 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12429 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12430
fbe5eb6d
BS
12431 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12432
12433 /* SSE2 */
12434 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12435 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12436
12437 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12438 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12439
12440 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12441 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12442 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12443 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12444 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12445 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12446
12447 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12448 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12449 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12450 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12451
12452 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
916b60b7 12453 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
fbe5eb6d
BS
12454 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12455 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
916b60b7 12456 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
fbe5eb6d
BS
12457
12458 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12459 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12460 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
916b60b7 12461 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
fbe5eb6d
BS
12462
12463 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12464 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12465
12466 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12467
12468 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
916b60b7 12469 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
fbe5eb6d
BS
12470
12471 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12472 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12473 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12474 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12475 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12476
12477 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12478
12479 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12480 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12481
12482 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12483 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12484 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12485
12486 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12487 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12488 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12489
12490 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12491 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12492 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12493 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12494 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12495 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12496 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12497
12498 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12499 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12500 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
916b60b7
BS
12501
12502 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12503 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12504 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12505
12506 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12507 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12508 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12509
12510 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12511 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12512
12513 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12514 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12515 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12516
12517 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12518 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12519 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12520
12521 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12522 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12523
12524 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
bd793c65
BS
12525}
12526
12527/* Errors in the source file can cause expand_expr to return const0_rtx
12528 where we expect a vector. To avoid crashing, use one of the vector
12529 clear instructions. */
12530static rtx
12531safe_vector_operand (x, mode)
12532 rtx x;
12533 enum machine_mode mode;
12534{
12535 if (x != const0_rtx)
12536 return x;
12537 x = gen_reg_rtx (mode);
12538
47f339cf 12539 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
bd793c65
BS
12540 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12541 : gen_rtx_SUBREG (DImode, x, 0)));
12542 else
e37af218
RH
12543 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12544 : gen_rtx_SUBREG (V4SFmode, x, 0)));
bd793c65
BS
12545 return x;
12546}
12547
12548/* Subroutine of ix86_expand_builtin to take care of binop insns. */
12549
12550static rtx
12551ix86_expand_binop_builtin (icode, arglist, target)
12552 enum insn_code icode;
12553 tree arglist;
12554 rtx target;
12555{
12556 rtx pat;
12557 tree arg0 = TREE_VALUE (arglist);
12558 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12559 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12560 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12561 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12562 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12563 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12564
12565 if (VECTOR_MODE_P (mode0))
12566 op0 = safe_vector_operand (op0, mode0);
12567 if (VECTOR_MODE_P (mode1))
12568 op1 = safe_vector_operand (op1, mode1);
12569
12570 if (! target
12571 || GET_MODE (target) != tmode
12572 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12573 target = gen_reg_rtx (tmode);
12574
12575 /* In case the insn wants input operands in modes different from
12576 the result, abort. */
12577 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12578 abort ();
12579
12580 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12581 op0 = copy_to_mode_reg (mode0, op0);
12582 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12583 op1 = copy_to_mode_reg (mode1, op1);
12584
59bef189
RH
12585 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12586 yet one of the two must not be a memory. This is normally enforced
12587 by expanders, but we didn't bother to create one here. */
12588 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12589 op0 = copy_to_mode_reg (mode0, op0);
12590
bd793c65
BS
12591 pat = GEN_FCN (icode) (target, op0, op1);
12592 if (! pat)
12593 return 0;
12594 emit_insn (pat);
12595 return target;
12596}
12597
e37af218
RH
12598/* In type_for_mode we restrict the ability to create TImode types
12599 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
12600 to have a V4SFmode signature. Convert them in-place to TImode. */
12601
12602static rtx
12603ix86_expand_timode_binop_builtin (icode, arglist, target)
12604 enum insn_code icode;
12605 tree arglist;
12606 rtx target;
12607{
12608 rtx pat;
12609 tree arg0 = TREE_VALUE (arglist);
12610 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12611 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12612 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12613
12614 op0 = gen_lowpart (TImode, op0);
12615 op1 = gen_lowpart (TImode, op1);
12616 target = gen_reg_rtx (TImode);
12617
12618 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
12619 op0 = copy_to_mode_reg (TImode, op0);
12620 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
12621 op1 = copy_to_mode_reg (TImode, op1);
12622
59bef189
RH
12623 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12624 yet one of the two must not be a memory. This is normally enforced
12625 by expanders, but we didn't bother to create one here. */
12626 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12627 op0 = copy_to_mode_reg (TImode, op0);
12628
e37af218
RH
12629 pat = GEN_FCN (icode) (target, op0, op1);
12630 if (! pat)
12631 return 0;
12632 emit_insn (pat);
12633
12634 return gen_lowpart (V4SFmode, target);
12635}
12636
bd793c65
BS
12637/* Subroutine of ix86_expand_builtin to take care of stores. */
12638
12639static rtx
e37af218 12640ix86_expand_store_builtin (icode, arglist)
bd793c65
BS
12641 enum insn_code icode;
12642 tree arglist;
bd793c65
BS
12643{
12644 rtx pat;
12645 tree arg0 = TREE_VALUE (arglist);
12646 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12647 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12648 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12649 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12650 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12651
12652 if (VECTOR_MODE_P (mode1))
12653 op1 = safe_vector_operand (op1, mode1);
12654
12655 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
59bef189
RH
12656
12657 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12658 op1 = copy_to_mode_reg (mode1, op1);
12659
bd793c65
BS
12660 pat = GEN_FCN (icode) (op0, op1);
12661 if (pat)
12662 emit_insn (pat);
12663 return 0;
12664}
12665
12666/* Subroutine of ix86_expand_builtin to take care of unop insns. */
12667
12668static rtx
12669ix86_expand_unop_builtin (icode, arglist, target, do_load)
12670 enum insn_code icode;
12671 tree arglist;
12672 rtx target;
12673 int do_load;
12674{
12675 rtx pat;
12676 tree arg0 = TREE_VALUE (arglist);
12677 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12678 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12679 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12680
12681 if (! target
12682 || GET_MODE (target) != tmode
12683 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12684 target = gen_reg_rtx (tmode);
12685 if (do_load)
12686 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12687 else
12688 {
12689 if (VECTOR_MODE_P (mode0))
12690 op0 = safe_vector_operand (op0, mode0);
12691
12692 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12693 op0 = copy_to_mode_reg (mode0, op0);
12694 }
12695
12696 pat = GEN_FCN (icode) (target, op0);
12697 if (! pat)
12698 return 0;
12699 emit_insn (pat);
12700 return target;
12701}
12702
12703/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12704 sqrtss, rsqrtss, rcpss. */
12705
12706static rtx
12707ix86_expand_unop1_builtin (icode, arglist, target)
12708 enum insn_code icode;
12709 tree arglist;
12710 rtx target;
12711{
12712 rtx pat;
12713 tree arg0 = TREE_VALUE (arglist);
59bef189 12714 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
bd793c65
BS
12715 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12716 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12717
12718 if (! target
12719 || GET_MODE (target) != tmode
12720 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12721 target = gen_reg_rtx (tmode);
12722
12723 if (VECTOR_MODE_P (mode0))
12724 op0 = safe_vector_operand (op0, mode0);
12725
12726 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12727 op0 = copy_to_mode_reg (mode0, op0);
59bef189
RH
12728
12729 op1 = op0;
12730 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12731 op1 = copy_to_mode_reg (mode0, op1);
12732
12733 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
12734 if (! pat)
12735 return 0;
12736 emit_insn (pat);
12737 return target;
12738}
12739
12740/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12741
12742static rtx
12743ix86_expand_sse_compare (d, arglist, target)
8b60264b 12744 const struct builtin_description *d;
bd793c65
BS
12745 tree arglist;
12746 rtx target;
12747{
12748 rtx pat;
12749 tree arg0 = TREE_VALUE (arglist);
12750 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12751 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12752 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12753 rtx op2;
12754 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12755 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12756 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12757 enum rtx_code comparison = d->comparison;
12758
12759 if (VECTOR_MODE_P (mode0))
12760 op0 = safe_vector_operand (op0, mode0);
12761 if (VECTOR_MODE_P (mode1))
12762 op1 = safe_vector_operand (op1, mode1);
12763
12764 /* Swap operands if we have a comparison that isn't available in
12765 hardware. */
12766 if (d->flag)
12767 {
21e1b5f1
BS
12768 rtx tmp = gen_reg_rtx (mode1);
12769 emit_move_insn (tmp, op1);
bd793c65 12770 op1 = op0;
21e1b5f1 12771 op0 = tmp;
bd793c65 12772 }
21e1b5f1
BS
12773
12774 if (! target
12775 || GET_MODE (target) != tmode
12776 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
bd793c65
BS
12777 target = gen_reg_rtx (tmode);
12778
12779 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12780 op0 = copy_to_mode_reg (mode0, op0);
12781 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12782 op1 = copy_to_mode_reg (mode1, op1);
12783
12784 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12785 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12786 if (! pat)
12787 return 0;
12788 emit_insn (pat);
12789 return target;
12790}
12791
12792/* Subroutine of ix86_expand_builtin to take care of comi insns. */
12793
12794static rtx
12795ix86_expand_sse_comi (d, arglist, target)
8b60264b 12796 const struct builtin_description *d;
bd793c65
BS
12797 tree arglist;
12798 rtx target;
12799{
12800 rtx pat;
12801 tree arg0 = TREE_VALUE (arglist);
12802 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12803 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12804 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12805 rtx op2;
12806 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12807 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12808 enum rtx_code comparison = d->comparison;
12809
12810 if (VECTOR_MODE_P (mode0))
12811 op0 = safe_vector_operand (op0, mode0);
12812 if (VECTOR_MODE_P (mode1))
12813 op1 = safe_vector_operand (op1, mode1);
12814
12815 /* Swap operands if we have a comparison that isn't available in
12816 hardware. */
12817 if (d->flag)
12818 {
12819 rtx tmp = op1;
12820 op1 = op0;
12821 op0 = tmp;
bd793c65
BS
12822 }
12823
12824 target = gen_reg_rtx (SImode);
12825 emit_move_insn (target, const0_rtx);
12826 target = gen_rtx_SUBREG (QImode, target, 0);
12827
12828 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12829 op0 = copy_to_mode_reg (mode0, op0);
12830 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12831 op1 = copy_to_mode_reg (mode1, op1);
12832
12833 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12834 pat = GEN_FCN (d->icode) (op0, op1, op2);
12835 if (! pat)
12836 return 0;
12837 emit_insn (pat);
29628f27
BS
12838 emit_insn (gen_rtx_SET (VOIDmode,
12839 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12840 gen_rtx_fmt_ee (comparison, QImode,
12841 gen_rtx_REG (CCmode, FLAGS_REG),
12842 const0_rtx)));
bd793c65 12843
6f1a6c5b 12844 return SUBREG_REG (target);
bd793c65
BS
12845}
12846
12847/* Expand an expression EXP that calls a built-in function,
12848 with result going to TARGET if that's convenient
12849 (and in mode MODE if that's convenient).
12850 SUBTARGET may be used as the target for computing one of EXP's operands.
12851 IGNORE is nonzero if the value is to be ignored. */
12852
12853rtx
12854ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12855 tree exp;
12856 rtx target;
12857 rtx subtarget ATTRIBUTE_UNUSED;
12858 enum machine_mode mode ATTRIBUTE_UNUSED;
12859 int ignore ATTRIBUTE_UNUSED;
12860{
8b60264b 12861 const struct builtin_description *d;
77ebd435 12862 size_t i;
bd793c65
BS
12863 enum insn_code icode;
12864 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12865 tree arglist = TREE_OPERAND (exp, 1);
e37af218 12866 tree arg0, arg1, arg2;
bd793c65
BS
12867 rtx op0, op1, op2, pat;
12868 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 12869 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
bd793c65
BS
12870
12871 switch (fcode)
12872 {
12873 case IX86_BUILTIN_EMMS:
12874 emit_insn (gen_emms ());
12875 return 0;
12876
12877 case IX86_BUILTIN_SFENCE:
12878 emit_insn (gen_sfence ());
12879 return 0;
12880
bd793c65 12881 case IX86_BUILTIN_PEXTRW:
fbe5eb6d
BS
12882 case IX86_BUILTIN_PEXTRW128:
12883 icode = (fcode == IX86_BUILTIN_PEXTRW
12884 ? CODE_FOR_mmx_pextrw
12885 : CODE_FOR_sse2_pextrw);
bd793c65
BS
12886 arg0 = TREE_VALUE (arglist);
12887 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12888 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12889 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12890 tmode = insn_data[icode].operand[0].mode;
12891 mode0 = insn_data[icode].operand[1].mode;
12892 mode1 = insn_data[icode].operand[2].mode;
12893
12894 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12895 op0 = copy_to_mode_reg (mode0, op0);
12896 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12897 {
12898 /* @@@ better error message */
12899 error ("selector must be an immediate");
6f1a6c5b 12900 return gen_reg_rtx (tmode);
bd793c65
BS
12901 }
12902 if (target == 0
12903 || GET_MODE (target) != tmode
12904 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12905 target = gen_reg_rtx (tmode);
12906 pat = GEN_FCN (icode) (target, op0, op1);
12907 if (! pat)
12908 return 0;
12909 emit_insn (pat);
12910 return target;
12911
12912 case IX86_BUILTIN_PINSRW:
fbe5eb6d
BS
12913 case IX86_BUILTIN_PINSRW128:
12914 icode = (fcode == IX86_BUILTIN_PINSRW
12915 ? CODE_FOR_mmx_pinsrw
12916 : CODE_FOR_sse2_pinsrw);
bd793c65
BS
12917 arg0 = TREE_VALUE (arglist);
12918 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12919 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12920 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12921 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12922 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12923 tmode = insn_data[icode].operand[0].mode;
12924 mode0 = insn_data[icode].operand[1].mode;
12925 mode1 = insn_data[icode].operand[2].mode;
12926 mode2 = insn_data[icode].operand[3].mode;
12927
12928 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12929 op0 = copy_to_mode_reg (mode0, op0);
12930 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12931 op1 = copy_to_mode_reg (mode1, op1);
12932 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12933 {
12934 /* @@@ better error message */
12935 error ("selector must be an immediate");
12936 return const0_rtx;
12937 }
12938 if (target == 0
12939 || GET_MODE (target) != tmode
12940 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12941 target = gen_reg_rtx (tmode);
12942 pat = GEN_FCN (icode) (target, op0, op1, op2);
12943 if (! pat)
12944 return 0;
12945 emit_insn (pat);
12946 return target;
12947
12948 case IX86_BUILTIN_MASKMOVQ:
fbe5eb6d
BS
12949 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12950 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12951 : CODE_FOR_sse2_maskmovdqu);
bd793c65
BS
12952 /* Note the arg order is different from the operand order. */
12953 arg1 = TREE_VALUE (arglist);
12954 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12955 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12956 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12957 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12958 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12959 mode0 = insn_data[icode].operand[0].mode;
12960 mode1 = insn_data[icode].operand[1].mode;
12961 mode2 = insn_data[icode].operand[2].mode;
12962
5c464583 12963 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
bd793c65
BS
12964 op0 = copy_to_mode_reg (mode0, op0);
12965 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12966 op1 = copy_to_mode_reg (mode1, op1);
12967 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
12968 op2 = copy_to_mode_reg (mode2, op2);
12969 pat = GEN_FCN (icode) (op0, op1, op2);
12970 if (! pat)
12971 return 0;
12972 emit_insn (pat);
12973 return 0;
12974
12975 case IX86_BUILTIN_SQRTSS:
12976 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
12977 case IX86_BUILTIN_RSQRTSS:
12978 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
12979 case IX86_BUILTIN_RCPSS:
12980 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
12981
e37af218
RH
12982 case IX86_BUILTIN_ANDPS:
12983 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
12984 arglist, target);
12985 case IX86_BUILTIN_ANDNPS:
12986 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
12987 arglist, target);
12988 case IX86_BUILTIN_ORPS:
12989 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
12990 arglist, target);
12991 case IX86_BUILTIN_XORPS:
12992 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
12993 arglist, target);
12994
bd793c65
BS
12995 case IX86_BUILTIN_LOADAPS:
12996 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
12997
12998 case IX86_BUILTIN_LOADUPS:
12999 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13000
13001 case IX86_BUILTIN_STOREAPS:
e37af218 13002 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
bd793c65 13003 case IX86_BUILTIN_STOREUPS:
e37af218 13004 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
bd793c65
BS
13005
13006 case IX86_BUILTIN_LOADSS:
13007 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13008
13009 case IX86_BUILTIN_STORESS:
e37af218 13010 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
bd793c65 13011
0f290768 13012 case IX86_BUILTIN_LOADHPS:
bd793c65 13013 case IX86_BUILTIN_LOADLPS:
fbe5eb6d
BS
13014 case IX86_BUILTIN_LOADHPD:
13015 case IX86_BUILTIN_LOADLPD:
13016 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13017 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13018 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13019 : CODE_FOR_sse2_movlpd);
bd793c65
BS
13020 arg0 = TREE_VALUE (arglist);
13021 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13022 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13023 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13024 tmode = insn_data[icode].operand[0].mode;
13025 mode0 = insn_data[icode].operand[1].mode;
13026 mode1 = insn_data[icode].operand[2].mode;
13027
13028 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13029 op0 = copy_to_mode_reg (mode0, op0);
13030 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13031 if (target == 0
13032 || GET_MODE (target) != tmode
13033 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13034 target = gen_reg_rtx (tmode);
13035 pat = GEN_FCN (icode) (target, op0, op1);
13036 if (! pat)
13037 return 0;
13038 emit_insn (pat);
13039 return target;
0f290768 13040
bd793c65
BS
13041 case IX86_BUILTIN_STOREHPS:
13042 case IX86_BUILTIN_STORELPS:
fbe5eb6d
BS
13043 case IX86_BUILTIN_STOREHPD:
13044 case IX86_BUILTIN_STORELPD:
13045 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13046 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13047 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13048 : CODE_FOR_sse2_movlpd);
bd793c65
BS
13049 arg0 = TREE_VALUE (arglist);
13050 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13051 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13052 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13053 mode0 = insn_data[icode].operand[1].mode;
13054 mode1 = insn_data[icode].operand[2].mode;
13055
13056 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13057 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13058 op1 = copy_to_mode_reg (mode1, op1);
13059
13060 pat = GEN_FCN (icode) (op0, op0, op1);
13061 if (! pat)
13062 return 0;
13063 emit_insn (pat);
13064 return 0;
13065
13066 case IX86_BUILTIN_MOVNTPS:
e37af218 13067 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
bd793c65 13068 case IX86_BUILTIN_MOVNTQ:
e37af218 13069 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
bd793c65
BS
13070
13071 case IX86_BUILTIN_LDMXCSR:
13072 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13073 target = assign_386_stack_local (SImode, 0);
13074 emit_move_insn (target, op0);
13075 emit_insn (gen_ldmxcsr (target));
13076 return 0;
13077
13078 case IX86_BUILTIN_STMXCSR:
13079 target = assign_386_stack_local (SImode, 0);
13080 emit_insn (gen_stmxcsr (target));
13081 return copy_to_mode_reg (SImode, target);
13082
bd793c65 13083 case IX86_BUILTIN_SHUFPS:
fbe5eb6d
BS
13084 case IX86_BUILTIN_SHUFPD:
13085 icode = (fcode == IX86_BUILTIN_SHUFPS
13086 ? CODE_FOR_sse_shufps
13087 : CODE_FOR_sse2_shufpd);
bd793c65
BS
13088 arg0 = TREE_VALUE (arglist);
13089 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13090 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13091 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13092 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13093 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13094 tmode = insn_data[icode].operand[0].mode;
13095 mode0 = insn_data[icode].operand[1].mode;
13096 mode1 = insn_data[icode].operand[2].mode;
13097 mode2 = insn_data[icode].operand[3].mode;
13098
13099 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13100 op0 = copy_to_mode_reg (mode0, op0);
13101 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13102 op1 = copy_to_mode_reg (mode1, op1);
13103 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13104 {
13105 /* @@@ better error message */
13106 error ("mask must be an immediate");
6f1a6c5b 13107 return gen_reg_rtx (tmode);
bd793c65
BS
13108 }
13109 if (target == 0
13110 || GET_MODE (target) != tmode
13111 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13112 target = gen_reg_rtx (tmode);
13113 pat = GEN_FCN (icode) (target, op0, op1, op2);
13114 if (! pat)
13115 return 0;
13116 emit_insn (pat);
13117 return target;
13118
13119 case IX86_BUILTIN_PSHUFW:
fbe5eb6d
BS
13120 case IX86_BUILTIN_PSHUFD:
13121 case IX86_BUILTIN_PSHUFHW:
13122 case IX86_BUILTIN_PSHUFLW:
13123 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13124 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13125 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13126 : CODE_FOR_mmx_pshufw);
bd793c65
BS
13127 arg0 = TREE_VALUE (arglist);
13128 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13129 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13130 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13131 tmode = insn_data[icode].operand[0].mode;
29628f27
BS
13132 mode1 = insn_data[icode].operand[1].mode;
13133 mode2 = insn_data[icode].operand[2].mode;
bd793c65 13134
29628f27
BS
13135 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13136 op0 = copy_to_mode_reg (mode1, op0);
13137 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
bd793c65
BS
13138 {
13139 /* @@@ better error message */
13140 error ("mask must be an immediate");
13141 return const0_rtx;
13142 }
13143 if (target == 0
13144 || GET_MODE (target) != tmode
13145 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13146 target = gen_reg_rtx (tmode);
29628f27 13147 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
13148 if (! pat)
13149 return 0;
13150 emit_insn (pat);
13151 return target;
13152
47f339cf
BS
13153 case IX86_BUILTIN_FEMMS:
13154 emit_insn (gen_femms ());
13155 return NULL_RTX;
13156
13157 case IX86_BUILTIN_PAVGUSB:
13158 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13159
13160 case IX86_BUILTIN_PF2ID:
13161 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13162
13163 case IX86_BUILTIN_PFACC:
13164 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13165
13166 case IX86_BUILTIN_PFADD:
13167 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13168
13169 case IX86_BUILTIN_PFCMPEQ:
13170 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13171
13172 case IX86_BUILTIN_PFCMPGE:
13173 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13174
13175 case IX86_BUILTIN_PFCMPGT:
13176 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13177
13178 case IX86_BUILTIN_PFMAX:
13179 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13180
13181 case IX86_BUILTIN_PFMIN:
13182 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13183
13184 case IX86_BUILTIN_PFMUL:
13185 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13186
13187 case IX86_BUILTIN_PFRCP:
13188 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13189
13190 case IX86_BUILTIN_PFRCPIT1:
13191 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13192
13193 case IX86_BUILTIN_PFRCPIT2:
13194 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13195
13196 case IX86_BUILTIN_PFRSQIT1:
13197 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13198
13199 case IX86_BUILTIN_PFRSQRT:
13200 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13201
13202 case IX86_BUILTIN_PFSUB:
13203 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13204
13205 case IX86_BUILTIN_PFSUBR:
13206 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13207
13208 case IX86_BUILTIN_PI2FD:
13209 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13210
13211 case IX86_BUILTIN_PMULHRW:
13212 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13213
47f339cf
BS
13214 case IX86_BUILTIN_PF2IW:
13215 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13216
13217 case IX86_BUILTIN_PFNACC:
13218 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13219
13220 case IX86_BUILTIN_PFPNACC:
13221 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13222
13223 case IX86_BUILTIN_PI2FW:
13224 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13225
13226 case IX86_BUILTIN_PSWAPDSI:
13227 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13228
13229 case IX86_BUILTIN_PSWAPDSF:
13230 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13231
e37af218
RH
13232 case IX86_BUILTIN_SSE_ZERO:
13233 target = gen_reg_rtx (V4SFmode);
13234 emit_insn (gen_sse_clrv4sf (target));
bd793c65
BS
13235 return target;
13236
bd793c65
BS
13237 case IX86_BUILTIN_MMX_ZERO:
13238 target = gen_reg_rtx (DImode);
13239 emit_insn (gen_mmx_clrdi (target));
13240 return target;
13241
fbe5eb6d
BS
13242 case IX86_BUILTIN_SQRTSD:
13243 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13244 case IX86_BUILTIN_LOADAPD:
13245 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13246 case IX86_BUILTIN_LOADUPD:
13247 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13248
13249 case IX86_BUILTIN_STOREAPD:
13250 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13251 case IX86_BUILTIN_STOREUPD:
13252 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13253
13254 case IX86_BUILTIN_LOADSD:
13255 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13256
13257 case IX86_BUILTIN_STORESD:
13258 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13259
13260 case IX86_BUILTIN_SETPD1:
13261 target = assign_386_stack_local (DFmode, 0);
13262 arg0 = TREE_VALUE (arglist);
13263 emit_move_insn (adjust_address (target, DFmode, 0),
13264 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13265 op0 = gen_reg_rtx (V2DFmode);
13266 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13267 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13268 return op0;
13269
13270 case IX86_BUILTIN_SETPD:
13271 target = assign_386_stack_local (V2DFmode, 0);
13272 arg0 = TREE_VALUE (arglist);
13273 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13274 emit_move_insn (adjust_address (target, DFmode, 0),
13275 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13276 emit_move_insn (adjust_address (target, DFmode, 8),
13277 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13278 op0 = gen_reg_rtx (V2DFmode);
13279 emit_insn (gen_sse2_movapd (op0, target));
13280 return op0;
13281
13282 case IX86_BUILTIN_LOADRPD:
13283 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13284 gen_reg_rtx (V2DFmode), 1);
13285 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13286 return target;
13287
13288 case IX86_BUILTIN_LOADPD1:
13289 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13290 gen_reg_rtx (V2DFmode), 1);
13291 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13292 return target;
13293
13294 case IX86_BUILTIN_STOREPD1:
13295 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13296 case IX86_BUILTIN_STORERPD:
13297 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13298
13299 case IX86_BUILTIN_MFENCE:
13300 emit_insn (gen_sse2_mfence ());
13301 return 0;
13302 case IX86_BUILTIN_LFENCE:
13303 emit_insn (gen_sse2_lfence ());
13304 return 0;
13305
13306 case IX86_BUILTIN_CLFLUSH:
13307 arg0 = TREE_VALUE (arglist);
13308 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13309 icode = CODE_FOR_sse2_clflush;
13310 mode0 = insn_data[icode].operand[0].mode;
13311 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13312 op0 = copy_to_mode_reg (mode0, op0);
13313
13314 emit_insn (gen_sse2_clflush (op0));
13315 return 0;
13316
13317 case IX86_BUILTIN_MOVNTPD:
13318 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13319 case IX86_BUILTIN_MOVNTDQ:
916b60b7 13320 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
fbe5eb6d
BS
13321 case IX86_BUILTIN_MOVNTI:
13322 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13323
bd793c65
BS
13324 default:
13325 break;
13326 }
13327
ca7558fc 13328 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
13329 if (d->code == fcode)
13330 {
13331 /* Compares are treated specially. */
13332 if (d->icode == CODE_FOR_maskcmpv4sf3
13333 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13334 || d->icode == CODE_FOR_maskncmpv4sf3
fbe5eb6d
BS
13335 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13336 || d->icode == CODE_FOR_maskcmpv2df3
13337 || d->icode == CODE_FOR_vmmaskcmpv2df3
13338 || d->icode == CODE_FOR_maskncmpv2df3
13339 || d->icode == CODE_FOR_vmmaskncmpv2df3)
bd793c65
BS
13340 return ix86_expand_sse_compare (d, arglist, target);
13341
13342 return ix86_expand_binop_builtin (d->icode, arglist, target);
13343 }
13344
ca7558fc 13345 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
bd793c65
BS
13346 if (d->code == fcode)
13347 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
0f290768 13348
ca7558fc 13349 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
bd793c65
BS
13350 if (d->code == fcode)
13351 return ix86_expand_sse_comi (d, arglist, target);
0f290768 13352
bd793c65
BS
13353 /* @@@ Should really do something sensible here. */
13354 return 0;
bd793c65 13355}
4211a8fb
JH
13356
13357/* Store OPERAND to the memory after reload is completed. This means
f710504c 13358 that we can't easily use assign_stack_local. */
4211a8fb
JH
13359rtx
13360ix86_force_to_memory (mode, operand)
13361 enum machine_mode mode;
13362 rtx operand;
13363{
898d374d 13364 rtx result;
4211a8fb
JH
13365 if (!reload_completed)
13366 abort ();
898d374d
JH
13367 if (TARGET_64BIT && TARGET_RED_ZONE)
13368 {
13369 result = gen_rtx_MEM (mode,
13370 gen_rtx_PLUS (Pmode,
13371 stack_pointer_rtx,
13372 GEN_INT (-RED_ZONE_SIZE)));
13373 emit_move_insn (result, operand);
13374 }
13375 else if (TARGET_64BIT && !TARGET_RED_ZONE)
4211a8fb 13376 {
898d374d 13377 switch (mode)
4211a8fb 13378 {
898d374d
JH
13379 case HImode:
13380 case SImode:
13381 operand = gen_lowpart (DImode, operand);
13382 /* FALLTHRU */
13383 case DImode:
4211a8fb 13384 emit_insn (
898d374d
JH
13385 gen_rtx_SET (VOIDmode,
13386 gen_rtx_MEM (DImode,
13387 gen_rtx_PRE_DEC (DImode,
13388 stack_pointer_rtx)),
13389 operand));
13390 break;
13391 default:
13392 abort ();
13393 }
13394 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13395 }
13396 else
13397 {
13398 switch (mode)
13399 {
13400 case DImode:
13401 {
13402 rtx operands[2];
13403 split_di (&operand, 1, operands, operands + 1);
13404 emit_insn (
13405 gen_rtx_SET (VOIDmode,
13406 gen_rtx_MEM (SImode,
13407 gen_rtx_PRE_DEC (Pmode,
13408 stack_pointer_rtx)),
13409 operands[1]));
13410 emit_insn (
13411 gen_rtx_SET (VOIDmode,
13412 gen_rtx_MEM (SImode,
13413 gen_rtx_PRE_DEC (Pmode,
13414 stack_pointer_rtx)),
13415 operands[0]));
13416 }
13417 break;
13418 case HImode:
13419 /* It is better to store HImodes as SImodes. */
13420 if (!TARGET_PARTIAL_REG_STALL)
13421 operand = gen_lowpart (SImode, operand);
13422 /* FALLTHRU */
13423 case SImode:
4211a8fb 13424 emit_insn (
898d374d
JH
13425 gen_rtx_SET (VOIDmode,
13426 gen_rtx_MEM (GET_MODE (operand),
13427 gen_rtx_PRE_DEC (SImode,
13428 stack_pointer_rtx)),
13429 operand));
13430 break;
13431 default:
13432 abort ();
4211a8fb 13433 }
898d374d 13434 result = gen_rtx_MEM (mode, stack_pointer_rtx);
4211a8fb 13435 }
898d374d 13436 return result;
4211a8fb
JH
13437}
13438
13439/* Free operand from the memory. */
13440void
13441ix86_free_from_memory (mode)
13442 enum machine_mode mode;
13443{
898d374d
JH
13444 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13445 {
13446 int size;
13447
13448 if (mode == DImode || TARGET_64BIT)
13449 size = 8;
13450 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13451 size = 2;
13452 else
13453 size = 4;
13454 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13455 to pop or add instruction if registers are available. */
13456 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13457 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13458 GEN_INT (size))));
13459 }
4211a8fb 13460}
a946dd00 13461
f84aa48a
JH
13462/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13463 QImode must go into class Q_REGS.
13464 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
892a2d68 13465 movdf to do mem-to-mem moves through integer regs. */
f84aa48a
JH
13466enum reg_class
13467ix86_preferred_reload_class (x, class)
13468 rtx x;
13469 enum reg_class class;
13470{
13471 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13472 {
13473 /* SSE can't load any constant directly yet. */
13474 if (SSE_CLASS_P (class))
13475 return NO_REGS;
13476 /* Floats can load 0 and 1. */
13477 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13478 {
13479 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13480 if (MAYBE_SSE_CLASS_P (class))
13481 return (reg_class_subset_p (class, GENERAL_REGS)
13482 ? GENERAL_REGS : FLOAT_REGS);
13483 else
13484 return class;
13485 }
13486 /* General regs can load everything. */
13487 if (reg_class_subset_p (class, GENERAL_REGS))
13488 return GENERAL_REGS;
13489 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13490 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13491 return NO_REGS;
13492 }
13493 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13494 return NO_REGS;
13495 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13496 return Q_REGS;
13497 return class;
13498}
13499
13500/* If we are copying between general and FP registers, we need a memory
13501 location. The same is true for SSE and MMX registers.
13502
13503 The macro can't work reliably when one of the CLASSES is class containing
13504 registers from multiple units (SSE, MMX, integer). We avoid this by never
13505 combining those units in single alternative in the machine description.
13506 Ensure that this constraint holds to avoid unexpected surprises.
13507
13508 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13509 enforce these sanity checks. */
13510int
13511ix86_secondary_memory_needed (class1, class2, mode, strict)
13512 enum reg_class class1, class2;
13513 enum machine_mode mode;
13514 int strict;
13515{
13516 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13517 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13518 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13519 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13520 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13521 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13522 {
13523 if (strict)
13524 abort ();
13525 else
13526 return 1;
13527 }
13528 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13529 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13530 && (mode) != SImode)
13531 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13532 && (mode) != SImode));
13533}
13534/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 13535 one in class CLASS2.
f84aa48a
JH
13536
13537 It is not required that the cost always equal 2 when FROM is the same as TO;
13538 on some machines it is expensive to move between registers if they are not
13539 general registers. */
13540int
13541ix86_register_move_cost (mode, class1, class2)
13542 enum machine_mode mode;
13543 enum reg_class class1, class2;
13544{
13545 /* In case we require secondary memory, compute cost of the store followed
13546 by load. In case of copying from general_purpose_register we may emit
13547 multiple stores followed by single load causing memory size mismatch
13548 stall. Count this as arbitarily high cost of 20. */
13549 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13550 {
92d0fb09 13551 int add_cost = 0;
62415523 13552 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
92d0fb09 13553 add_cost = 20;
62415523 13554 return (MEMORY_MOVE_COST (mode, class1, 0)
92d0fb09 13555 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
f84aa48a 13556 }
92d0fb09 13557 /* Moves between SSE/MMX and integer unit are expensive. */
62415523
JH
13558 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13559 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
fa79946e
JH
13560 return ix86_cost->mmxsse_to_integer;
13561 if (MAYBE_FLOAT_CLASS_P (class1))
13562 return ix86_cost->fp_move;
13563 if (MAYBE_SSE_CLASS_P (class1))
13564 return ix86_cost->sse_move;
13565 if (MAYBE_MMX_CLASS_P (class1))
13566 return ix86_cost->mmx_move;
f84aa48a
JH
13567 return 2;
13568}
13569
a946dd00
JH
13570/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13571int
13572ix86_hard_regno_mode_ok (regno, mode)
13573 int regno;
13574 enum machine_mode mode;
13575{
13576 /* Flags and only flags can only hold CCmode values. */
13577 if (CC_REGNO_P (regno))
13578 return GET_MODE_CLASS (mode) == MODE_CC;
13579 if (GET_MODE_CLASS (mode) == MODE_CC
13580 || GET_MODE_CLASS (mode) == MODE_RANDOM
13581 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13582 return 0;
13583 if (FP_REGNO_P (regno))
13584 return VALID_FP_MODE_P (mode);
13585 if (SSE_REGNO_P (regno))
13586 return VALID_SSE_REG_MODE (mode);
13587 if (MMX_REGNO_P (regno))
47f339cf 13588 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
a946dd00
JH
13589 /* We handle both integer and floats in the general purpose registers.
13590 In future we should be able to handle vector modes as well. */
13591 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13592 return 0;
13593 /* Take care for QImode values - they can be in non-QI regs, but then
13594 they do cause partial register stalls. */
d2836273 13595 if (regno < 4 || mode != QImode || TARGET_64BIT)
a946dd00
JH
13596 return 1;
13597 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13598}
fa79946e
JH
13599
13600/* Return the cost of moving data of mode M between a
13601 register and memory. A value of 2 is the default; this cost is
13602 relative to those in `REGISTER_MOVE_COST'.
13603
13604 If moving between registers and memory is more expensive than
13605 between two registers, you should define this macro to express the
a4f31c00
AJ
13606 relative cost.
13607
fa79946e
JH
13608 Model also increased moving costs of QImode registers in non
13609 Q_REGS classes.
13610 */
13611int
13612ix86_memory_move_cost (mode, class, in)
13613 enum machine_mode mode;
13614 enum reg_class class;
13615 int in;
13616{
13617 if (FLOAT_CLASS_P (class))
13618 {
13619 int index;
13620 switch (mode)
13621 {
13622 case SFmode:
13623 index = 0;
13624 break;
13625 case DFmode:
13626 index = 1;
13627 break;
13628 case XFmode:
13629 case TFmode:
13630 index = 2;
13631 break;
13632 default:
13633 return 100;
13634 }
13635 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13636 }
13637 if (SSE_CLASS_P (class))
13638 {
13639 int index;
13640 switch (GET_MODE_SIZE (mode))
13641 {
13642 case 4:
13643 index = 0;
13644 break;
13645 case 8:
13646 index = 1;
13647 break;
13648 case 16:
13649 index = 2;
13650 break;
13651 default:
13652 return 100;
13653 }
13654 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13655 }
13656 if (MMX_CLASS_P (class))
13657 {
13658 int index;
13659 switch (GET_MODE_SIZE (mode))
13660 {
13661 case 4:
13662 index = 0;
13663 break;
13664 case 8:
13665 index = 1;
13666 break;
13667 default:
13668 return 100;
13669 }
13670 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13671 }
13672 switch (GET_MODE_SIZE (mode))
13673 {
13674 case 1:
13675 if (in)
13676 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13677 : ix86_cost->movzbl_load);
13678 else
13679 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13680 : ix86_cost->int_store[0] + 4);
13681 break;
13682 case 2:
13683 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13684 default:
13685 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
13686 if (mode == TFmode)
13687 mode = XFmode;
3bb7e126 13688 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
fa79946e
JH
13689 * (int) GET_MODE_SIZE (mode) / 4);
13690 }
13691}
0ecf09f9 13692
2cc07db4
RH
13693#ifdef DO_GLOBAL_CTORS_BODY
13694static void
13695ix86_svr3_asm_out_constructor (symbol, priority)
13696 rtx symbol;
13697 int priority ATTRIBUTE_UNUSED;
13698{
13699 init_section ();
13700 fputs ("\tpushl $", asm_out_file);
13701 assemble_name (asm_out_file, XSTR (symbol, 0));
13702 fputc ('\n', asm_out_file);
13703}
13704#endif
162f023b
JH
13705
13706/* Order the registers for register allocator. */
13707
13708void
13709x86_order_regs_for_local_alloc ()
13710{
13711 int pos = 0;
13712 int i;
13713
13714 /* First allocate the local general purpose registers. */
13715 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13716 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13717 reg_alloc_order [pos++] = i;
13718
13719 /* Global general purpose registers. */
13720 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13721 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13722 reg_alloc_order [pos++] = i;
13723
13724 /* x87 registers come first in case we are doing FP math
13725 using them. */
13726 if (!TARGET_SSE_MATH)
13727 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13728 reg_alloc_order [pos++] = i;
13729
13730 /* SSE registers. */
13731 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13732 reg_alloc_order [pos++] = i;
13733 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13734 reg_alloc_order [pos++] = i;
13735
13736 /* x87 registerts. */
13737 if (TARGET_SSE_MATH)
13738 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13739 reg_alloc_order [pos++] = i;
13740
13741 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13742 reg_alloc_order [pos++] = i;
13743
13744 /* Initialize the rest of array as we do not allocate some registers
13745 at all. */
13746 while (pos < FIRST_PSEUDO_REGISTER)
13747 reg_alloc_order [pos++] = 0;
13748}
194734e9
JH
13749
13750void
13751x86_output_mi_thunk (file, delta, function)
13752 FILE *file;
13753 int delta;
13754 tree function;
13755{
13756 tree parm;
13757 rtx xops[3];
13758
13759 if (ix86_regparm > 0)
13760 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13761 else
13762 parm = NULL_TREE;
13763 for (; parm; parm = TREE_CHAIN (parm))
13764 if (TREE_VALUE (parm) == void_type_node)
13765 break;
13766
13767 xops[0] = GEN_INT (delta);
13768 if (TARGET_64BIT)
13769 {
13770 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13771 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13772 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
13773 if (flag_pic)
13774 {
13775 fprintf (file, "\tjmp *");
13776 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13777 fprintf (file, "@GOTPCREL(%%rip)\n");
13778 }
13779 else
13780 {
13781 fprintf (file, "\tjmp ");
13782 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13783 fprintf (file, "\n");
13784 }
13785 }
13786 else
13787 {
13788 if (parm)
13789 xops[1] = gen_rtx_REG (SImode, 0);
13790 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13791 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13792 else
13793 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13794 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
13795
13796 if (flag_pic)
13797 {
13798 xops[0] = pic_offset_table_rtx;
13799 xops[1] = gen_label_rtx ();
13800 xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13801
13802 if (ix86_regparm > 2)
13803 abort ();
13804 output_asm_insn ("push{l}\t%0", xops);
13805 output_asm_insn ("call\t%P1", xops);
13806 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13807 output_asm_insn ("pop{l}\t%0", xops);
13808 output_asm_insn
13809 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13810 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13811 output_asm_insn
13812 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13813 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13814 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
13815 }
13816 else
13817 {
13818 fprintf (file, "\tjmp ");
13819 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13820 fprintf (file, "\n");
13821 }
13822 }
13823}
This page took 3.244552 seconds and 5 git commands to generate.