/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,					/* cost of an add instruction */
  3,					/* cost of a lea instruction */
  2,					/* variable shift costs */
  3,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  3,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  3,					/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  6,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  23,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  12,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  40,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
};

static const
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  11,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  25,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
};

static const
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  4,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  17,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
};

static const
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  18,					/* cost of a divide/mod */
  2,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
};

static const
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  5,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  42,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 20},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 16},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
};

static const
struct processor_costs pentium4_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  8,					/* variable shift costs */
  8,					/* constant shift costs */
  30,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  112,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
};

const struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)
#define m_PENT4  (1<<PROCESSOR_PENTIUM4)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_3dnow_a = m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
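
/* These per-feature vectors are consumed by test macros in i386.h,
   which mask them with the bit of the processor currently being tuned
   for.  A minimal sketch of the pattern, assuming the CPUMASK test
   used below in override_options (the exact macro names live in
   i386.h):

     #define CPUMASK (1 << ix86_cpu)
     #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)

   so each `const int x86_*' above acts as a per-processor boolean.  */
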
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)

   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)

   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.

   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)

   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.

   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

/* The encoding characters for the four TLS models present in ELF.  */

static char const tls_model_chars[] = " GLil";

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
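
/* Illustrative arithmetic, assuming the 64-bit values REGPARM_MAX == 6
   and SSE_REGPARM_MAX == 8 from i386.h: the save area is
   6 * 8 + 8 * 16 = 176 bytes, i.e. six 8-byte integer registers
   followed by eight 16-byte SSE registers.  */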

/* Define the structure for the machine field in struct function.  */
struct machine_function GTY(())
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  const char *some_ld_name;
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
			)
   [va_arg registers]  (
			> to_allocate	      <- FRAME_POINTER
   [frame]	       (
			)
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};

/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_cpu;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* True if the SSE prefetch instruction is not a NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
\f
static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static const char *get_some_local_dynamic_name PARAMS ((void));
static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
static rtx maybe_get_pool_constant PARAMS ((rtx));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx get_thread_pointer PARAMS ((void));
static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static struct machine_function * ix86_init_machine_status PARAMS ((void));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
static int ia32_use_dfa_pipeline_interface PARAMS ((void));
static int ia32_multipass_dfa_lookahead PARAMS ((void));
static void ix86_init_mmx_sse_builtins PARAMS ((void));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));

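/* For instance (illustrative only), the AT&T-syntax address
   `12(%ebx,%ecx,4)' decomposes into base = %ebx, index = %ecx,
   scale = 4 and disp = 12; components absent from a given address
   are left NULL in the structure.  */
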
static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
static const char *ix86_strip_name_encoding PARAMS ((const char *))
     ATTRIBUTE_UNUSED;

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
					 tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
					    tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
						     tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					   rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
static int ix86_save_reg PARAMS ((unsigned int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
const struct attribute_spec ix86_attribute_table[];
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
static int ix86_value_regno PARAMS ((enum machine_mode));

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

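/* Illustrative example (not taken from the PS ABI text): a hypothetical
   `struct { double d; int i; }' spans two eightbytes and would classify
   as {X86_64_SSEDF_CLASS, X86_64_INTEGERSI_CLASS}, so `d' travels in an
   SSE register while `i' uses the low half of an integer register.  */
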
#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					const int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
      const int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
      {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_MMX = 4,
	  PTA_PREFETCH_SSE = 8,
	  PTA_3DNOW = 16,
	  PTA_3DNOW_A = 64
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
				       | PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_cpu_string && ix86_arch_string)
    ix86_cpu_string = ix86_arch_string;
  if (!ix86_cpu_string)
    ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_cpu = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags & MASK_MMX_SET))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags & MASK_3DNOW_SET))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags & MASK_3DNOW_A_SET))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags & MASK_SSE_SET))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags & MASK_SSE2_SET))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	break;
      }
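
  /* Worked example from the table above: `-march=pentium3' selects
     PROCESSOR_PENTIUMPRO with PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE, so
     MASK_MMX and MASK_SSE get enabled unless the user set them
     explicitly, and x86_prefetch_sse becomes true.  */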

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
      {
	ix86_cpu = processor_alias_table[i].processor;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else
    if (TARGET_64BIT)
      ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_cpu].align_loop;
      align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_cpu].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_cpu].align_func;
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 64 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 3 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
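
  /* For example, -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. 16-byte stack alignment,
     matching the non-optimize_size default chosen above.  */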

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
	ix86_tls_dialect = TLS_DIALECT_SUN;
      else
	error ("bad value (%s) for -mtls-dialect= switch",
	       ix86_tls_dialect_string);
    }

  if (profile_flag)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");
      /* Enable by default the SSE and MMX builtins.  */
      target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
      ix86_fpmath = FPMATH_SSE;
    }
  else
    ix86_fpmath = FPMATH_387;

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning ("387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }
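
  /* For instance, `-mfpmath=sse,387' (equivalently `387,sse') selects
     both units when SSE and the 387 are available; if one of them is
     disabled, the code above falls back to the other with a warning.  */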

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX;
      x86_prefetch_sse = true;
    }

  /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow.  */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
	 extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
	target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & CPUMASK)
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
}
\f
void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
  if (TARGET_64BIT && optimize >= 1)
    flag_omit_frame_pointer = 1;
  if (TARGET_64BIT)
    {
      flag_pcc_struct_return = 0;
      flag_asynchronous_unwind_tables = 1;
    }
  if (profile_flag)
    flag_omit_frame_pointer = 0;
}
\f
/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
#endif
  { NULL, 0, 0, false, false, false, NULL }
};

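/* Example uses of the attributes above (illustrative only):

     int __attribute__ ((stdcall)) f (int);
     int __attribute__ ((regparm (2))) g (int, int);

   The handlers below validate such uses, e.g. rejecting a regparm
   count larger than REGPARM_MAX.  */
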
1287/* Handle a "cdecl" or "stdcall" attribute;
1288 arguments as in struct attribute_spec.handler. */
1289static tree
1290ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1291 tree *node;
1292 tree name;
1293 tree args ATTRIBUTE_UNUSED;
1294 int flags ATTRIBUTE_UNUSED;
1295 bool *no_add_attrs;
1296{
1297 if (TREE_CODE (*node) != FUNCTION_TYPE
1298 && TREE_CODE (*node) != METHOD_TYPE
1299 && TREE_CODE (*node) != FIELD_DECL
1300 && TREE_CODE (*node) != TYPE_DECL)
b08de47e 1301 {
91d231cb
JM
1302 warning ("`%s' attribute only applies to functions",
1303 IDENTIFIER_POINTER (name));
1304 *no_add_attrs = true;
1305 }
b08de47e 1306
91d231cb
JM
1307 if (TARGET_64BIT)
1308 {
1309 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1310 *no_add_attrs = true;
1311 }
b08de47e 1312
91d231cb
JM
1313 return NULL_TREE;
1314}
b08de47e 1315
/* Handle a "regparm" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning ("`%s' attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning ("argument to `%s' attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}
1378\f
1379/* Value is the number of bytes of arguments automatically
1380 popped when returning from a subroutine call.
1381 FUNDECL is the declaration node of the function (as a tree),
1382 FUNTYPE is the data type of the function (as a tree),
1383 or for a library call it is an identifier node for the subroutine name.
1384 SIZE is the number of bytes of arguments passed on the stack.
1385
1386 On the 80386, the RTD insn may be used to pop them if the number
1387 of args is fixed, but if the number is variable then the caller
1388 must pop them all. RTD can't be used for library calls now
1389 because the library is compiled with the Unix compiler.
1390 Use of RTD is a selectable option, since it is incompatible with
1391 standard Unix calling sequences. If the option is not selected,
1392 the caller must always pop the args.
1393
1394 The attribute stdcall is equivalent to RTD on a per module basis. */
1395
1396int
 1397 ix86_return_pops_args (fundecl, funtype, size)
 1398 tree fundecl;
 1399 tree funtype;
 1400 int size;
 1401 {
 1402 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
 1403
 1404 /* Cdecl functions override -mrtd, and never pop the stack. */
 1405 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
 1406
 1407 /* Stdcall functions will pop the stack if not variable args. */
 1408 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
 1409 rtd = 1;
 1410
 1411 if (rtd
 1412 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
 1413 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
 1414 == void_type_node)))
 1415 return size;
 1416 }
 1417
 1418 /* Lose any fake structure return argument if it is passed on the stack. */
 1419 if (aggregate_value_p (TREE_TYPE (funtype))
 1420 && !TARGET_64BIT)
 1421 {
 1422 int nregs = ix86_regparm;
 1423
 1424 if (funtype)
 1425 {
 1426 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
 1427
 1428 if (attr)
 1429 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
 1430 }
 1431
 1432 if (!nregs)
 1433 return GET_MODE_SIZE (Pmode);
 1434 }
 1435
 1436 return 0;
 1437 }
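/* Worked example (illustrative only): for the stdcall declaration below the
   two SImode arguments occupy 8 bytes of stack, so ix86_return_pops_args
   returns 8 and the callee pops them with `ret $8'. A varargs prototype has
   no void_type_node terminator in TYPE_ARG_TYPES, so 0 is returned and the
   caller pops instead. */
#if 0
int __attribute__ ((stdcall)) f (int a, int b);  /* callee pops 8 bytes */
int g (const char *fmt, ...);                    /* caller pops; 0 returned */
#endif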
1438\f
1439/* Argument support functions. */
1440
1441/* Return true when register may be used to pass function parameters. */
1442bool
1443ix86_function_arg_regno_p (regno)
1444 int regno;
1445{
1446 int i;
1447 if (!TARGET_64BIT)
1448 return (regno < REGPARM_MAX
1449 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1450 if (SSE_REGNO_P (regno) && TARGET_SSE)
1451 return true;
 1452 /* RAX is used as a hidden argument to va_arg functions. */
1453 if (!regno)
1454 return true;
1455 for (i = 0; i < REGPARM_MAX; i++)
1456 if (regno == x86_64_int_parameter_registers[i])
1457 return true;
1458 return false;
1459}
1460
1461/* Initialize a variable CUM of type CUMULATIVE_ARGS
1462 for a call to a function whose data type is FNTYPE.
1463 For a library call, FNTYPE is 0. */
1464
1465void
1466init_cumulative_args (cum, fntype, libname)
e9a25f70 1467 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1468 tree fntype; /* tree ptr for function decl */
1469 rtx libname; /* SYMBOL_REF of library name or 0 */
1470{
1471 static CUMULATIVE_ARGS zero_cum;
1472 tree param, next_param;
1473
1474 if (TARGET_DEBUG_ARG)
1475 {
1476 fprintf (stderr, "\ninit_cumulative_args (");
1477 if (fntype)
1478 fprintf (stderr, "fntype code = %s, ret code = %s",
1479 tree_code_name[(int) TREE_CODE (fntype)],
1480 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1481 else
1482 fprintf (stderr, "no fntype");
1483
1484 if (libname)
1485 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1486 }
1487
1488 *cum = zero_cum;
1489
1490 /* Set up the number of registers to use for passing arguments. */
e075ae69 1491 cum->nregs = ix86_regparm;
1492 cum->sse_nregs = SSE_REGPARM_MAX;
1493 if (fntype && !TARGET_64BIT)
1494 {
1495 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 1496
1497 if (attr)
1498 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1499 }
53c17031 1500 cum->maybe_vaarg = false;
1501
 1502 /* Determine if this function has variable arguments. This is
 1503 indicated by the last argument being 'void_type_node' if there
 1504 are no variable arguments. If there are variable arguments, then
 1505 we won't pass anything in registers. */
1506
1507 if (cum->nregs)
1508 {
1509 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 1510 param != 0; param = next_param)
1511 {
1512 next_param = TREE_CHAIN (param);
e9a25f70 1513 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1514 {
1515 if (!TARGET_64BIT)
1516 cum->nregs = 0;
1517 cum->maybe_vaarg = true;
1518 }
1519 }
1520 }
1521 if ((!fntype && !libname)
1522 || (fntype && !TYPE_ARG_TYPES (fntype)))
1523 cum->maybe_vaarg = 1;
1524
1525 if (TARGET_DEBUG_ARG)
1526 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1527
1528 return;
1529}
1530
 1531/* x86-64 register passing implementation. See the x86-64 ABI for details.
 1532 The goal of this code is to classify each 8-byte chunk of an incoming
 1533 argument by register class and assign registers accordingly. */
1534
1535/* Return the union class of CLASS1 and CLASS2.
1536 See the x86-64 PS ABI for details. */
1537
1538static enum x86_64_reg_class
1539merge_classes (class1, class2)
1540 enum x86_64_reg_class class1, class2;
1541{
1542 /* Rule #1: If both classes are equal, this is the resulting class. */
1543 if (class1 == class2)
1544 return class1;
1545
1546 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1547 the other class. */
1548 if (class1 == X86_64_NO_CLASS)
1549 return class2;
1550 if (class2 == X86_64_NO_CLASS)
1551 return class1;
1552
1553 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1554 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1555 return X86_64_MEMORY_CLASS;
1556
1557 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1558 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1559 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1560 return X86_64_INTEGERSI_CLASS;
1561 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1562 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1563 return X86_64_INTEGER_CLASS;
1564
1565 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1566 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1567 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1568 return X86_64_MEMORY_CLASS;
1569
1570 /* Rule #6: Otherwise class SSE is used. */
1571 return X86_64_SSE_CLASS;
1572}
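/* A minimal sketch of how the merge rules compose (illustrative only):
   for union { int i; float f; } both members share the first 8-byte word,
   and merge_classes (X86_64_SSESF_CLASS, X86_64_INTEGERSI_CLASS) yields
   X86_64_INTEGERSI_CLASS by rule #4; merging any class with
   X86_64_NO_CLASS leaves it unchanged by rule #2, and a MEMORY operand
   forces the whole argument to memory by rule #3. */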
1573
1574/* Classify the argument of type TYPE and mode MODE.
1575 CLASSES will be filled by the register class used to pass each word
1576 of the operand. The number of words is returned. In case the parameter
1577 should be passed in memory, 0 is returned. As a special case for zero
1578 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1579
 1580 BIT_OFFSET is used internally for handling records; it specifies the
 1581 offset in bits, modulo 256, to avoid overflow cases.
1582
1583 See the x86-64 PS ABI for details.
1584*/
1585
1586static int
1587classify_argument (mode, type, classes, bit_offset)
1588 enum machine_mode mode;
1589 tree type;
1590 enum x86_64_reg_class classes[MAX_CLASSES];
1591 int bit_offset;
1592{
1593 int bytes =
1594 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1595 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1596
1597 if (type && AGGREGATE_TYPE_P (type))
1598 {
1599 int i;
1600 tree field;
1601 enum x86_64_reg_class subclasses[MAX_CLASSES];
1602
1603 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1604 if (bytes > 16)
1605 return 0;
1606
1607 for (i = 0; i < words; i++)
1608 classes[i] = X86_64_NO_CLASS;
1609
 1610 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
 1611 signal the memory class, so handle them as a special case. */
1612 if (!words)
1613 {
1614 classes[0] = X86_64_NO_CLASS;
1615 return 1;
1616 }
1617
1618 /* Classify each field of record and merge classes. */
1619 if (TREE_CODE (type) == RECORD_TYPE)
1620 {
 1621 /* For classes, first merge in the fields of the base classes. */
1622 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1623 {
1624 tree bases = TYPE_BINFO_BASETYPES (type);
1625 int n_bases = TREE_VEC_LENGTH (bases);
1626 int i;
1627
1628 for (i = 0; i < n_bases; ++i)
1629 {
1630 tree binfo = TREE_VEC_ELT (bases, i);
1631 int num;
1632 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1633 tree type = BINFO_TYPE (binfo);
1634
1635 num = classify_argument (TYPE_MODE (type),
1636 type, subclasses,
1637 (offset + bit_offset) % 256);
1638 if (!num)
1639 return 0;
1640 for (i = 0; i < num; i++)
1641 {
db01f480 1642 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1643 classes[i + pos] =
1644 merge_classes (subclasses[i], classes[i + pos]);
1645 }
1646 }
1647 }
 1648 /* And now merge the fields of the structure. */
1649 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1650 {
1651 if (TREE_CODE (field) == FIELD_DECL)
1652 {
1653 int num;
1654
1655 /* Bitfields are always classified as integer. Handle them
1656 early, since later code would consider them to be
1657 misaligned integers. */
1658 if (DECL_BIT_FIELD (field))
1659 {
1660 for (i = int_bit_position (field) / 8 / 8;
1661 i < (int_bit_position (field)
1662 + tree_low_cst (DECL_SIZE (field), 0)
1663 + 63) / 8 / 8; i++)
1664 classes[i] =
1665 merge_classes (X86_64_INTEGER_CLASS,
1666 classes[i]);
1667 }
1668 else
1669 {
1670 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1671 TREE_TYPE (field), subclasses,
1672 (int_bit_position (field)
1673 + bit_offset) % 256);
1674 if (!num)
1675 return 0;
1676 for (i = 0; i < num; i++)
1677 {
1678 int pos =
db01f480 1679 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1680 classes[i + pos] =
1681 merge_classes (subclasses[i], classes[i + pos]);
1682 }
1683 }
1684 }
1685 }
1686 }
1687 /* Arrays are handled as small records. */
1688 else if (TREE_CODE (type) == ARRAY_TYPE)
1689 {
1690 int num;
1691 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1692 TREE_TYPE (type), subclasses, bit_offset);
1693 if (!num)
1694 return 0;
1695
1696 /* The partial classes are now full classes. */
1697 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1698 subclasses[0] = X86_64_SSE_CLASS;
1699 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1700 subclasses[0] = X86_64_INTEGER_CLASS;
1701
1702 for (i = 0; i < words; i++)
1703 classes[i] = subclasses[i % num];
1704 }
1705 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1706 else if (TREE_CODE (type) == UNION_TYPE
1707 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031 1708 {
 1709 /* For classes, first merge in the fields of the base classes. */
1710 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1711 {
1712 tree bases = TYPE_BINFO_BASETYPES (type);
1713 int n_bases = TREE_VEC_LENGTH (bases);
1714 int i;
1715
1716 for (i = 0; i < n_bases; ++i)
1717 {
1718 tree binfo = TREE_VEC_ELT (bases, i);
1719 int num;
1720 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1721 tree type = BINFO_TYPE (binfo);
1722
1723 num = classify_argument (TYPE_MODE (type),
1724 type, subclasses,
db01f480 1725 (offset + (bit_offset % 64)) % 256);
1726 if (!num)
1727 return 0;
1728 for (i = 0; i < num; i++)
1729 {
1730 int pos = (offset + bit_offset) / 8 / 8;
1731 classes[i + pos] =
1732 merge_classes (subclasses[i], classes[i + pos]);
1733 }
1734 }
1735 }
1736 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1737 {
1738 if (TREE_CODE (field) == FIELD_DECL)
1739 {
1740 int num;
1741 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1742 TREE_TYPE (field), subclasses,
1743 bit_offset);
1744 if (!num)
1745 return 0;
1746 for (i = 0; i < num; i++)
1747 classes[i] = merge_classes (subclasses[i], classes[i]);
1748 }
1749 }
1750 }
1751 else
1752 abort ();
1753
1754 /* Final merger cleanup. */
1755 for (i = 0; i < words; i++)
1756 {
1757 /* If one class is MEMORY, everything should be passed in
1758 memory. */
1759 if (classes[i] == X86_64_MEMORY_CLASS)
1760 return 0;
1761
 1762 /* The X86_64_SSEUP_CLASS should always be preceded by
 1763 X86_64_SSE_CLASS. */
1764 if (classes[i] == X86_64_SSEUP_CLASS
1765 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1766 classes[i] = X86_64_SSE_CLASS;
1767
d6a7951f 1768 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1769 if (classes[i] == X86_64_X87UP_CLASS
1770 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1771 classes[i] = X86_64_SSE_CLASS;
1772 }
1773 return words;
1774 }
1775
1776 /* Compute alignment needed. We align all types to natural boundaries with
 1777 the exception of XFmode, which is aligned to 64 bits. */
1778 if (mode != VOIDmode && mode != BLKmode)
1779 {
1780 int mode_alignment = GET_MODE_BITSIZE (mode);
1781
1782 if (mode == XFmode)
1783 mode_alignment = 128;
1784 else if (mode == XCmode)
1785 mode_alignment = 256;
f5143c46 1786 /* Misaligned fields are always returned in memory. */
53c17031
JH
1787 if (bit_offset % mode_alignment)
1788 return 0;
1789 }
1790
1791 /* Classification of atomic types. */
1792 switch (mode)
1793 {
1794 case DImode:
1795 case SImode:
1796 case HImode:
1797 case QImode:
1798 case CSImode:
1799 case CHImode:
1800 case CQImode:
1801 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1802 classes[0] = X86_64_INTEGERSI_CLASS;
1803 else
1804 classes[0] = X86_64_INTEGER_CLASS;
1805 return 1;
1806 case CDImode:
1807 case TImode:
1808 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1809 return 2;
1810 case CTImode:
1811 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1812 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1813 return 4;
1814 case SFmode:
1815 if (!(bit_offset % 64))
1816 classes[0] = X86_64_SSESF_CLASS;
1817 else
1818 classes[0] = X86_64_SSE_CLASS;
1819 return 1;
1820 case DFmode:
1821 classes[0] = X86_64_SSEDF_CLASS;
1822 return 1;
1823 case TFmode:
1824 classes[0] = X86_64_X87_CLASS;
1825 classes[1] = X86_64_X87UP_CLASS;
1826 return 2;
1827 case TCmode:
1828 classes[0] = X86_64_X87_CLASS;
1829 classes[1] = X86_64_X87UP_CLASS;
1830 classes[2] = X86_64_X87_CLASS;
1831 classes[3] = X86_64_X87UP_CLASS;
1832 return 4;
1833 case DCmode:
1834 classes[0] = X86_64_SSEDF_CLASS;
1835 classes[1] = X86_64_SSEDF_CLASS;
1836 return 2;
1837 case SCmode:
1838 classes[0] = X86_64_SSE_CLASS;
1839 return 1;
1840 case V4SFmode:
1841 case V4SImode:
1842 case V16QImode:
1843 case V8HImode:
1844 case V2DFmode:
1845 case V2DImode:
1846 classes[0] = X86_64_SSE_CLASS;
1847 classes[1] = X86_64_SSEUP_CLASS;
1848 return 2;
1849 case V2SFmode:
1850 case V2SImode:
1851 case V4HImode:
1852 case V8QImode:
1853 classes[0] = X86_64_SSE_CLASS;
1854 return 1;
 1855 case BLKmode:
 1856 case VOIDmode:
1857 return 0;
1858 default:
1859 abort ();
1860 }
1861}
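/* Classification sketch (illustrative only; see the x86-64 PS ABI):
     struct { int a, b; }        -> 1 word: INTEGER
     struct { double d; int i; } -> 2 words: SSEDF, INTEGER
     struct { long double ld; }  -> X87 + X87UP, hence memory for arguments
     struct { char c[32]; }      -> larger than 16 bytes, returns 0 (memory) */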
1862
 1863/* Examine the argument and return the number of registers required in each
 1864 class. Return 0 iff the parameter should be passed in memory. */
1865static int
1866examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1867 enum machine_mode mode;
1868 tree type;
1869 int *int_nregs, *sse_nregs;
1870 int in_return;
1871{
1872 enum x86_64_reg_class class[MAX_CLASSES];
1873 int n = classify_argument (mode, type, class, 0);
1874
1875 *int_nregs = 0;
1876 *sse_nregs = 0;
1877 if (!n)
1878 return 0;
1879 for (n--; n >= 0; n--)
1880 switch (class[n])
1881 {
1882 case X86_64_INTEGER_CLASS:
1883 case X86_64_INTEGERSI_CLASS:
1884 (*int_nregs)++;
1885 break;
1886 case X86_64_SSE_CLASS:
1887 case X86_64_SSESF_CLASS:
1888 case X86_64_SSEDF_CLASS:
1889 (*sse_nregs)++;
1890 break;
1891 case X86_64_NO_CLASS:
1892 case X86_64_SSEUP_CLASS:
1893 break;
1894 case X86_64_X87_CLASS:
1895 case X86_64_X87UP_CLASS:
1896 if (!in_return)
1897 return 0;
1898 break;
1899 case X86_64_MEMORY_CLASS:
1900 abort ();
1901 }
1902 return 1;
1903}
 1904/* Construct a container for the argument as used by the GCC interface. See
 1905 FUNCTION_ARG for the detailed description. */
1906static rtx
1907construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1908 enum machine_mode mode;
1909 tree type;
1910 int in_return;
1911 int nintregs, nsseregs;
1912 const int * intreg;
1913 int sse_regno;
1914{
1915 enum machine_mode tmpmode;
1916 int bytes =
1917 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1918 enum x86_64_reg_class class[MAX_CLASSES];
1919 int n;
1920 int i;
1921 int nexps = 0;
1922 int needed_sseregs, needed_intregs;
1923 rtx exp[MAX_CLASSES];
1924 rtx ret;
1925
1926 n = classify_argument (mode, type, class, 0);
1927 if (TARGET_DEBUG_ARG)
1928 {
1929 if (!n)
1930 fprintf (stderr, "Memory class\n");
1931 else
1932 {
1933 fprintf (stderr, "Classes:");
1934 for (i = 0; i < n; i++)
1935 {
1936 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1937 }
1938 fprintf (stderr, "\n");
1939 }
1940 }
1941 if (!n)
1942 return NULL;
1943 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1944 return NULL;
1945 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1946 return NULL;
1947
1948 /* First construct simple cases. Avoid SCmode, since we want to use
 1949 a single register to pass this type. */
1950 if (n == 1 && mode != SCmode)
1951 switch (class[0])
1952 {
1953 case X86_64_INTEGER_CLASS:
1954 case X86_64_INTEGERSI_CLASS:
1955 return gen_rtx_REG (mode, intreg[0]);
1956 case X86_64_SSE_CLASS:
1957 case X86_64_SSESF_CLASS:
1958 case X86_64_SSEDF_CLASS:
1959 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1960 case X86_64_X87_CLASS:
1961 return gen_rtx_REG (mode, FIRST_STACK_REG);
1962 case X86_64_NO_CLASS:
1963 /* Zero sized array, struct or class. */
1964 return NULL;
1965 default:
1966 abort ();
1967 }
1968 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
e95d6b23 1969 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1970 if (n == 2
1971 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1972 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1973 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1974 && class[1] == X86_64_INTEGER_CLASS
1975 && (mode == CDImode || mode == TImode)
1976 && intreg[0] + 1 == intreg[1])
1977 return gen_rtx_REG (mode, intreg[0]);
1978 if (n == 4
1979 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1980 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1981 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1982
1983 /* Otherwise figure out the entries of the PARALLEL. */
1984 for (i = 0; i < n; i++)
1985 {
1986 switch (class[i])
1987 {
1988 case X86_64_NO_CLASS:
1989 break;
1990 case X86_64_INTEGER_CLASS:
1991 case X86_64_INTEGERSI_CLASS:
 1992 /* Merge TImodes on aligned occasions here too. */
1993 if (i * 8 + 8 > bytes)
1994 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1995 else if (class[i] == X86_64_INTEGERSI_CLASS)
1996 tmpmode = SImode;
1997 else
1998 tmpmode = DImode;
 1999 /* We've requested 24 bytes for which we have no mode. Use DImode. */
2000 if (tmpmode == BLKmode)
2001 tmpmode = DImode;
2002 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2003 gen_rtx_REG (tmpmode, *intreg),
2004 GEN_INT (i*8));
2005 intreg++;
2006 break;
2007 case X86_64_SSESF_CLASS:
2008 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2009 gen_rtx_REG (SFmode,
2010 SSE_REGNO (sse_regno)),
2011 GEN_INT (i*8));
2012 sse_regno++;
2013 break;
2014 case X86_64_SSEDF_CLASS:
2015 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2016 gen_rtx_REG (DFmode,
2017 SSE_REGNO (sse_regno)),
2018 GEN_INT (i*8));
2019 sse_regno++;
2020 break;
2021 case X86_64_SSE_CLASS:
2022 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2023 tmpmode = TImode, i++;
2024 else
2025 tmpmode = DImode;
2026 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2027 gen_rtx_REG (tmpmode,
2028 SSE_REGNO (sse_regno)),
2029 GEN_INT (i*8));
2030 sse_regno++;
2031 break;
2032 default:
2033 abort ();
2034 }
2035 }
2036 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2037 for (i = 0; i < nexps; i++)
2038 XVECEXP (ret, 0, i) = exp [i];
2039 return ret;
2040}
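/* Usage sketch (illustrative only): for struct { double d; int i; } the
   classifier yields SSEDF + INTEGER, so construct_container builds roughly
     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])
   i.e. the double travels in an SSE register and the int piece in a general
   purpose register. */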
2041
2042/* Update the data in CUM to advance over an argument
2043 of mode MODE and data type TYPE.
2044 (TYPE is null for libcalls where that information may not be available.) */
2045
2046void
2047function_arg_advance (cum, mode, type, named)
2048 CUMULATIVE_ARGS *cum; /* current arg information */
2049 enum machine_mode mode; /* current arg mode */
2050 tree type; /* type of the argument or 0 if lib support */
2051 int named; /* whether or not the argument was named */
2052{
2053 int bytes =
2054 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2055 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2056
2057 if (TARGET_DEBUG_ARG)
2058 fprintf (stderr,
e9a25f70 2059 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 2060 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
53c17031 2061 if (TARGET_64BIT)
b08de47e 2062 {
2063 int int_nregs, sse_nregs;
2064 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2065 cum->words += words;
2066 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2067 {
2068 cum->nregs -= int_nregs;
2069 cum->sse_nregs -= sse_nregs;
2070 cum->regno += int_nregs;
2071 cum->sse_regno += sse_nregs;
82a127a9 2072 }
2073 else
2074 cum->words += words;
b08de47e 2075 }
a4f31c00 2076 else
82a127a9 2077 {
2078 if (TARGET_SSE && mode == TImode)
2079 {
2080 cum->sse_words += words;
2081 cum->sse_nregs -= 1;
2082 cum->sse_regno += 1;
2083 if (cum->sse_nregs <= 0)
2084 {
2085 cum->sse_nregs = 0;
2086 cum->sse_regno = 0;
2087 }
2088 }
2089 else
82a127a9 2090 {
2091 cum->words += words;
2092 cum->nregs -= words;
2093 cum->regno += words;
2094
2095 if (cum->nregs <= 0)
2096 {
2097 cum->nregs = 0;
2098 cum->regno = 0;
2099 }
2100 }
2101 }
2102 return;
2103}
2104
2105/* Define where to put the arguments to a function.
2106 Value is zero to push the argument on the stack,
2107 or a hard register in which to store the argument.
2108
2109 MODE is the argument's machine mode.
2110 TYPE is the data type of the argument (as a tree).
2111 This is null for libcalls where that information may
2112 not be available.
2113 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2114 the preceding args and about the function being called.
2115 NAMED is nonzero if this argument is a named parameter
2116 (otherwise it is an extra parameter matching an ellipsis). */
2117
07933f72 2118rtx
2119function_arg (cum, mode, type, named)
2120 CUMULATIVE_ARGS *cum; /* current arg information */
2121 enum machine_mode mode; /* current arg mode */
2122 tree type; /* type of the argument or 0 if lib support */
2123 int named; /* != 0 for normal args, == 0 for ... args */
2124{
2125 rtx ret = NULL_RTX;
2126 int bytes =
2127 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2128 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2129
 2130 /* Handle a hidden AL argument containing the number of registers for varargs
 2131 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
 2132 any AL settings. */
32ee7d1d 2133 if (mode == VOIDmode)
b08de47e 2134 {
2135 if (TARGET_64BIT)
2136 return GEN_INT (cum->maybe_vaarg
2137 ? (cum->sse_nregs < 0
2138 ? SSE_REGPARM_MAX
2139 : cum->sse_regno)
2140 : -1);
2141 else
2142 return constm1_rtx;
b08de47e 2143 }
2144 if (TARGET_64BIT)
2145 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2146 &x86_64_int_parameter_registers [cum->regno],
2147 cum->sse_regno);
2148 else
2149 switch (mode)
2150 {
2151 /* For now, pass fp/complex values on the stack. */
2152 default:
2153 break;
2154
2155 case BLKmode:
2156 case DImode:
2157 case SImode:
2158 case HImode:
2159 case QImode:
2160 if (words <= cum->nregs)
2161 ret = gen_rtx_REG (mode, cum->regno);
2162 break;
2163 case TImode:
2164 if (cum->sse_nregs)
2165 ret = gen_rtx_REG (mode, cum->sse_regno);
2166 break;
2167 }
2168
2169 if (TARGET_DEBUG_ARG)
2170 {
2171 fprintf (stderr,
91ea38f9 2172 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2173 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2174
2175 if (ret)
91ea38f9 2176 print_simple_rtl (stderr, ret);
2177 else
2178 fprintf (stderr, ", stack");
2179
2180 fprintf (stderr, " )\n");
2181 }
2182
2183 return ret;
2184}
2185
2186/* Gives the alignment boundary, in bits, of an argument with the specified mode
2187 and type. */
2188
2189int
2190ix86_function_arg_boundary (mode, type)
2191 enum machine_mode mode;
2192 tree type;
2193{
2194 int align;
2195 if (!TARGET_64BIT)
2196 return PARM_BOUNDARY;
2197 if (type)
2198 align = TYPE_ALIGN (type);
2199 else
2200 align = GET_MODE_ALIGNMENT (mode);
2201 if (align < PARM_BOUNDARY)
2202 align = PARM_BOUNDARY;
2203 if (align > 128)
2204 align = 128;
2205 return align;
2206}
2207
2208/* Return true if N is a possible register number of function value. */
2209bool
2210ix86_function_value_regno_p (regno)
2211 int regno;
2212{
2213 if (!TARGET_64BIT)
2214 {
2215 return ((regno) == 0
2216 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2217 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2218 }
2219 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2220 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2221 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2222}
2223
2224/* Define how to find the value returned by a function.
2225 VALTYPE is the data type of the value (as a tree).
2226 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2227 otherwise, FUNC is 0. */
2228rtx
2229ix86_function_value (valtype)
2230 tree valtype;
2231{
2232 if (TARGET_64BIT)
2233 {
2234 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2235 REGPARM_MAX, SSE_REGPARM_MAX,
2236 x86_64_int_return_registers, 0);
 2237 /* For zero-sized structures, construct_container returns NULL, but we need
 2238 to keep the rest of the compiler happy by returning a meaningful value. */
2239 if (!ret)
2240 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2241 return ret;
2242 }
2243 else
2244 return gen_rtx_REG (TYPE_MODE (valtype),
2245 ix86_value_regno (TYPE_MODE (valtype)));
2246}
2247
 2248/* Return nonzero iff the type is returned in memory. */
2249int
2250ix86_return_in_memory (type)
2251 tree type;
2252{
2253 int needed_intregs, needed_sseregs;
2254 if (TARGET_64BIT)
2255 {
2256 return !examine_argument (TYPE_MODE (type), type, 1,
2257 &needed_intregs, &needed_sseregs);
2258 }
2259 else
2260 {
2261 if (TYPE_MODE (type) == BLKmode
2262 || (VECTOR_MODE_P (TYPE_MODE (type))
2263 && int_size_in_bytes (type) == 8)
2264 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2265 && TYPE_MODE (type) != TFmode
2266 && !VECTOR_MODE_P (TYPE_MODE (type))))
2267 return 1;
2268 return 0;
2269 }
2270}
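/* Illustrative examples of the !TARGET_64BIT branch above (assuming the
   types get their usual modes):
     a struct in BLKmode       -> returned in memory;
     double (DFmode, 8 bytes)  -> returned in a register;
     a 16-byte struct          -> more than 12 bytes: memory. */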
2271
2272/* Define how to find the value returned by a library function
2273 assuming the value has mode MODE. */
2274rtx
2275ix86_libcall_value (mode)
2276 enum machine_mode mode;
2277{
2278 if (TARGET_64BIT)
2279 {
2280 switch (mode)
2281 {
2282 case SFmode:
2283 case SCmode:
2284 case DFmode:
2285 case DCmode:
2286 return gen_rtx_REG (mode, FIRST_SSE_REG);
2287 case TFmode:
2288 case TCmode:
2289 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2290 default:
2291 return gen_rtx_REG (mode, 0);
2292 }
2293 }
2294 else
2295 return gen_rtx_REG (mode, ix86_value_regno (mode));
2296}
2297
2298/* Given a mode, return the register to use for a return value. */
2299
2300static int
2301ix86_value_regno (mode)
2302 enum machine_mode mode;
2303{
2304 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2305 return FIRST_FLOAT_REG;
2306 if (mode == TImode || VECTOR_MODE_P (mode))
2307 return FIRST_SSE_REG;
2308 return 0;
53c17031 2309}
2310\f
2311/* Create the va_list data type. */
53c17031 2312
2313tree
2314ix86_build_va_list ()
2315{
2316 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 2317
2318 /* For i386 we use plain pointer to argument area. */
2319 if (!TARGET_64BIT)
2320 return build_pointer_type (char_type_node);
2321
f1e639b1 2322 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2323 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2324
 2325 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
 2326 unsigned_type_node);
 2327 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2328 unsigned_type_node);
2329 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2330 ptr_type_node);
2331 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2332 ptr_type_node);
2333
2334 DECL_FIELD_CONTEXT (f_gpr) = record;
2335 DECL_FIELD_CONTEXT (f_fpr) = record;
2336 DECL_FIELD_CONTEXT (f_ovf) = record;
2337 DECL_FIELD_CONTEXT (f_sav) = record;
2338
2339 TREE_CHAIN (record) = type_decl;
2340 TYPE_NAME (record) = type_decl;
2341 TYPE_FIELDS (record) = f_gpr;
2342 TREE_CHAIN (f_gpr) = f_fpr;
2343 TREE_CHAIN (f_fpr) = f_ovf;
2344 TREE_CHAIN (f_ovf) = f_sav;
2345
2346 layout_type (record);
2347
2348 /* The correct type is an array type of one element. */
2349 return build_array_type (record, build_index_type (size_zero_node));
2350}
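/* For reference, the record built above matches the C-level layout the
   x86-64 ABI specifies for va_list (a sketch, not code emitted here): */
#if 0
typedef struct __va_list_tag
{
  unsigned int gp_offset;    /* byte offset into reg_save_area for GP args */
  unsigned int fp_offset;    /* byte offset into reg_save_area for SSE args */
  void *overflow_arg_area;   /* arguments that were passed on the stack */
  void *reg_save_area;       /* block stored by the function prologue */
} __va_list[1];
#endif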
2351
 2352/* Perform any actions needed for a function that is receiving a
 2353 variable number of arguments.
2354
2355 CUM is as above.
2356
2357 MODE and TYPE are the mode and type of the current parameter.
2358
2359 PRETEND_SIZE is a variable that should be set to the amount of stack
2360 that must be pushed by the prolog to pretend that our caller pushed
2361 it.
2362
2363 Normally, this macro will push all remaining incoming registers on the
2364 stack and set PRETEND_SIZE to the length of the registers pushed. */
2365
2366void
2367ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2368 CUMULATIVE_ARGS *cum;
2369 enum machine_mode mode;
2370 tree type;
2371 int *pretend_size ATTRIBUTE_UNUSED;
2372 int no_rtl;
2373
2374{
2375 CUMULATIVE_ARGS next_cum;
2376 rtx save_area = NULL_RTX, mem;
2377 rtx label;
2378 rtx label_ref;
2379 rtx tmp_reg;
2380 rtx nsse_reg;
2381 int set;
2382 tree fntype;
2383 int stdarg_p;
2384 int i;
2385
2386 if (!TARGET_64BIT)
2387 return;
2388
2389 /* Indicate to allocate space on the stack for varargs save area. */
2390 ix86_save_varrargs_registers = 1;
2391
2392 fntype = TREE_TYPE (current_function_decl);
2393 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2394 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2395 != void_type_node));
2396
2397 /* For varargs, we do not want to skip the dummy va_dcl argument.
2398 For stdargs, we do want to skip the last named argument. */
2399 next_cum = *cum;
2400 if (stdarg_p)
2401 function_arg_advance (&next_cum, mode, type, 1);
2402
2403 if (!no_rtl)
2404 save_area = frame_pointer_rtx;
2405
2406 set = get_varargs_alias_set ();
2407
2408 for (i = next_cum.regno; i < ix86_regparm; i++)
2409 {
2410 mem = gen_rtx_MEM (Pmode,
2411 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 2412 set_mem_alias_set (mem, set);
2413 emit_move_insn (mem, gen_rtx_REG (Pmode,
2414 x86_64_int_parameter_registers[i]));
2415 }
2416
2417 if (next_cum.sse_nregs)
2418 {
 2419 /* Now emit code to save SSE registers. The AX parameter contains the number
 2420 of SSE parameter registers used to call this function. We use the
 2421 sse_prologue_save insn template that produces a computed jump across
 2422 the SSE saves. We need some preparation work to get this working. */
2423
2424 label = gen_label_rtx ();
2425 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2426
 2427 /* Compute the address to jump to:
 2428 label - 4*eax + nnamed_sse_arguments*4 */
2429 tmp_reg = gen_reg_rtx (Pmode);
2430 nsse_reg = gen_reg_rtx (Pmode);
2431 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2432 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 2433 gen_rtx_MULT (Pmode, nsse_reg,
2434 GEN_INT (4))));
2435 if (next_cum.sse_regno)
2436 emit_move_insn
2437 (nsse_reg,
2438 gen_rtx_CONST (DImode,
2439 gen_rtx_PLUS (DImode,
2440 label_ref,
2441 GEN_INT (next_cum.sse_regno * 4))));
2442 else
2443 emit_move_insn (nsse_reg, label_ref);
2444 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2445
 2446 /* Compute the address of the memory block we save into. We always use a
 2447 pointer pointing 127 bytes after the first byte to store - this is
 2448 needed to keep the instruction size limited to 4 bytes. */
2449 tmp_reg = gen_reg_rtx (Pmode);
2450 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2451 plus_constant (save_area,
2452 8 * REGPARM_MAX + 127)));
ad919812 2453 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 2454 set_mem_alias_set (mem, set);
8ac61af7 2455 set_mem_align (mem, BITS_PER_WORD);
2456
2457 /* And finally do the dirty job! */
2458 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2459 GEN_INT (next_cum.sse_regno), label));
2460 }
2461
2462}
2463
2464/* Implement va_start. */
2465
2466void
e5faf155 2467ix86_va_start (valist, nextarg)
2468 tree valist;
2469 rtx nextarg;
2470{
2471 HOST_WIDE_INT words, n_gpr, n_fpr;
2472 tree f_gpr, f_fpr, f_ovf, f_sav;
2473 tree gpr, fpr, ovf, sav, t;
2474
2475 /* Only 64bit target needs something special. */
2476 if (!TARGET_64BIT)
2477 {
e5faf155 2478 std_expand_builtin_va_start (valist, nextarg);
2479 return;
2480 }
2481
2482 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2483 f_fpr = TREE_CHAIN (f_gpr);
2484 f_ovf = TREE_CHAIN (f_fpr);
2485 f_sav = TREE_CHAIN (f_ovf);
2486
2487 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2488 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2489 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2490 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2491 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2492
2493 /* Count number of gp and fp argument registers used. */
2494 words = current_function_args_info.words;
2495 n_gpr = current_function_args_info.regno;
2496 n_fpr = current_function_args_info.sse_regno;
2497
2498 if (TARGET_DEBUG_ARG)
2499 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
b531087a 2500 (int) words, (int) n_gpr, (int) n_fpr);
2501
2502 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2503 build_int_2 (n_gpr * 8, 0));
2504 TREE_SIDE_EFFECTS (t) = 1;
2505 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2506
2507 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2508 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2509 TREE_SIDE_EFFECTS (t) = 1;
2510 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2511
2512 /* Find the overflow area. */
2513 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2514 if (words != 0)
2515 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2516 build_int_2 (words * UNITS_PER_WORD, 0));
2517 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2518 TREE_SIDE_EFFECTS (t) = 1;
2519 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2520
2521 /* Find the register save area.
 2522 The prologue of the function saves it right above the stack frame. */
2523 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2524 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2525 TREE_SIDE_EFFECTS (t) = 1;
2526 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2527}
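/* Worked example (illustrative only): for int f (int a, ...) one GP register
   is consumed by `a', so va_start stores gp_offset = 8 and
   fp_offset = 8 * REGPARM_MAX (no SSE args used), points overflow_arg_area
   just past the named stack words, and sets reg_save_area to the block the
   prologue saved above the stack frame. */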
2528
2529/* Implement va_arg. */
2530rtx
2531ix86_va_arg (valist, type)
2532 tree valist, type;
2533{
0139adca 2534 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2535 tree f_gpr, f_fpr, f_ovf, f_sav;
2536 tree gpr, fpr, ovf, sav, t;
b932f770 2537 int size, rsize;
2538 rtx lab_false, lab_over = NULL_RTX;
2539 rtx addr_rtx, r;
2540 rtx container;
2541
2542 /* Only 64bit target needs something special. */
2543 if (!TARGET_64BIT)
2544 {
2545 return std_expand_builtin_va_arg (valist, type);
2546 }
2547
2548 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2549 f_fpr = TREE_CHAIN (f_gpr);
2550 f_ovf = TREE_CHAIN (f_fpr);
2551 f_sav = TREE_CHAIN (f_ovf);
2552
2553 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2554 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2555 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2556 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2557 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2558
2559 size = int_size_in_bytes (type);
2560 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2561
2562 container = construct_container (TYPE_MODE (type), type, 0,
2563 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2564 /*
2565 * Pull the value out of the saved registers ...
2566 */
2567
2568 addr_rtx = gen_reg_rtx (Pmode);
2569
2570 if (container)
2571 {
2572 rtx int_addr_rtx, sse_addr_rtx;
2573 int needed_intregs, needed_sseregs;
2574 int need_temp;
2575
2576 lab_over = gen_label_rtx ();
2577 lab_false = gen_label_rtx ();
 2578
2579 examine_argument (TYPE_MODE (type), type, 0,
2580 &needed_intregs, &needed_sseregs);
2581
2582
2583 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2584 || TYPE_ALIGN (type) > 128);
2585
 2586 /* In case we are passing a structure, verify that it is a consecutive block
 2587 on the register save area. If not, we need to do moves. */
2588 if (!need_temp && !REG_P (container))
2589 {
 2590 /* Verify that all registers are strictly consecutive. */
2591 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2592 {
2593 int i;
2594
2595 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2596 {
2597 rtx slot = XVECEXP (container, 0, i);
b531087a 2598 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2599 || INTVAL (XEXP (slot, 1)) != i * 16)
2600 need_temp = 1;
2601 }
2602 }
2603 else
2604 {
2605 int i;
2606
2607 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2608 {
2609 rtx slot = XVECEXP (container, 0, i);
b531087a 2610 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2611 || INTVAL (XEXP (slot, 1)) != i * 8)
2612 need_temp = 1;
2613 }
2614 }
2615 }
2616 if (!need_temp)
2617 {
2618 int_addr_rtx = addr_rtx;
2619 sse_addr_rtx = addr_rtx;
2620 }
2621 else
2622 {
2623 int_addr_rtx = gen_reg_rtx (Pmode);
2624 sse_addr_rtx = gen_reg_rtx (Pmode);
2625 }
2626 /* First ensure that we fit completely in registers. */
2627 if (needed_intregs)
2628 {
2629 emit_cmp_and_jump_insns (expand_expr
2630 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2631 GEN_INT ((REGPARM_MAX - needed_intregs +
2632 1) * 8), GE, const1_rtx, SImode,
d43e0b7d 2633 1, lab_false);
ad919812
JH
2634 }
2635 if (needed_sseregs)
2636 {
2637 emit_cmp_and_jump_insns (expand_expr
2638 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2639 GEN_INT ((SSE_REGPARM_MAX -
2640 needed_sseregs + 1) * 16 +
2641 REGPARM_MAX * 8), GE, const1_rtx,
d43e0b7d 2642 SImode, 1, lab_false);
ad919812
JH
2643 }
2644
2645 /* Compute index to start of area used for integer regs. */
2646 if (needed_intregs)
2647 {
2648 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2649 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2650 if (r != int_addr_rtx)
2651 emit_move_insn (int_addr_rtx, r);
2652 }
2653 if (needed_sseregs)
2654 {
2655 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2656 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2657 if (r != sse_addr_rtx)
2658 emit_move_insn (sse_addr_rtx, r);
2659 }
2660 if (need_temp)
2661 {
2662 int i;
2663 rtx mem;
2664
2665 /* Never use the memory itself, as it has the alias set. */
2666 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2667 mem = gen_rtx_MEM (BLKmode, addr_rtx);
0692acba 2668 set_mem_alias_set (mem, get_varargs_alias_set ());
8ac61af7 2669 set_mem_align (mem, BITS_PER_UNIT);
 2670
2671 for (i = 0; i < XVECLEN (container, 0); i++)
2672 {
2673 rtx slot = XVECEXP (container, 0, i);
2674 rtx reg = XEXP (slot, 0);
2675 enum machine_mode mode = GET_MODE (reg);
2676 rtx src_addr;
2677 rtx src_mem;
2678 int src_offset;
2679 rtx dest_mem;
2680
2681 if (SSE_REGNO_P (REGNO (reg)))
2682 {
2683 src_addr = sse_addr_rtx;
2684 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2685 }
2686 else
2687 {
2688 src_addr = int_addr_rtx;
2689 src_offset = REGNO (reg) * 8;
2690 }
2691 src_mem = gen_rtx_MEM (mode, src_addr);
0692acba 2692 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2693 src_mem = adjust_address (src_mem, mode, src_offset);
2694 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2695 emit_move_insn (dest_mem, src_mem);
2696 }
2697 }
2698
2699 if (needed_intregs)
2700 {
2701 t =
2702 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2703 build_int_2 (needed_intregs * 8, 0));
2704 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2705 TREE_SIDE_EFFECTS (t) = 1;
2706 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2707 }
2708 if (needed_sseregs)
2709 {
2710 t =
2711 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2712 build_int_2 (needed_sseregs * 16, 0));
2713 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2714 TREE_SIDE_EFFECTS (t) = 1;
2715 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2716 }
2717
2718 emit_jump_insn (gen_jump (lab_over));
2719 emit_barrier ();
2720 emit_label (lab_false);
2721 }
2722
2723 /* ... otherwise out of the overflow area. */
2724
2725 /* Care for on-stack alignment if needed. */
2726 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2727 t = ovf;
2728 else
2729 {
2730 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2731 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2732 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2733 }
2734 t = save_expr (t);
2735
2736 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2737 if (r != addr_rtx)
2738 emit_move_insn (addr_rtx, r);
2739
2740 t =
2741 build (PLUS_EXPR, TREE_TYPE (t), t,
2742 build_int_2 (rsize * UNITS_PER_WORD, 0));
2743 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2744 TREE_SIDE_EFFECTS (t) = 1;
2745 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2746
2747 if (container)
2748 emit_label (lab_over);
2749
2750 return addr_rtx;
2751}
2752\f
2753/* Return nonzero if OP is general operand representable on x86_64. */
2754
2755int
2756x86_64_general_operand (op, mode)
2757 rtx op;
2758 enum machine_mode mode;
2759{
2760 if (!TARGET_64BIT)
2761 return general_operand (op, mode);
2762 if (nonimmediate_operand (op, mode))
2763 return 1;
2764 return x86_64_sign_extended_value (op);
2765}
2766
2767/* Return nonzero if OP is general operand representable on x86_64
d6a7951f 2768 as either sign extended or zero extended constant. */
2769
2770int
2771x86_64_szext_general_operand (op, mode)
2772 rtx op;
2773 enum machine_mode mode;
2774{
2775 if (!TARGET_64BIT)
2776 return general_operand (op, mode);
2777 if (nonimmediate_operand (op, mode))
2778 return 1;
2779 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2780}
2781
2782/* Return nonzero if OP is nonmemory operand representable on x86_64. */
2783
2784int
2785x86_64_nonmemory_operand (op, mode)
2786 rtx op;
2787 enum machine_mode mode;
2788{
2789 if (!TARGET_64BIT)
2790 return nonmemory_operand (op, mode);
2791 if (register_operand (op, mode))
2792 return 1;
2793 return x86_64_sign_extended_value (op);
2794}
2795
2796/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2797
2798int
2799x86_64_movabs_operand (op, mode)
2800 rtx op;
2801 enum machine_mode mode;
2802{
2803 if (!TARGET_64BIT || !flag_pic)
2804 return nonmemory_operand (op, mode);
2805 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2806 return 1;
2807 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2808 return 1;
2809 return 0;
2810}
2811
2812/* Return nonzero if OP is nonmemory operand representable on x86_64. */
2813
2814int
2815x86_64_szext_nonmemory_operand (op, mode)
2816 rtx op;
2817 enum machine_mode mode;
2818{
2819 if (!TARGET_64BIT)
2820 return nonmemory_operand (op, mode);
2821 if (register_operand (op, mode))
2822 return 1;
2823 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2824}
2825
2826/* Return nonzero if OP is immediate operand representable on x86_64. */
2827
2828int
2829x86_64_immediate_operand (op, mode)
2830 rtx op;
2831 enum machine_mode mode;
2832{
2833 if (!TARGET_64BIT)
2834 return immediate_operand (op, mode);
2835 return x86_64_sign_extended_value (op);
2836}
2837
2838/* Return nonzero if OP is immediate operand representable on x86_64. */
2839
2840int
2841x86_64_zext_immediate_operand (op, mode)
2842 rtx op;
2843 enum machine_mode mode ATTRIBUTE_UNUSED;
2844{
2845 return x86_64_zero_extended_value (op);
2846}
2847
2848/* Return nonzero if OP is (const_int 1), else return zero. */
2849
2850int
2851const_int_1_operand (op, mode)
2852 rtx op;
2853 enum machine_mode mode ATTRIBUTE_UNUSED;
2854{
2855 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2856}
2857
2858/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2859 for shift & compare patterns, as shifting by 0 does not change flags),
2860 else return zero. */
2861
2862int
2863const_int_1_31_operand (op, mode)
2864 rtx op;
2865 enum machine_mode mode ATTRIBUTE_UNUSED;
2866{
2867 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2868}
2869
2870/* Returns 1 if OP is either a symbol reference or a sum of a symbol
2871 reference and a constant. */
2872
2873int
2874symbolic_operand (op, mode)
2875 register rtx op;
2876 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 2877{
e075ae69 2878 switch (GET_CODE (op))
2a2ab3f9 2879 {
2880 case SYMBOL_REF:
2881 case LABEL_REF:
2882 return 1;
2883
2884 case CONST:
2885 op = XEXP (op, 0);
2886 if (GET_CODE (op) == SYMBOL_REF
2887 || GET_CODE (op) == LABEL_REF
2888 || (GET_CODE (op) == UNSPEC
2889 && (XINT (op, 1) == UNSPEC_GOT
2890 || XINT (op, 1) == UNSPEC_GOTOFF
2891 || XINT (op, 1) == UNSPEC_GOTPCREL)))
2892 return 1;
2893 if (GET_CODE (op) != PLUS
2894 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2895 return 0;
2896
2897 op = XEXP (op, 0);
2898 if (GET_CODE (op) == SYMBOL_REF
2899 || GET_CODE (op) == LABEL_REF)
2900 return 1;
2901 /* Only @GOTOFF gets offsets. */
2902 if (GET_CODE (op) != UNSPEC
8ee41eaf 2903 || XINT (op, 1) != UNSPEC_GOTOFF)
2904 return 0;
2905
2906 op = XVECEXP (op, 0, 0);
2907 if (GET_CODE (op) == SYMBOL_REF
2908 || GET_CODE (op) == LABEL_REF)
2909 return 1;
2910 return 0;
2911
2912 default:
2913 return 0;
2914 }
2915}
2a2ab3f9 2916
e075ae69 2917/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 2918
2919int
2920pic_symbolic_operand (op, mode)
2921 register rtx op;
2922 enum machine_mode mode ATTRIBUTE_UNUSED;
2923{
2924 if (GET_CODE (op) != CONST)
2925 return 0;
2926 op = XEXP (op, 0);
2927 if (TARGET_64BIT)
2928 {
2929 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2930 return 1;
2931 }
fce5a9f2 2932 else
2a2ab3f9 2933 {
2934 if (GET_CODE (op) == UNSPEC)
2935 return 1;
2936 if (GET_CODE (op) != PLUS
2937 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2938 return 0;
2939 op = XEXP (op, 0);
2940 if (GET_CODE (op) == UNSPEC)
2941 return 1;
2a2ab3f9 2942 }
e075ae69 2943 return 0;
2a2ab3f9 2944}
2a2ab3f9 2945
2946/* Return true if OP is a symbolic operand that resolves locally. */
2947
2948static int
2949local_symbolic_operand (op, mode)
2950 rtx op;
2951 enum machine_mode mode ATTRIBUTE_UNUSED;
2952{
2953 if (GET_CODE (op) == LABEL_REF)
2954 return 1;
2955
2956 if (GET_CODE (op) == CONST
2957 && GET_CODE (XEXP (op, 0)) == PLUS
2958 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2959 op = XEXP (XEXP (op, 0), 0);
2960
2961 if (GET_CODE (op) != SYMBOL_REF)
2962 return 0;
2963
2964 /* These we've been told are local by varasm and encode_section_info
2965 respectively. */
2966 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2967 return 1;
2968
2969 /* There is, however, a not insubstantial body of code in the rest of
fce5a9f2 2970 the compiler that assumes it can just stick the results of
2971 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2972 /* ??? This is a hack. Should update the body of the compiler to
 2973 always create a DECL and invoke targetm.encode_section_info. */
623fe810
RH
2974 if (strncmp (XSTR (op, 0), internal_label_prefix,
2975 internal_label_prefix_len) == 0)
2976 return 1;
2977
2978 return 0;
2979}
2980
2981/* Test for various thread-local symbols. See ix86_encode_section_info. */
2982
2983int
2984tls_symbolic_operand (op, mode)
2985 register rtx op;
2986 enum machine_mode mode ATTRIBUTE_UNUSED;
2987{
2988 const char *symbol_str;
2989
2990 if (GET_CODE (op) != SYMBOL_REF)
2991 return 0;
2992 symbol_str = XSTR (op, 0);
2993
2994 if (symbol_str[0] != '%')
2995 return 0;
755ac5d4 2996 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
2997}
2998
2999static int
3000tls_symbolic_operand_1 (op, kind)
3001 rtx op;
3002 enum tls_model kind;
3003{
3004 const char *symbol_str;
3005
3006 if (GET_CODE (op) != SYMBOL_REF)
3007 return 0;
3008 symbol_str = XSTR (op, 0);
3009
3010 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3011}
3012
3013int
3014global_dynamic_symbolic_operand (op, mode)
3015 register rtx op;
3016 enum machine_mode mode ATTRIBUTE_UNUSED;
3017{
3018 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3019}
3020
3021int
3022local_dynamic_symbolic_operand (op, mode)
3023 register rtx op;
3024 enum machine_mode mode ATTRIBUTE_UNUSED;
3025{
3026 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3027}
3028
3029int
3030initial_exec_symbolic_operand (op, mode)
3031 register rtx op;
3032 enum machine_mode mode ATTRIBUTE_UNUSED;
3033{
3034 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3035}
3036
3037int
3038local_exec_symbolic_operand (op, mode)
3039 register rtx op;
3040 enum machine_mode mode ATTRIBUTE_UNUSED;
3041{
3042 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3043}
3044
3045/* Test for a valid operand for a call instruction. Don't allow the
3046 arg pointer register or virtual regs since they may decay into
3047 reg + const, which the patterns can't handle. */
2a2ab3f9 3048
3049int
3050call_insn_operand (op, mode)
3051 rtx op;
3052 enum machine_mode mode ATTRIBUTE_UNUSED;
3053{
3054 /* Disallow indirect through a virtual register. This leads to
3055 compiler aborts when trying to eliminate them. */
3056 if (GET_CODE (op) == REG
3057 && (op == arg_pointer_rtx
564d80f4 3058 || op == frame_pointer_rtx
e075ae69
RH
3059 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3060 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3061 return 0;
2a2ab3f9 3062
3063 /* Disallow `call 1234'. Due to varying assembler lameness this
3064 gets either rejected or translated to `call .+1234'. */
3065 if (GET_CODE (op) == CONST_INT)
3066 return 0;
3067
3068 /* Explicitly allow SYMBOL_REF even if pic. */
3069 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 3070 return 1;
2a2ab3f9 3071
3072 /* Otherwise we can allow any general_operand in the address. */
3073 return general_operand (op, Pmode);
e075ae69 3074}
79325812 3075
3076int
3077constant_call_address_operand (op, mode)
3078 rtx op;
3079 enum machine_mode mode ATTRIBUTE_UNUSED;
3080{
3081 if (GET_CODE (op) == CONST
3082 && GET_CODE (XEXP (op, 0)) == PLUS
3083 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3084 op = XEXP (XEXP (op, 0), 0);
e1ff012c 3085 return GET_CODE (op) == SYMBOL_REF;
e075ae69 3086}
2a2ab3f9 3087
e075ae69 3088/* Match exactly zero and one. */
e9a25f70 3089
0f290768 3090int
3091const0_operand (op, mode)
3092 register rtx op;
3093 enum machine_mode mode;
3094{
3095 return op == CONST0_RTX (mode);
3096}
e9a25f70 3097
0f290768 3098int
3099const1_operand (op, mode)
3100 register rtx op;
3101 enum machine_mode mode ATTRIBUTE_UNUSED;
3102{
3103 return op == const1_rtx;
3104}
2a2ab3f9 3105
e075ae69 3106/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 3107
3108int
3109const248_operand (op, mode)
3110 register rtx op;
3111 enum machine_mode mode ATTRIBUTE_UNUSED;
3112{
3113 return (GET_CODE (op) == CONST_INT
3114 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3115}
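/* Illustrative use (hypothetical RTL): const248_operand accepts the scale in
   an address such as (plus (mult (reg) (const_int 4)) (reg)), which maps to
   `leal (%ebx,%eax,4), %ecx'; scales other than 2, 4 and 8 are rejected. */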
e9a25f70 3116
 3117/* True if this is a constant appropriate for an increment or decrement. */
81fd0956 3118
3119int
3120incdec_operand (op, mode)
3121 register rtx op;
0631e0bf 3122 enum machine_mode mode ATTRIBUTE_UNUSED;
e075ae69 3123{
 3124 /* On Pentium4, the inc and dec operations cause an extra dependency on the
 3125 flags register, since the carry flag is not set. */
3126 if (TARGET_PENTIUM4 && !optimize_size)
3127 return 0;
2b1c08f5 3128 return op == const1_rtx || op == constm1_rtx;
e075ae69 3129}
2a2ab3f9 3130
3131/* Return nonzero if OP is acceptable as operand of DImode shift
3132 expander. */
3133
3134int
3135shiftdi_operand (op, mode)
3136 rtx op;
3137 enum machine_mode mode ATTRIBUTE_UNUSED;
3138{
3139 if (TARGET_64BIT)
3140 return nonimmediate_operand (op, mode);
3141 else
3142 return register_operand (op, mode);
3143}
3144
0f290768 3145/* Return false if this is the stack pointer, or any other fake
3146 register eliminable to the stack pointer. Otherwise, this is
3147 a register operand.
2a2ab3f9 3148
3149 This is used to prevent esp from being used as an index reg.
3150 Which would only happen in pathological cases. */
5f1ec3e6 3151
3152int
3153reg_no_sp_operand (op, mode)
3154 register rtx op;
3155 enum machine_mode mode;
3156{
3157 rtx t = op;
3158 if (GET_CODE (t) == SUBREG)
3159 t = SUBREG_REG (t);
564d80f4 3160 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 3161 return 0;
2a2ab3f9 3162
e075ae69 3163 return register_operand (op, mode);
2a2ab3f9 3164}
b840bfb0 3165
3166int
3167mmx_reg_operand (op, mode)
3168 register rtx op;
bd793c65 3169 enum machine_mode mode ATTRIBUTE_UNUSED;
3170{
3171 return MMX_REG_P (op);
3172}
3173
3174/* Return false if this is any eliminable register. Otherwise
3175 general_operand. */
3176
3177int
3178general_no_elim_operand (op, mode)
3179 register rtx op;
3180 enum machine_mode mode;
3181{
3182 rtx t = op;
3183 if (GET_CODE (t) == SUBREG)
3184 t = SUBREG_REG (t);
3185 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3186 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3187 || t == virtual_stack_dynamic_rtx)
3188 return 0;
3189 if (REG_P (t)
3190 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3191 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3192 return 0;
3193
3194 return general_operand (op, mode);
3195}
3196
3197/* Return false if this is any eliminable register. Otherwise
3198 register_operand or const_int. */
3199
3200int
3201nonmemory_no_elim_operand (op, mode)
3202 register rtx op;
3203 enum machine_mode mode;
3204{
3205 rtx t = op;
3206 if (GET_CODE (t) == SUBREG)
3207 t = SUBREG_REG (t);
3208 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3209 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3210 || t == virtual_stack_dynamic_rtx)
3211 return 0;
3212
3213 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3214}
3215
e075ae69 3216/* Return true if op is a Q_REGS class register. */
b840bfb0 3217
3218int
3219q_regs_operand (op, mode)
3220 register rtx op;
3221 enum machine_mode mode;
b840bfb0 3222{
3223 if (mode != VOIDmode && GET_MODE (op) != mode)
3224 return 0;
3225 if (GET_CODE (op) == SUBREG)
3226 op = SUBREG_REG (op);
7799175f 3227 return ANY_QI_REG_P (op);
0f290768 3228}
b840bfb0 3229
e075ae69 3230/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 3231
3232int
3233non_q_regs_operand (op, mode)
3234 register rtx op;
3235 enum machine_mode mode;
3236{
3237 if (mode != VOIDmode && GET_MODE (op) != mode)
3238 return 0;
3239 if (GET_CODE (op) == SUBREG)
3240 op = SUBREG_REG (op);
3241 return NON_QI_REG_P (op);
0f290768 3242}

/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
   insns.  */
int
sse_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  enum rtx_code code = GET_CODE (op);
  switch (code)
    {
    /* Operations supported directly.  */
    case EQ:
    case LT:
    case LE:
    case UNORDERED:
    case NE:
    case UNGE:
    case UNGT:
    case ORDERED:
      return 1;
    /* These are equivalent to ones above in non-IEEE comparisons.  */
    case UNEQ:
    case UNLT:
    case UNLE:
    case LTGT:
    case GE:
    case GT:
      return !TARGET_IEEE_FP;
    default:
      return 0;
    }
}
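/* For reference (an illustrative note mirroring the SSE ISA encoding, not
   asserted by this file): the eight directly supported codes correspond
   to the cmpss/cmpps immediate values

	EQ=0  LT=1  LE=2  UNORDERED=3  NE=4  UNGE=5  UNGT=6  ORDERED=7

   The remaining codes are accepted only when !TARGET_IEEE_FP, where they
   can be treated as their exact-IEEE counterparts above.  */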
/* Return 1 if OP is a valid comparison operator in valid mode.  */
int
ix86_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));

  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      return (bypass_code == NIL && second_code == NIL);
    }
  switch (code)
    {
    case EQ: case NE:
      return 1;
    case LT: case GE:
      if (inmode == CCmode || inmode == CCGCmode
	  || inmode == CCGOCmode || inmode == CCNOmode)
	return 1;
      return 0;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      if (inmode == CCmode)
	return 1;
      return 0;
    case GT: case LE:
      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}
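/* Illustrative examples (not compiled), restating the switch above: the
   unsigned and ordered tests read the carry flag, so they are only valid
   on a full compare:

	(gtu (reg:CC flags) (const_int 0))	-> accepted
	(gtu (reg:CCNO flags) (const_int 0))	-> rejected; CCNOmode
						   promises nothing about CF

   while EQ/NE only need ZF and are accepted for any CC mode.  */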

/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));
  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	return 0;
      code = ix86_fp_compare_code_to_integer (code);
    }
  /* The i387 supports only a limited set of condition codes.  */
  switch (code)
    {
    case LTU: case GTU: case LEU: case GEU:
      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
	return 1;
      return 0;
    case ORDERED: case UNORDERED:
    case EQ: case NE:
      return 1;
    default:
      return 0;
    }
}

/* Return 1 if OP is a binary operator that can be promoted to wider mode.  */

int
promotable_binary_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case MULT:
      /* Modern CPUs have the same latency for HImode and SImode multiply,
	 but the 386 and 486 do HImode multiply faster.  */
      return ix86_cpu > PROCESSOR_I486;
    case PLUS:
    case AND:
    case IOR:
    case XOR:
    case ASHIFT:
      return 1;
    default:
      return 0;
    }
}

/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */

int
cmp_fp_expander_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (GET_CODE (op) == CONST_DOUBLE)
    return 1;
  return general_operand (op, mode);
}

/* Match an SI or HImode register for a zero_extract.  */

int
ext_register_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  int regno;
  if ((!TARGET_64BIT || GET_MODE (op) != DImode)
      && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;

  if (!register_operand (op, VOIDmode))
    return 0;

  /* Be careful to accept only registers having upper parts.  */
  regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
  return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
}

/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}

/* Return true if OP is a MULT rtx.  */

int
mult_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == MULT;
}

/* Return true if OP is a DIV rtx.  */

int
div_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == DIV;
}

/* Return true if OP is a binary (commutative or not) operator in MODE.  */

int
arith_or_logical_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
	      || GET_RTX_CLASS (GET_CODE (op)) == '2'));
}

/* Returns 1 if OP is a memory operand with a displacement.  */

int
memory_displacement_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}

/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (nonimmediate_operand (op, mode))
    return 1;

  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}

/* Returns 1 if OP is a memory operand that cannot be represented by the
   modRM array.  */

int
long_memory_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}

/* Return nonzero if the rtx is known aligned.  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
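/* Illustrative outcomes (not compiled), assuming REGNO_POINTER_ALIGN
   reports 32 bits for %esp and less for %eax:

	(mem:SI (plus (reg %esp) (const_int 8)))  -> aligned; base known
						     aligned, disp % 4 == 0
	(mem:SI (plus (reg %esp) (const_int 2)))  -> not aligned; disp
	(mem:SI (reg %eax))			  -> not aligned; base  */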
\f
/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;
  /* Note that the 80387 supports other constants, such as pi, that we
     should handle too.  On some machines these are much slower to load
     as standard constants than to load from doubles in memory.  */
  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;
  return 0;
}
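/* Illustrative note (an assumption about the md patterns, not asserted by
   this file): a return value of 1 is typically materialized with fldz and
   a return value of 2 with fld1, e.g.

	double d = 0.0;		=>	fldz
	double e = 1.0;		=>	fld1  */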

/* Return 1 if X is an FP constant we can load into an SSE register
   without using memory.  */
int
standard_sse_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE)
    return -1;
  return (x == CONST0_RTX (GET_MODE (x)));
}

/* Returns 1 if OP contains a symbol reference.  */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}

/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  struct ix86_frame frame;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32768 bytes of pops, since that's all we can
     do with one `ret' instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}
\f
/* Return 1 if VALUE can be stored in the sign extended immediate field.  */
int
x86_64_sign_extended_value (value)
     rtx value;
{
  switch (GET_CODE (value))
    {
      /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
	 to be at least 32, so all acceptable constants are
	 represented as CONST_INT.  */
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
	return 1;
      else
	{
	  HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
	  return trunc_int_for_mode (val, SImode) == val;
	}
      break;

      /* For certain code models, the symbolic references are known to fit.  */
    case SYMBOL_REF:
      return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;

      /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;

      /* We may also accept offsetted memory references in certain special
	 cases.  */
    case CONST:
      if (GET_CODE (XEXP (value, 0)) == UNSPEC
	  && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
	return 1;
      else if (GET_CODE (XEXP (value, 0)) == PLUS)
	{
	  rtx op1 = XEXP (XEXP (value, 0), 0);
	  rtx op2 = XEXP (XEXP (value, 0), 1);
	  HOST_WIDE_INT offset;

	  if (ix86_cmodel == CM_LARGE)
	    return 0;
	  if (GET_CODE (op2) != CONST_INT)
	    return 0;
	  offset = trunc_int_for_mode (INTVAL (op2), DImode);
	  switch (GET_CODE (op1))
	    {
	    case SYMBOL_REF:
	      /* For CM_SMALL, assume that the latest object ends 1MB
		 before the 31-bit boundary.  We may also accept pretty
		 large negative constants, knowing that all objects are
		 in the positive half of the address space.  */
	      if (ix86_cmodel == CM_SMALL
		  && offset < 1024*1024*1024
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      /* For CM_KERNEL, we know that all objects reside in the
		 negative half of the 32-bit address space.  We may not
		 accept negative offsets, since they may be just off,
		 but we may accept pretty large positive ones.  */
	      if (ix86_cmodel == CM_KERNEL
		  && offset > 0
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      break;
	    case LABEL_REF:
	      /* These conditions are similar to the SYMBOL_REF ones, just
		 the constraints for code models differ.  */
	      if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		  && offset < 1024*1024*1024
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      if (ix86_cmodel == CM_KERNEL
		  && offset > 0
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      break;
	    default:
	      return 0;
	    }
	}
      return 0;
    default:
      return 0;
    }
}
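/* Illustrative values (not compiled):

	0x7fffffff	-> 1; fits a sign extended 32-bit immediate
	-0x80000000	-> 1; sign extension reproduces the value
	0x80000000	-> 0; bit 31 is set, so sign extending from
			      32 bits would yield 0xffffffff80000000  */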

/* Return 1 if VALUE can be stored in the zero extended immediate field.  */
int
x86_64_zero_extended_value (value)
     rtx value;
{
  switch (GET_CODE (value))
    {
    case CONST_DOUBLE:
      if (HOST_BITS_PER_WIDE_INT == 32)
	return (GET_MODE (value) == VOIDmode
		&& !CONST_DOUBLE_HIGH (value));
      else
	return 0;
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
	return INTVAL (value) >= 0;
      else
	return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
      break;

      /* For certain code models, the symbolic references are known to fit.  */
    case SYMBOL_REF:
      return ix86_cmodel == CM_SMALL;

      /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;

      /* We may also accept offsetted memory references in certain special
	 cases.  */
    case CONST:
      if (GET_CODE (XEXP (value, 0)) == PLUS)
	{
	  rtx op1 = XEXP (XEXP (value, 0), 0);
	  rtx op2 = XEXP (XEXP (value, 0), 1);

	  if (ix86_cmodel == CM_LARGE)
	    return 0;
	  switch (GET_CODE (op1))
	    {
	    case SYMBOL_REF:
	      return 0;
	      /* For the small code model we may accept pretty large
		 positive offsets, since one bit is available for free.
		 Negative offsets are limited by the size of the NULL
		 pointer area specified by the ABI.  */
	      if (ix86_cmodel == CM_SMALL
		  && GET_CODE (op2) == CONST_INT
		  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
		  && (trunc_int_for_mode (INTVAL (op2), SImode)
		      == INTVAL (op2)))
		return 1;
	      /* ??? For the kernel, we may accept adjustment of
		 -0x10000000, since we know that it will just convert
		 negative address space to positive, but perhaps this
		 is not worthwhile.  */
	      break;
	    case LABEL_REF:
	      /* These conditions are similar to the SYMBOL_REF ones, just
		 the constraints for code models differ.  */
	      if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		  && GET_CODE (op2) == CONST_INT
		  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
		  && (trunc_int_for_mode (INTVAL (op2), SImode)
		      == INTVAL (op2)))
		return 1;
	      break;
	    default:
	      return 0;
	    }
	}
      return 0;
    default:
      return 0;
    }
}
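/* Illustrative values (not compiled):

	0xffffffff	-> 1; fits a zero extended 32-bit immediate
	-1		-> 0; as a 64-bit value the upper bits are set
	0x100000000	-> 0; does not fit in 32 bits at all  */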

/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

int
ix86_frame_pointer_required ()
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return 1;

  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return 1;

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf || current_function_profile))
    return 1;

  return 0;
}

/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses ()
{
  cfun->machine->accesses_prev_frame = 1;
}
\f
#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (name, regno)
     char name[32];
     unsigned int regno;
{
  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
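/* Illustrative names (not compiled): for the usual PIC register %ebx
   this yields "__i686.get_pc_thunk.bx" when hidden linkonce sections are
   usable, and an internal label (spelled per ASM_GENERATE_INTERNAL_LABEL)
   otherwise.  The loop below emits the thunk body itself, which is just

	movl	(%esp), %ebx
	ret  */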

/* This function generates code for -fpic that loads the PIC register
   with the return address of the caller and then returns.  */

void
ix86_asm_file_end (file)
     FILE *file;
{
  rtx xops[2];
  int regno;

  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      if (! ((pic_labels_used >> regno) & 1))
	continue;

      get_pc_thunk_name (name, regno);

      if (USE_HIDDEN_LINKONCE)
	{
	  tree decl;

	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
			     error_mark_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  named_section (decl, NULL, 0);

	  ASM_GLOBALIZE_LABEL (file, name);
	  fputs ("\t.hidden\t", file);
	  assemble_name (file, name);
	  fputc ('\n', file);
	  ASM_DECLARE_FUNCTION_NAME (file, name, decl);
	}
      else
	{
	  text_section ();
	  ASM_OUTPUT_LABEL (file, name);
	}

      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }
}

/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (dest)
     rtx dest;
{
  rtx xops[3];

  xops[0] = dest;
  xops[1] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());

      if (!flag_pic)
	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      else
	output_asm_insn ("call\t%a2", xops);

#if TARGET_MACHO
      /* Output the "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referred to by the Mach-O PIC subsystem.  */
      ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
#endif
      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

      if (flag_pic)
	output_asm_insn ("pop{l}\t%0", xops);
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
    }

  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
  else if (!TARGET_MACHO)
    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);

  return "";
}
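/* Illustrative output (AT&T syntax, %ebx as the PIC register; a sketch,
   not quoted from the assembler):

   without deep branch prediction:
	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   with deep branch prediction:
	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   The thunk variant keeps call/ret pairs balanced for the CPU's
   return-address predictor.  */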

/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (arg)
     rtx arg;
{
  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (Pmode,
				   gen_rtx_PRE_DEC (Pmode,
						    stack_pointer_rtx)),
		      arg);
}
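/* Illustrative RTL (not compiled): for ARG = (reg:SI %eax) on a 32-bit
   target this builds

	(set (mem:SI (pre_dec:SI (reg:SI %esp)))
	     (reg:SI %eax))

   which the push patterns print as "push{l} %eax".  */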

/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum ()
{
  if (current_function_is_leaf && !current_function_profile)
    {
      int i;
      for (i = 2; i >= 0; --i)
	if (!regs_ever_live[i])
	  return i;
    }

  return INVALID_REGNUM;
}

/* Return 1 if we need to save REGNO.  */
static int
ix86_save_reg (regno, maybe_eh_return)
     unsigned int regno;
     int maybe_eh_return;
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile
	  || current_function_calls_eh_return))
    {
      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
	return 0;
      return 1;
    }

  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return 1;
	}
    }

  return (regs_ever_live[regno]
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}

/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs ()
{
  int nregs = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}

/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;
     int to;
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      if (to != STACK_POINTER_REGNUM)
	abort ();
      else if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;
      else if (from != FRAME_POINTER_REGNUM)
	abort ();
      else
	return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}

/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (frame)
     struct ix86_frame *frame;
{
  HOST_WIDE_INT total_size;
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  int offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  total_size = size;

  /* Skip return address and saved base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using these
     features, and they may break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  /* Va-arg area */
  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
		     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.  */
  if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || current_function_calls_alloca)
    frame->padding2 = ((offset + preferred_alignment - 1)
		       & -preferred_alignment) - offset;
  else
    frame->padding2 = 0;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
	   frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
#endif
}
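/* Illustrative layout computed above (offsets grow away from the return
   address; a sketch derived from the code, not normative):

	return address			<- offset 0
	saved %ebp (if needed)		<- hard_frame_pointer_offset
	saved registers (nregs words)
	va-arg save area
	padding1
	local variables			<- frame_pointer_offset
	outgoing arguments
	padding2			<- stack_pointer_offset
					   (less the red zone, on x86-64)  */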

/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs ()
{
  register int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
4241
c6036a37
JH
4242/* Emit code to save registers using MOV insns. First register
4243 is restored from POINTER + OFFSET. */
4244static void
4245ix86_emit_save_regs_using_mov (pointer, offset)
b72f00af
RK
4246 rtx pointer;
4247 HOST_WIDE_INT offset;
c6036a37
JH
4248{
4249 int regno;
4250 rtx insn;
4251
4252 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4253 if (ix86_save_reg (regno, true))
4254 {
b72f00af
RK
4255 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4256 Pmode, offset),
c6036a37
JH
4257 gen_rtx_REG (Pmode, regno));
4258 RTX_FRAME_RELATED_P (insn) = 1;
4259 offset += UNITS_PER_WORD;
4260 }
4261}

/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue ()
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  int use_mov = 0;
  HOST_WIDE_INT allocate;

  if (!optimize_size)
    {
      use_fast_prologue_epilogue
	= !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
      if (TARGET_PROLOGUE_USING_MOVE)
	use_mov = use_fast_prologue_epilogue;
    }
  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;
  /* In case we are dealing only with a single register and an empty
     frame, push is equivalent to the mov+add sequence.  */
  if (allocate == 0 && frame.nregs <= 1)
    use_mov = 0;

  if (!use_mov)
    ix86_emit_save_regs ();
  else
    allocate += frame.nregs * UNITS_PER_WORD;

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    {
      insn = emit_insn (gen_pro_epilogue_adjust_stack
			(stack_pointer_rtx, stack_pointer_rtx,
			 GEN_INT (-allocate)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      if (TARGET_64BIT)
	abort ();

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (allocate));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));

      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }
  if (use_mov)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
	 deleting all references to the pic register after reload.
	 Consider if cross-jumping unifies two sides of a branch
	 controlled by a comparison vs the only read from a global.
	 In which case, allow the set_got to be deleted, though we're
	 too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}
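/* Illustrative result (AT&T syntax; a sketch) for a small frame with a
   frame pointer and one call-saved register:

	push{l}	%ebp
	mov{l}	%esp, %ebp
	push{l}	%ebx
	sub{l}	$N, %esp

   or, with TARGET_PROLOGUE_USING_MOVE, one combined subtraction followed
   by mov stores of the saved registers.  */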

/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
     rtx pointer;
     int offset;
     int maybe_eh_return;
{
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
	emit_move_insn (gen_rtx_REG (Pmode, regno),
			adjust_address (gen_rtx_MEM (Pmode, pointer),
					Pmode, offset));
	offset += UNITS_PER_WORD;
      }
}

/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (style)
     int style;
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid, then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in a stack adjustment using add/lea
     instructions, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well.  Especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in the
     future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && use_fast_prologue_epilogue && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  The only exception is esp pointing
	 directly to the end of the block of saved registers, where we
	 may simplify the addressing mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      emit_insn (gen_pro_epilogue_adjust_stack
			 (stack_pointer_rtx, sa, const0_rtx));
	    }
	  else
	    {
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate
			     + frame.nregs * UNITS_PER_WORD)));
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx));
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset)));
	}
      else if (frame.to_allocate)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate)));

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave produces shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in the 64-bit ABI.  */
	  if (TARGET_64BIT)
	    abort ();

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
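/* Illustrative epilogue shapes (AT&T syntax; a sketch):

	mov+pop style:		mov{l}	%ebp, %esp
				pop{l}	%ebp
				ret

	leave style:		leave
				ret

	stdcall/pascal:		ret	$N	(N < 64K)  */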

/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (file, size)
     FILE *file ATTRIBUTE_UNUSED;
     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
{
  if (pic_offset_table_rtx)
    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
}
\f
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing the length of a lea
   instruction.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;

  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;			/* index*scale + base */
	  else
	    disp = op1;			/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);  /* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return 0;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
	  || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return retval;
}
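/* Illustrative decomposition (not compiled): the address of a[i] with a
   4-byte element, i.e.

	(plus:SI (mult:SI (reg:SI %eax) (const_int 4))
		 (plus:SI (reg:SI %ebx) (const_int 8)))

   yields base = %ebx, index = %eax, scale = 4, disp = 8, matching the
   assembly form 8(%ebx,%eax,4).  */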
\f
/* Return the cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
int
ix86_address_cost (x)
     rtx x;
{
  struct ix86_address parts;
  int cost = 1;

  if (!ix86_decompose_address (x, &parts))
    abort ();

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions,
     and it degenerates to vector decoding.  Increase the cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be
     worthwhile to split such addresses or even refuse such addresses at
     all.

     The following addressing modes are affected:
	[base+scale*index]
	[scale*index+disp]
	[base+index]

     The first and last case may be avoidable by explicitly coding the
     zero in the memory address, but I don't have an AMD K6 machine handy
     to check this theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
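/* Illustrative costs (not compiled), with hard registers throughout:

	4(%ebp)		-> 0	(base plus nonzero displacement)
	(%eax)		-> 1
	(%eax,%ebx)	-> 1	(11 on K6: base+index without disp)  */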
\f
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (x)
     rtx x;
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || XINT (term, 1) != UNSPEC_GOTPCREL)
	return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
	  && GET_CODE (term) != LABEL_REF)
	return x;

      return term;
    }

  if (GET_CODE (x) != PLUS
      || XEXP (x, 0) != pic_offset_table_rtx
      || GET_CODE (XEXP (x, 1)) != CONST)
    return x;

  term = XEXP (XEXP (x, 1), 0);

  if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
    term = XEXP (term, 0);

  if (GET_CODE (term) != UNSPEC
      || XINT (term, 1) != UNSPEC_GOTOFF)
    return x;

  term = XVECEXP (term, 0, 0);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}
\f
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

bool
legitimate_constant_p (x)
     rtx x;
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case SYMBOL_REF:
      /* TLS symbols are not constant.  */
      if (tls_symbolic_operand (x, Pmode))
	return false;
      break;

    case CONST:
      inner = XEXP (x, 0);

      /* Offsets of TLS symbols are never valid.
	 Discourage CSE from creating them.  */
      if (GET_CODE (inner) == PLUS
	  && tls_symbolic_operand (XEXP (inner, 0), Pmode))
	return false;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  default:
	    return false;
	  }
      break;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}

/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (x)
     rtx x;
{
  switch (GET_CODE (x))
    {
    case LABEL_REF:
    case CONST_INT:
      return true;

    case CONST_DOUBLE:
      return TARGET_64BIT;

    case CONST:
      /* For Mach-O, really believe the CONST.  */
      if (TARGET_MACHO)
	return true;
      /* Otherwise fall through.  */
    case SYMBOL_REF:
      return !flag_pic && legitimate_constant_p (x);

    default:
      return false;
    }
}

/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (x)
     rtx x;
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}

/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (disp)
     register rtx disp;
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx x = disp;
      if (GET_CODE (disp) == CONST)
	x = XEXP (disp, 0);
      /* ??? Handle PIC code models */
      if (GET_CODE (x) == PLUS
	  && (GET_CODE (XEXP (x, 1)) == CONST_INT
	      && ix86_cmodel == CM_SMALL_PIC
	      && INTVAL (XEXP (x, 1)) < 1024*1024*1024
	      && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
	x = XEXP (x, 0);
      if (local_symbolic_operand (x, Pmode))
	return 1;
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions.  This would limit the
	 allowed distance of GOT tables, and we should not need these
	 anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
  if (TARGET_MACHO && GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
	  || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
	  {
	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
	    if (strstr (sym_name, "$pb") != 0)
	      return 1;
	  }
    }

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_GOTTPOFF:
      if (saw_plus)
	return false;
      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_NTPOFF:
      /* ??? Could support offset here.  */
      if (saw_plus)
	return false;
      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_DTPOFF:
      /* ??? Could support offset here.  */
      if (saw_plus)
	return false;
      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}

/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

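/* For illustration (a sketch, not part of the original source): a
   canonical ia32 address has the shape base + index*scale + disp, e.g.
       (plus:SI (plus:SI (mult:SI (reg:SI %eax) (const_int 4))
                         (reg:SI %ebx))
                (const_int 16))
   with the scale constrained below to 1, 2, 4 or 8.  */
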
int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
               "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
               GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
    {
      if (TARGET_DEBUG_ADDR)
        fprintf (stderr, "Success.\n");
      return TRUE;
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREGs here; they can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      rtx reg;
      reason_rtx = base;

      if (GET_CODE (base) == SUBREG)
        reg = SUBREG_REG (base);
      else
        reg = base;

      if (GET_CODE (reg) != REG)
        {
          reason = "base is not a register";
          goto report_error;
        }

      if (GET_MODE (base) != Pmode)
        {
          reason = "base is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
        {
          reason = "base is not valid";
          goto report_error;
        }
    }

  /* Validate index register.

     Don't allow SUBREGs here; they can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      rtx reg;
      reason_rtx = index;

      if (GET_CODE (index) == SUBREG)
        reg = SUBREG_REG (index);
      else
        reg = index;

      if (GET_CODE (reg) != REG)
        {
          reason = "index is not a register";
          goto report_error;
        }

      if (GET_MODE (index) != Pmode)
        {
          reason = "index is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
        {
          reason = "index is not valid";
          goto report_error;
        }
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
        {
          reason = "scale without index";
          goto report_error;
        }

      if (scale != 2 && scale != 4 && scale != 8)
        {
          reason = "scale is not a valid multiplier";
          goto report_error;
        }
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (TARGET_64BIT)
        {
          if (!x86_64_sign_extended_value (disp))
            {
              reason = "displacement is out of range";
              goto report_error;
            }
        }
      else
        {
          if (GET_CODE (disp) == CONST_DOUBLE)
            {
              reason = "displacement is a const_double";
              goto report_error;
            }
        }

      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == UNSPEC)
        switch (XINT (XEXP (disp, 0), 1))
          {
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
          case UNSPEC_GOTPCREL:
            if (!flag_pic)
              abort ();
            goto is_legitimate_pic;

          case UNSPEC_GOTTPOFF:
          case UNSPEC_NTPOFF:
          case UNSPEC_DTPOFF:
            break;

          default:
            reason = "invalid address unspec";
            goto report_error;
          }

      else if (flag_pic && (SYMBOLIC_CONST (disp)
#if TARGET_MACHO
                            && !machopic_operand_p (disp)
#endif
                            ))
        {
        is_legitimate_pic:
          if (TARGET_64BIT && (index || base))
            {
              reason = "non-constant pic memory reference";
              goto report_error;
            }
          if (! legitimate_pic_address_disp_p (disp))
            {
              reason = "displacement is an invalid pic construct";
              goto report_error;
            }

          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is a good idea, unfortunately these constructs may
             be created by the "adds using lea" optimization for incorrect
             code like:

                 int a;
                 int foo (int i)
                   {
                     return *(&a + i);
                   }

             This code is nonsensical, but results in addressing the
             GOT table with the pic_offset_table_rtx base.  We can't
             just refuse it easily, since it gets matched by the
             "addsi3" pattern, which later gets split to lea in the
             case the output register differs from the input.  While this
             could be handled by a separate addsi pattern for this case
             that never results in lea, disabling this test seems to be
             the easier and correct fix for the crash.  */
        }
      else if (!CONSTANT_ADDRESS_P (disp))
        {
          reason = "displacement is not constant";
          goto report_error;
        }
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
\f
/* Return a unique alias set for the GOT.  */

static HOST_WIDE_INT
ix86_GOT_alias_set ()
{
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

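/* For illustration (a sketch, not part of the original source): on ia32
   a local symbol "bar" becomes
       (plus:SI (reg:SI %ebx)
                (const:SI (unspec [(symbol_ref "bar")] UNSPEC_GOTOFF)))
   while a global "foo" becomes a load,
       (mem:SI (plus:SI (reg:SI %ebx)
                        (const:SI (unspec [(symbol_ref "foo")] UNSPEC_GOT))))
   i.e. roughly foo@GOT(%ebx) in the emitted assembly.  */
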
rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (reg == 0)
    reg = gen_reg_rtx (Pmode);
  /* Use the generic Mach-O PIC machinery.  */
  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
#endif

  if (local_symbolic_operand (addr, Pmode))
    {
      /* In 64bit mode we can address such objects directly.  */
      if (TARGET_64BIT)
        new = addr;
      else
        {
          /* This symbol may be referenced via a displacement from the PIC
             base address (@GOTOFF).  */

          if (reload_in_progress)
            regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
          new = gen_rtx_CONST (Pmode, new);
          new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

          if (reg != 0)
            {
              emit_move_insn (reg, new);
              new = reg;
            }
        }
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
        {
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
          new = gen_rtx_CONST (Pmode, new);
          new = gen_rtx_MEM (Pmode, new);
          RTX_UNCHANGING_P (new) = 1;
          set_mem_alias_set (new, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          /* Use gen_movsi directly; otherwise the address would be loaded
             into a register for CSE.  We don't want to CSE this address
             itself; instead we CSE addresses from the GOT table, so skip
             that.  */
          emit_insn (gen_movsi (reg, new));
          new = reg;
        }
      else
        {
          /* This symbol must be referenced via a load from the
             Global Offset Table (@GOT).  */

          if (reload_in_progress)
            regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
          new = gen_rtx_CONST (Pmode, new);
          new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
          new = gen_rtx_MEM (Pmode, new);
          RTX_UNCHANGING_P (new) = 1;
          set_mem_alias_set (new, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          emit_move_insn (reg, new);
          new = reg;
        }
    }
  else
    {
      if (GET_CODE (addr) == CONST)
        {
          addr = XEXP (addr, 0);

          /* We must match stuff we generate before.  Assume the only
             unspecs that can get here are ours.  Not that we could do
             anything with them anyway...  */
          if (GET_CODE (addr) == UNSPEC
              || (GET_CODE (addr) == PLUS
                  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
            return orig;
          if (GET_CODE (addr) != PLUS)
            abort ();
        }
      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

          /* Check first to see if this is a constant offset from a @GOTOFF
             symbol reference.  */
          if (local_symbolic_operand (op0, Pmode)
              && GET_CODE (op1) == CONST_INT)
            {
              if (!TARGET_64BIT)
                {
                  if (reload_in_progress)
                    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
                  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
                                        UNSPEC_GOTOFF);
                  new = gen_rtx_PLUS (Pmode, new, op1);
                  new = gen_rtx_CONST (Pmode, new);
                  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

                  if (reg != 0)
                    {
                      emit_move_insn (reg, new);
                      new = reg;
                    }
                }
              else
                {
                  /* ??? We need to limit offsets here.  */
                }
            }
          else
            {
              base = legitimize_pic_address (XEXP (addr, 0), reg);
              new  = legitimize_pic_address (XEXP (addr, 1),
                                             base == reg ? NULL_RTX : reg);

              if (GET_CODE (new) == CONST_INT)
                new = plus_constant (base, INTVAL (new));
              else
                {
                  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
                    {
                      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
                      new = XEXP (new, 1);
                    }
                  new = gen_rtx_PLUS (Pmode, base, new);
                }
            }
        }
    }
  return new;
}

static void
ix86_encode_section_info (decl, first)
     tree decl;
     int first ATTRIBUTE_UNUSED;
{
  bool local_p = (*targetm.binds_local_p) (decl);
  rtx rtl, symbol;

  rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
  if (GET_CODE (rtl) != MEM)
    return;
  symbol = XEXP (rtl, 0);
  if (GET_CODE (symbol) != SYMBOL_REF)
    return;

  /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
     symbol so that we may access it directly in the GOT.  */

  if (flag_pic)
    SYMBOL_REF_FLAG (symbol) = local_p;

  /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
     "local dynamic", "initial exec" or "local exec" TLS models
     respectively.  */

  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
    {
      const char *symbol_str;
      char *newstr;
      size_t len;
      enum tls_model kind;

      if (!flag_pic)
        {
          if (local_p)
            kind = TLS_MODEL_LOCAL_EXEC;
          else
            kind = TLS_MODEL_INITIAL_EXEC;
        }
      /* Local dynamic is inefficient when we're not combining the
         parts of the address.  */
      else if (optimize && local_p)
        kind = TLS_MODEL_LOCAL_DYNAMIC;
      else
        kind = TLS_MODEL_GLOBAL_DYNAMIC;
      if (kind < flag_tls_default)
        kind = flag_tls_default;

      symbol_str = XSTR (symbol, 0);

      if (symbol_str[0] == '%')
        {
          if (symbol_str[1] == tls_model_chars[kind])
            return;
          symbol_str += 2;
        }
      len = strlen (symbol_str) + 1;
      newstr = alloca (len + 2);

      newstr[0] = '%';
      newstr[1] = tls_model_chars[kind];
      memcpy (newstr + 2, symbol_str, len);

      XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
    }
}
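
/* For illustration (a sketch, not part of the original source): with
   -fpic and -O, a locally bound thread-local "foo" is renamed "%Lfoo"
   (local dynamic), while without -fpic a non-local one becomes "%ifoo"
   (initial exec); ix86_strip_name_encoding below undoes this renaming
   when the symbol name is printed.  */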

/* Undo the above when printing symbol names.  */

static const char *
ix86_strip_name_encoding (str)
     const char *str;
{
  if (str[0] == '%')
    str += 2;
  if (str[0] == '*')
    str += 1;
  return str;
}
\f
/* Load the thread pointer into a register.  */

static rtx
get_thread_pointer ()
{
  rtx tp;

  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  tp = gen_rtx_MEM (Pmode, tp);
  RTX_UNCHANGING_P (tp) = 1;
  set_mem_alias_set (tp, ix86_GOT_alias_set ());
  tp = force_reg (Pmode, tp);

  return tp;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

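/* Illustrative sketch (not part of the original source): the shift
   canonicalization below turns, e.g.,
       (plus (ashift (reg) (const_int 2)) (reg'))
   into
       (plus (mult (reg) (const_int 4)) (reg'))
   so GO_IF_LEGITIMATE_ADDRESS sees the canonical scaled-index form.  */
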
rtx
legitimize_address (x, oldx, mode)
     register rtx x;
     register rtx oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
               GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  log = tls_symbolic_operand (x, mode);
  if (log)
    {
      rtx dest, base, off, pic;

      switch (log)
        {
        case TLS_MODEL_GLOBAL_DYNAMIC:
          dest = gen_reg_rtx (Pmode);
          emit_insn (gen_tls_global_dynamic (dest, x));
          break;

        case TLS_MODEL_LOCAL_DYNAMIC:
          base = gen_reg_rtx (Pmode);
          emit_insn (gen_tls_local_dynamic_base (base));

          off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
          off = gen_rtx_CONST (Pmode, off);

          return gen_rtx_PLUS (Pmode, base, off);

        case TLS_MODEL_INITIAL_EXEC:
          if (flag_pic)
            {
              if (reload_in_progress)
                regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
              pic = pic_offset_table_rtx;
            }
          else
            {
              pic = gen_reg_rtx (Pmode);
              emit_insn (gen_set_got (pic));
            }

          base = get_thread_pointer ();

          off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_GOTTPOFF);
          off = gen_rtx_CONST (Pmode, off);
          off = gen_rtx_PLUS (Pmode, pic, off);
          off = gen_rtx_MEM (Pmode, off);
          RTX_UNCHANGING_P (off) = 1;
          set_mem_alias_set (off, ix86_GOT_alias_set ());

          /* Damn Sun for specifying a set of dynamic relocations without
             considering the two-operand nature of the architecture!
             We'd be much better off with a "GOTNTPOFF" relocation that
             already contained the negated constant.  */
          /* ??? Using negl and reg+reg addressing appears to be a loss
             size-wise.  The negl is two bytes, just like the extra movl
             incurred by the two-operand subl, but reg+reg addressing
             uses the two-byte modrm form, unlike plain reg.  */
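
          /* Illustrative sketch (not part of the original source): the
             sequence built here amounts to roughly
                 movl %gs:0, %reg                # thread pointer
                 subl x@GOTTPOFF(%ebx), %reg     # tp - GOT-loaded offset
             since the relocation supplies the positive offset that must
             be subtracted.  */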

          dest = gen_reg_rtx (Pmode);
          emit_insn (gen_subsi3 (dest, base, off));
          break;

        case TLS_MODEL_LOCAL_EXEC:
          base = get_thread_pointer ();

          off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
                                TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
          off = gen_rtx_CONST (Pmode, off);

          if (TARGET_GNU_TLS)
            return gen_rtx_PLUS (Pmode, base, off);
          else
            {
              dest = gen_reg_rtx (Pmode);
              emit_insn (gen_subsi3 (dest, base, off));
            }
          break;

        default:
          abort ();
        }

      return dest;
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
                        GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
          && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
          && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
        {
          changed = 1;
          XEXP (x, 0) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
                                      GEN_INT (1 << log));
        }

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
          && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
          && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
        {
          changed = 1;
          XEXP (x, 1) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
                                      GEN_INT (1 << log));
        }

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          rtx tmp = XEXP (x, 0);
          XEXP (x, 0) = XEXP (x, 1);
          XEXP (x, 1) = tmp;
          changed = 1;
        }

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
         created by virtual register instantiation, register elimination, and
         similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
        {
          changed = 1;
          x = gen_rtx_PLUS (Pmode,
                            gen_rtx_PLUS (Pmode, XEXP (x, 0),
                                          XEXP (XEXP (x, 1), 0)),
                            XEXP (XEXP (x, 1), 1));
        }

      /* Canonicalize
         (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
               && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
               && CONSTANT_P (XEXP (x, 1)))
        {
          rtx constant;
          rtx other = NULL_RTX;

          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              constant = XEXP (x, 1);
              other = XEXP (XEXP (XEXP (x, 0), 1), 1);
            }
          else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
            {
              constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
              other = XEXP (x, 1);
            }
          else
            constant = 0;

          if (constant)
            {
              changed = 1;
              x = gen_rtx_PLUS (Pmode,
                                gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
                                              XEXP (XEXP (XEXP (x, 0), 1), 0)),
                                plus_constant (other, INTVAL (constant)));
            }
        }

      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
        {
          changed = 1;
          XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
        }

      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          changed = 1;
          XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
        }

      if (changed
          && GET_CODE (XEXP (x, 1)) == REG
          && GET_CODE (XEXP (x, 0)) == REG)
        return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
        {
          changed = 1;
          x = legitimize_pic_address (x, 0);
        }

      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      if (GET_CODE (XEXP (x, 0)) == REG)
        {
          register rtx temp = gen_reg_rtx (Pmode);
          register rtx val = force_operand (XEXP (x, 1), temp);
          if (val != temp)
            emit_move_insn (temp, val);

          XEXP (x, 1) = temp;
          return x;
        }

      else if (GET_CODE (XEXP (x, 1)) == REG)
        {
          register rtx temp = gen_reg_rtx (Pmode);
          register rtx val = force_operand (XEXP (x, 0), temp);
          if (val != temp)
            emit_move_insn (temp, val);

          XEXP (x, 0) = temp;
          return x;
        }
    }

  return x;
}
\f
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

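/* For illustration (a sketch, not part of the original source):
   (const (plus (symbol_ref "foo") (const_int 4))) prints as "4+foo"
   (the constant first, as some assemblers require), and
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) prints as
   "foo@GOTOFF", matching the unspec cases handled below.  */
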
static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      if (flag_pic)
        putc ('.', file);
      else
        abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
        fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
         but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
        {
          /* We can use %d if the number is <32 bits and positive.  */
          if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
            fprintf (file, "0x%lx%08lx",
                     (unsigned long) CONST_DOUBLE_HIGH (x),
                     (unsigned long) CONST_DOUBLE_LOW (x));
          else
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
        }
      else
        /* We can't handle floating point constants;
           PRINT_OPERAND must handle them.  */
        output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
        {
          output_pic_addr_const (file, XEXP (x, 0), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 1), code);
        }
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          output_pic_addr_const (file, XEXP (x, 1), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 0), code);
        }
      else
        abort ();
      break;

    case MINUS:
      if (!TARGET_MACHO)
        putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
        putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
        abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
        {
        case UNSPEC_GOT:
          fputs ("@GOT", file);
          break;
        case UNSPEC_GOTOFF:
          fputs ("@GOTOFF", file);
          break;
        case UNSPEC_GOTPCREL:
          fputs ("@GOTPCREL(%rip)", file);
          break;
        case UNSPEC_GOTTPOFF:
          fputs ("@GOTTPOFF", file);
          break;
        case UNSPEC_TPOFF:
          fputs ("@TPOFF", file);
          break;
        case UNSPEC_NTPOFF:
          fputs ("@NTPOFF", file);
          break;
        case UNSPEC_DTPOFF:
          fputs ("@DTPOFF", file);
          break;
        default:
          output_operand_lossage ("invalid UNSPEC as operand");
          break;
        }
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}

/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
   We need to handle our special PIC relocations.  */

void
i386_dwarf_output_addr_const (file, x)
     FILE *file;
     rtx x;
{
#ifdef ASM_QUAD
  fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
#else
  if (TARGET_64BIT)
    abort ();
  fprintf (file, "%s", ASM_LONG);
#endif
  if (flag_pic)
    output_pic_addr_const (file, x, '\0');
  else
    output_addr_const (file, x);
  fputc ('\n', file);
}

/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.  */

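/* Illustrative sketch (not part of the original source): for -fpic on
   ia32 this maps, e.g.,
       (plus (reg %ebx) (const (unspec [(symbol_ref "a")] UNSPEC_GOTOFF)))
   back to plain (symbol_ref "a"), so the debug info can refer to the
   symbol directly.  */
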
rtx
i386_simplify_dwarf_addr (orig_x)
     rtx orig_x;
{
  rtx x = orig_x, y;

  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST
          || GET_CODE (XEXP (x, 0)) != UNSPEC
          || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
          || GET_CODE (orig_x) != MEM)
        return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    y = NULL;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      y = XEXP (x, 0);
      if (GET_CODE (XEXP (y, 0)) == REG
          && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
        y = XEXP (y, 1);
      else if (GET_CODE (XEXP (y, 1)) == REG
               && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
        y = XEXP (y, 0);
      else
        return orig_x;
      if (GET_CODE (y) != REG
          && GET_CODE (y) != MULT
          && GET_CODE (y) != ASHIFT)
        return orig_x;
    }
  else
    return orig_x;

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
          || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    {
      if (y)
        return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
      return XVECEXP (x, 0, 0);
    }

  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
          || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
              && GET_CODE (orig_x) != MEM)))
    {
      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
      if (y)
        return gen_rtx_PLUS (Pmode, y, x);
      return x;
    }

  return orig_x;
}
\f
static void
put_condition_code (code, mode, reverse, fp, file)
     enum rtx_code code;
     enum machine_mode mode;
     int reverse, fp;
     FILE *file;
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
        abort ();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
        abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
         Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode != CCmode)
        abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
        suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
        suffix = "l";
      else
        abort ();
      break;
    case LTU:
      if (mode != CCmode)
        abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
        suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
        suffix = "ge";
      else
        abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
        abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
        abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
        abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
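
/* For illustration (a sketch, not part of the original source):
   (GT, CCmode) yields the suffix "g" (so e.g. "setg" or "jg"), the
   reversed form yields "le", and unsigned GTU yields "a" ("nbe" in
   the fp/fcmov case, per the workaround above).  */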

void
print_reg (x, code, file)
     rtx x;
     int code;
     FILE *file;
{
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      if (!TARGET_64BIT)
        abort ();
      switch (code)
        {
        case 0:
          error ("extended registers have no high halves");
          break;
        case 1:
          fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
          break;
        case 2:
          fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
          break;
        case 4:
          fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
          break;
        case 8:
          fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
          break;
        default:
          error ("unsupported operand size for extended register");
          break;
        }
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
        {
          fputs ("st(0)", file);
          break;
        }
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x))
        putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}

/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

static const char *
get_some_local_dynamic_name ()
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
        && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  abort ();
}

static int
get_some_local_dynamic_name_1 (px, data)
     rtx *px;
     void *data ATTRIBUTE_UNUSED;
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && local_dynamic_symbolic_operand (x, Pmode))
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}

/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition.
   F,f -- likewise, but for floating-point.
   O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
        nothing.
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax).
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
        delimiter.
   b -- print the QImode name of the register for the indicated operand.
        %b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
 */

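/* For illustration (a sketch, not part of the original source): with
   operands[0] = (reg:SI 0), "%k0" prints "%eax" and "%b0" prints "%al";
   with a DFmode stack operand, "%z1" appends the "l" size suffix, giving
   e.g. "faddl" from a "fadd%z1" template.  */
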
void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  if (code)
    {
      switch (code)
        {
        case '*':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('*', file);
          return;

        case '&':
          assemble_name (file, get_some_local_dynamic_name ());
          return;

        case 'A':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('*', file);
          else if (ASSEMBLER_DIALECT == ASM_INTEL)
            {
              /* Intel syntax.  For absolute addresses, registers should not
                 be surrounded by braces.  */
              if (GET_CODE (x) != REG)
                {
                  putc ('[', file);
                  PRINT_OPERAND (file, x, 0);
                  putc (']', file);
                  return;
                }
            }
          else
            abort ();

          PRINT_OPERAND (file, x, 0);
          return;


        case 'L':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('l', file);
          return;

        case 'W':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('w', file);
          return;

        case 'B':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('b', file);
          return;

        case 'Q':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('l', file);
          return;

        case 'S':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('s', file);
          return;

        case 'T':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('t', file);
          return;

        case 'z':
          /* 387 opcodes don't get size suffixes if the operands are
             registers.  */
          if (STACK_REG_P (x))
            return;

          /* Likewise if using Intel opcodes.  */
          if (ASSEMBLER_DIALECT == ASM_INTEL)
            return;

          /* This is the size of op from size of operand.  */
          switch (GET_MODE_SIZE (GET_MODE (x)))
            {
            case 2:
#ifdef HAVE_GAS_FILDS_FISTS
              putc ('s', file);
#endif
              return;

            case 4:
              if (GET_MODE (x) == SFmode)
                {
                  putc ('s', file);
                  return;
                }
              else
                putc ('l', file);
              return;

            case 12:
            case 16:
              putc ('t', file);
              return;

            case 8:
              if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
                {
#ifdef GAS_MNEMONICS
                  putc ('q', file);
#else
                  putc ('l', file);
                  putc ('l', file);
#endif
                }
              else
                putc ('l', file);
              return;

            default:
              abort ();
            }

        case 'b':
        case 'w':
        case 'k':
        case 'q':
        case 'h':
        case 'y':
        case 'X':
        case 'P':
          break;

        case 's':
          if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
            {
              PRINT_OPERAND (file, x, 0);
              putc (',', file);
            }
          return;

        case 'D':
          /* Little bit of braindamage here.  The SSE compare instructions
             use completely different names for the comparisons than the
             fp conditional moves do.  */
          switch (GET_CODE (x))
            {
            case EQ:
            case UNEQ:
              fputs ("eq", file);
              break;
            case LT:
            case UNLT:
              fputs ("lt", file);
              break;
            case LE:
            case UNLE:
              fputs ("le", file);
              break;
            case UNORDERED:
              fputs ("unord", file);
              break;
            case NE:
            case LTGT:
              fputs ("neq", file);
              break;
            case UNGE:
            case GE:
              fputs ("nlt", file);
              break;
            case UNGT:
            case GT:
              fputs ("nle", file);
              break;
            case ORDERED:
              fputs ("ord", file);
              break;
            default:
              abort ();
              break;
            }
          return;
        case 'O':
#ifdef CMOV_SUN_AS_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            {
              switch (GET_MODE (x))
                {
                case HImode: putc ('w', file); break;
                case SImode:
                case SFmode: putc ('l', file); break;
                case DImode:
                case DFmode: putc ('q', file); break;
                default: abort ();
                }
              putc ('.', file);
            }
#endif
          return;
        case 'C':
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
          return;
        case 'F':
#ifdef CMOV_SUN_AS_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('.', file);
#endif
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
          return;

          /* Like above, but reverse condition.  */
        case 'c':
          /* Check to see if the argument to %c is really a constant
             and not a condition code which needs to be reversed.  */
          if (GET_RTX_CLASS (GET_CODE (x)) != '<')
            {
              output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
              return;
            }
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
          return;
        case 'f':
#ifdef CMOV_SUN_AS_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('.', file);
#endif
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
          return;
        case '+':
          {
            rtx x;

            if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
              return;

            x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
            if (x)
              {
                int pred_val = INTVAL (XEXP (x, 0));

                if (pred_val < REG_BR_PROB_BASE * 45 / 100
                    || pred_val > REG_BR_PROB_BASE * 55 / 100)
                  {
                    int taken = pred_val > REG_BR_PROB_BASE / 2;
                    int cputaken = final_forward_branch_p (current_output_insn) == 0;

                    /* Emit hints only in the case the default branch
                       prediction heuristics would fail.  */
                    if (taken != cputaken)
                      {
                        /* We use 3e (DS) prefix for taken branches and
                           2e (CS) prefix for not taken branches.  */
                        if (taken)
                          fputs ("ds ; ", file);
                        else
                          fputs ("cs ; ", file);
                      }
                  }
              }
            return;
          }
        default:
          output_operand_lossage ("invalid operand code `%c'", code);
        }
    }

  if (GET_CODE (x) == REG)
    {
      PRINT_REG (x, code, file);
    }

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
        {
          const char * size;
          switch (GET_MODE_SIZE (GET_MODE (x)))
            {
            case 1: size = "BYTE"; break;
            case 2: size = "WORD"; break;
            case 4: size = "DWORD"; break;
            case 8: size = "QWORD"; break;
            case 12: size = "XWORD"; break;
            case 16: size = "XMMWORD"; break;
            default:
              abort ();
            }

          /* Check for explicit size override (codes 'b', 'w' and 'k').  */
          if (code == 'b')
            size = "BYTE";
          else if (code == 'w')
            size = "WORD";
          else if (code == 'k')
            size = "DWORD";

          fputs (size, file);
          fputs (" PTR ", file);
        }

      x = XEXP (x, 0);
      if (flag_pic && CONSTANT_ADDRESS_P (x))
        output_pic_addr_const (file, x, code);
      /* Avoid (%rip) for call operands.  */
      else if (CONSTANT_ADDRESS_P (x) && code == 'P'
               && GET_CODE (x) != CONST_INT)
        output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
        output_operand_lossage ("invalid constraints for operand");
      else
        output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
        putc ('$', file);
      fprintf (file, "0x%lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
           && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }

  else
    {
      if (code != 'P')
        {
          if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
            {
              if (ASSEMBLER_DIALECT == ASM_ATT)
                putc ('$', file);
            }
          else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
                   || GET_CODE (x) == LABEL_REF)
            {
              if (ASSEMBLER_DIALECT == ASM_ATT)
                putc ('$', file);
              else
                fputs ("OFFSET FLAT:", file);
            }
        }
      if (GET_CODE (x) == CONST_INT)
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
        output_pic_addr_const (file, x, code);
      else
        output_addr_const (file, x);
    }
}
\f
/* Print a memory operand whose address is ADDR.  */

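/* For illustration (a sketch, not part of the original source): a
   base+index*scale+disp address prints as "-4(%ebp,%eax,4)" in AT&T
   syntax and as "[ebp-4+eax*4]" in Intel syntax, following the two
   branches below.  */
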
void
print_operand_address (file, addr)
     FILE *file;
     register rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
    {
      if (ASSEMBLER_DIALECT == ASM_INTEL)
        fputs ("DWORD PTR ", file);
      if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
        putc ('%', file);
      fputs ("gs:0", file);
      return;
    }

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
        {
          if (ASSEMBLER_DIALECT == ASM_INTEL)
            {
              if (USER_LABEL_PREFIX[0] == 0)
                putc ('%', file);
              fputs ("ds:", file);
            }
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
        }
      else if (flag_pic)
        output_pic_addr_const (file, addr, 0);
      else
        output_addr_const (file, addr);

      /* Use one byte shorter RIP relative addressing for 64bit mode.  */
      if (TARGET_64BIT
          && (GET_CODE (addr) == SYMBOL_REF
              || GET_CODE (addr) == LABEL_REF
              || (GET_CODE (addr) == CONST
                  && GET_CODE (XEXP (addr, 0)) == PLUS
                  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
                  && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
        fputs ("(%rip)", file);
    }
  else
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
        {
          if (disp)
            {
              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (GET_CODE (disp) == LABEL_REF)
                output_asm_label (disp);
              else
                output_addr_const (file, disp);
            }

          putc ('(', file);
          if (base)
            PRINT_REG (base, 0, file);
          if (index)
            {
              putc (',', file);
              PRINT_REG (index, 0, file);
              if (scale != 1)
                fprintf (file, ",%d", scale);
            }
          putc (')', file);
        }
      else
        {
          rtx offset = NULL_RTX;

          if (disp)
            {
              /* Pull out the offset of a symbol; print any symbol itself.  */
              if (GET_CODE (disp) == CONST
                  && GET_CODE (XEXP (disp, 0)) == PLUS
                  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
                {
                  offset = XEXP (XEXP (disp, 0), 1);
                  disp = gen_rtx_CONST (VOIDmode,
                                        XEXP (XEXP (disp, 0), 0));
                }

              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (GET_CODE (disp) == LABEL_REF)
                output_asm_label (disp);
              else if (GET_CODE (disp) == CONST_INT)
                offset = disp;
              else
                output_addr_const (file, disp);
            }

          putc ('[', file);
          if (base)
            {
              PRINT_REG (base, 0, file);
              if (offset)
                {
                  if (INTVAL (offset) >= 0)
                    putc ('+', file);
                  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
                }
            }
          else if (offset)
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
          else
            putc ('0', file);

          if (index)
            {
              putc ('+', file);
              PRINT_REG (index, 0, file);
              if (scale != 1)
                fprintf (file, "*%d", scale);
            }
          putc (']', file);
        }
    }
}

bool
output_addr_const_extra (file, x)
     FILE *file;
     rtx x;
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      fputs ("@GOTTPOFF", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@TPOFF", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      fputs ("@NTPOFF", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@DTPOFF", file);
      break;

    default:
      return false;
    }

  return true;
}
\f
/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

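/* For illustration (a sketch, not part of the original source): with
   operands[0] = (reg:DI 0), split_di fills lo_half[0] with the SImode
   word at byte offset 0 and hi_half[0] with the word at offset 4, so a
   DImode add can be expanded into an addl on the low halves followed
   by an adcl on the high ones.  */
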
void
split_di (operands, num, lo_half, hi_half)
     rtx operands[];
     int num;
     rtx lo_half[], hi_half[];
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
         but we still have to handle it.  */
      if (GET_CODE (op) == MEM)
        {
          lo_half[num] = adjust_address (op, SImode, 0);
          hi_half[num] = adjust_address (op, SImode, 4);
        }
      else
        {
          lo_half[num] = simplify_gen_subreg (SImode, op,
                                              GET_MODE (op) == VOIDmode
                                              ? DImode : GET_MODE (op), 0);
          hi_half[num] = simplify_gen_subreg (SImode, op,
                                              GET_MODE (op) == VOIDmode
                                              ? DImode : GET_MODE (op), 4);
        }
    }
}
/* Split one or more TImode RTL references into pairs of DImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_ti (operands, num, lo_half, hi_half)
     rtx operands[];
     int num;
     rtx lo_half[], hi_half[];
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses, but we
         still have to handle it.  */
      if (GET_CODE (op) == MEM)
        {
          lo_half[num] = adjust_address (op, DImode, 0);
          hi_half[num] = adjust_address (op, DImode, 8);
        }
      else
        {
          lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
          hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
        }
    }
}
\f
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

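/* For illustration (a sketch, not part of the original source): a PLUS
   with a DFmode memory operands[2] yields the template "fadd%z2\t%2"
   (i.e. "faddl"), while an SSE SFmode operand yields
   "addss\t{%2, %0|%0, %2}" via the ssep path below.  */
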

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (insn, operands)
     rtx insn;
     rtx *operands;
{
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents the input
     constraints, which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
           && REGNO (operands[0]) == REGNO (operands[1])
           && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
          || (REG_P (operands[2])
              && REGNO (operands[0]) == REGNO (operands[2])
              && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else if (!is_sse)
    abort ();
#endif

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fiadd";
      else
        p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fisub";
      else
        p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fimul";
      else
        p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fidiv";
      else
        p = "fdiv";
      ssep = "div";
      break;

    default:
      abort ();
    }

  if (is_sse)
    {
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
        strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
        strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
        {
          rtx temp = operands[2];
          operands[2] = operands[1];
          operands[1] = temp;
        }

      /* We know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
        {
          p = "%z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
          if (STACK_TOP_P (operands[0]))
            /* How is it that we are storing to a dead operand[2]?
               Well, presumably operands[1] is dead too.  We can't
               store the result to st(0) as st(0) gets popped on this
               instruction.  Instead store to operands[2] (which I
               think has to be st(1)).  st(1) will be popped later.
               gcc <= 2.8.1 didn't have this check and generated
               assembly code that the Unixware assembler rejected.  */
            p = "p\t{%0, %2|%2, %0}";   /* st(1) = st(0) op st(1); pop */
          else
            p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
          break;
        }

      if (STACK_TOP_P (operands[0]))
        p = "\t{%y2, %0|%0, %y2}";      /* st(0) = st(0) op st(r2) */
      else
        p = "\t{%2, %0|%0, %2}";        /* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (GET_CODE (operands[1]) == MEM)
        {
          p = "r%z1\t%1";
          break;
        }

      if (GET_CODE (operands[2]) == MEM)
        {
          p = "%z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
#if SYSV386_COMPAT
          /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7046 derived assemblers, confusingly reverse the direction of
7047 the operation for fsub{r} and fdiv{r} when the
7048 destination register is not st(0). The Intel assembler
7049 doesn't have this brain damage. Read !SYSV386_COMPAT to
7050 figure out what the hardware really does. */
7051 if (STACK_TOP_P (operands[0]))
7052 p = "{p\t%0, %2|rp\t%2, %0}";
7053 else
7054 p = "{rp\t%2, %0|p\t%0, %2}";
7055#else
6b28fd63 7056 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7057 /* As above for fmul/fadd, we can't store to st(0). */
7058 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7059 else
e3c2afab
AM
7060 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7061#endif
e075ae69 7062 break;
6b28fd63 7063 }
2a2ab3f9
JVA
7064
7065 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 7066 {
e3c2afab 7067#if SYSV386_COMPAT
6b28fd63 7068 if (STACK_TOP_P (operands[0]))
e3c2afab 7069 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 7070 else
e3c2afab
AM
7071 p = "{p\t%1, %0|rp\t%0, %1}";
7072#else
7073 if (STACK_TOP_P (operands[0]))
7074 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7075 else
7076 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7077#endif
e075ae69 7078 break;
6b28fd63 7079 }
2a2ab3f9
JVA
7080
7081 if (STACK_TOP_P (operands[0]))
7082 {
7083 if (STACK_TOP_P (operands[1]))
e3c2afab 7084 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7085 else
e3c2afab 7086 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 7087 break;
2a2ab3f9
JVA
7088 }
7089 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
7090 {
7091#if SYSV386_COMPAT
7092 p = "{\t%1, %0|r\t%0, %1}";
7093#else
7094 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7095#endif
7096 }
2a2ab3f9 7097 else
e3c2afab
AM
7098 {
7099#if SYSV386_COMPAT
7100 p = "{r\t%2, %0|\t%0, %2}";
7101#else
7102 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7103#endif
7104 }
e075ae69 7105 break;
2a2ab3f9
JVA
7106
7107 default:
7108 abort ();
7109 }
e075ae69
RH
7110
7111 strcat (buf, p);
7112 return buf;
2a2ab3f9 7113}
e075ae69 7114
a4f31c00 7115/* Output code to initialize control word copies used by
7a2e09f4
JH
7116 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7117 is set to control word rounding downwards. */
7118void
7119emit_i387_cw_initialization (normal, round_down)
7120 rtx normal, round_down;
7121{
7122 rtx reg = gen_reg_rtx (HImode);
7123
7124 emit_insn (gen_x86_fnstcw_1 (normal));
7125 emit_move_insn (reg, normal);
7126 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7127 && !TARGET_64BIT)
7128 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7129 else
7130 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7131 emit_move_insn (round_down, reg);
7132}
7133
2a2ab3f9 7134/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 7135 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 7136 operand may be [SDX]Fmode. */
2a2ab3f9 7137
69ddee61 7138const char *
2a2ab3f9
JVA
7139output_fix_trunc (insn, operands)
7140 rtx insn;
7141 rtx *operands;
7142{
7143 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 7144 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 7145
e075ae69
RH
7146 /* Jump through a hoop or two for DImode, since the hardware has no
7147 non-popping instruction. We used to do this a different way, but
7148 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
7149 if (dimode_p && !stack_top_dies)
7150 output_asm_insn ("fld\t%y1", operands);
e075ae69 7151
7a2e09f4 7152 if (!STACK_TOP_P (operands[1]))
10195bd8
JW
7153 abort ();
7154
e075ae69 7155 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 7156 abort ();
e9a25f70 7157
7a2e09f4 7158 output_asm_insn ("fldcw\t%3", operands);
e075ae69 7159 if (stack_top_dies || dimode_p)
7a2e09f4 7160 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 7161 else
7a2e09f4 7162 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 7163 output_asm_insn ("fldcw\t%2", operands);
10195bd8 7164
e075ae69 7165 return "";
2a2ab3f9 7166}
cda749b1 7167
e075ae69
RH
7168/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7169 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7170 when fucom should be used. */
7171
69ddee61 7172const char *
e075ae69 7173output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
7174 rtx insn;
7175 rtx *operands;
e075ae69 7176 int eflags_p, unordered_p;
cda749b1 7177{
e075ae69
RH
7178 int stack_top_dies;
7179 rtx cmp_op0 = operands[0];
7180 rtx cmp_op1 = operands[1];
0644b628 7181 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
e075ae69
RH
7182
7183 if (eflags_p == 2)
7184 {
7185 cmp_op0 = cmp_op1;
7186 cmp_op1 = operands[2];
7187 }
0644b628
JH
7188 if (is_sse)
7189 {
7190 if (GET_MODE (operands[0]) == SFmode)
7191 if (unordered_p)
7192 return "ucomiss\t{%1, %0|%0, %1}";
7193 else
7194 return "comiss\t{%1, %0|%0, %y}";
7195 else
7196 if (unordered_p)
7197 return "ucomisd\t{%1, %0|%0, %1}";
7198 else
7199 return "comisd\t{%1, %0|%0, %y}";
7200 }
cda749b1 7201
e075ae69 7202 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
7203 abort ();
7204
e075ae69 7205 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 7206
e075ae69
RH
7207 if (STACK_REG_P (cmp_op1)
7208 && stack_top_dies
7209 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7210 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 7211 {
e075ae69
RH
7212 /* If both the top of the 387 stack dies, and the other operand
7213 is also a stack register that dies, then this must be a
7214 `fcompp' float compare */
7215
7216 if (eflags_p == 1)
7217 {
7218 /* There is no double popping fcomi variant. Fortunately,
7219 eflags is immune from the fstp's cc clobbering. */
7220 if (unordered_p)
7221 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7222 else
7223 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7224 return "fstp\t%y0";
7225 }
7226 else
cda749b1 7227 {
e075ae69
RH
7228 if (eflags_p == 2)
7229 {
7230 if (unordered_p)
7231 return "fucompp\n\tfnstsw\t%0";
7232 else
7233 return "fcompp\n\tfnstsw\t%0";
7234 }
cda749b1
JW
7235 else
7236 {
e075ae69
RH
7237 if (unordered_p)
7238 return "fucompp";
7239 else
7240 return "fcompp";
cda749b1
JW
7241 }
7242 }
cda749b1
JW
7243 }
7244 else
7245 {
e075ae69 7246 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 7247
0f290768 7248 static const char * const alt[24] =
e075ae69
RH
7249 {
7250 "fcom%z1\t%y1",
7251 "fcomp%z1\t%y1",
7252 "fucom%z1\t%y1",
7253 "fucomp%z1\t%y1",
0f290768 7254
e075ae69
RH
7255 "ficom%z1\t%y1",
7256 "ficomp%z1\t%y1",
7257 NULL,
7258 NULL,
7259
7260 "fcomi\t{%y1, %0|%0, %y1}",
7261 "fcomip\t{%y1, %0|%0, %y1}",
7262 "fucomi\t{%y1, %0|%0, %y1}",
7263 "fucomip\t{%y1, %0|%0, %y1}",
7264
7265 NULL,
7266 NULL,
7267 NULL,
7268 NULL,
7269
7270 "fcom%z2\t%y2\n\tfnstsw\t%0",
7271 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7272 "fucom%z2\t%y2\n\tfnstsw\t%0",
7273 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 7274
e075ae69
RH
7275 "ficom%z2\t%y2\n\tfnstsw\t%0",
7276 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7277 NULL,
7278 NULL
7279 };
7280
7281 int mask;
69ddee61 7282 const char *ret;
e075ae69
RH
7283
7284 mask = eflags_p << 3;
7285 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7286 mask |= unordered_p << 1;
7287 mask |= stack_top_dies;
7288
7289 if (mask >= 24)
7290 abort ();
7291 ret = alt[mask];
7292 if (ret == NULL)
7293 abort ();
cda749b1 7294
e075ae69 7295 return ret;
cda749b1
JW
7296 }
7297}
2a2ab3f9 7298
f88c65f7
RH
7299void
7300ix86_output_addr_vec_elt (file, value)
7301 FILE *file;
7302 int value;
7303{
7304 const char *directive = ASM_LONG;
7305
7306 if (TARGET_64BIT)
7307 {
7308#ifdef ASM_QUAD
7309 directive = ASM_QUAD;
7310#else
7311 abort ();
7312#endif
7313 }
7314
7315 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7316}
7317
7318void
7319ix86_output_addr_diff_elt (file, value, rel)
7320 FILE *file;
7321 int value, rel;
7322{
7323 if (TARGET_64BIT)
74411039 7324 fprintf (file, "%s%s%d-%s%d\n",
f88c65f7
RH
7325 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7326 else if (HAVE_AS_GOTOFF_IN_DATA)
7327 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
b069de3b
SS
7328#if TARGET_MACHO
7329 else if (TARGET_MACHO)
7330 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7331 machopic_function_base_name () + 1);
7332#endif
f88c65f7
RH
7333 else
7334 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
7335 ASM_LONG, LPREFIX, value);
7336}
32b5b1aa 7337\f
a8bac9ab
RH
7338/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7339 for the target. */
7340
7341void
7342ix86_expand_clear (dest)
7343 rtx dest;
7344{
7345 rtx tmp;
7346
7347 /* We play register width games, which are only valid after reload. */
7348 if (!reload_completed)
7349 abort ();
7350
7351 /* Avoid HImode and its attendant prefix byte. */
7352 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7353 dest = gen_rtx_REG (SImode, REGNO (dest));
7354
7355 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7356
7357 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7358 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7359 {
7360 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7361 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7362 }
7363
7364 emit_insn (tmp);
7365}
7366
f996902d
RH
7367/* X is an unchanging MEM. If it is a constant pool reference, return
7368 the constant pool rtx, else NULL. */
7369
7370static rtx
7371maybe_get_pool_constant (x)
7372 rtx x;
7373{
7374 x = XEXP (x, 0);
7375
7376 if (flag_pic)
7377 {
7378 if (GET_CODE (x) != PLUS)
7379 return NULL_RTX;
7380 if (XEXP (x, 0) != pic_offset_table_rtx)
7381 return NULL_RTX;
7382 x = XEXP (x, 1);
7383 if (GET_CODE (x) != CONST)
7384 return NULL_RTX;
7385 x = XEXP (x, 0);
7386 if (GET_CODE (x) != UNSPEC)
7387 return NULL_RTX;
7388 if (XINT (x, 1) != UNSPEC_GOTOFF)
7389 return NULL_RTX;
7390 x = XVECEXP (x, 0, 0);
7391 }
7392
7393 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7394 return get_pool_constant (x);
7395
7396 return NULL_RTX;
7397}
7398
79325812 7399void
e075ae69
RH
7400ix86_expand_move (mode, operands)
7401 enum machine_mode mode;
7402 rtx operands[];
32b5b1aa 7403{
e075ae69 7404 int strict = (reload_in_progress || reload_completed);
f996902d
RH
7405 rtx insn, op0, op1, tmp;
7406
7407 op0 = operands[0];
7408 op1 = operands[1];
7409
7410 /* ??? We have a slight problem. We need to say that tls symbols are
7411 not legitimate constants so that reload does not helpfully reload
7412 these constants from a REG_EQUIV, which we cannot handle. (Recall
7413 that general- and local-dynamic address resolution requires a
7414 function call.)
e9a25f70 7415
f996902d
RH
7416 However, if we say that tls symbols are not legitimate constants,
7417 then emit_move_insn helpfully drop them into the constant pool.
7418
7419 It is far easier to work around emit_move_insn than reload. Recognize
7420 the MEM that we would have created and extract the symbol_ref. */
7421
7422 if (mode == Pmode
7423 && GET_CODE (op1) == MEM
7424 && RTX_UNCHANGING_P (op1))
32b5b1aa 7425 {
f996902d
RH
7426 tmp = maybe_get_pool_constant (op1);
7427 /* Note that we only care about symbolic constants here, which
7428 unlike CONST_INT will always have a proper mode. */
7429 if (tmp && GET_MODE (tmp) == Pmode)
7430 op1 = tmp;
7431 }
e9a25f70 7432
f996902d
RH
7433 if (tls_symbolic_operand (op1, Pmode))
7434 {
7435 op1 = legitimize_address (op1, op1, VOIDmode);
7436 if (GET_CODE (op0) == MEM)
7437 {
7438 tmp = gen_reg_rtx (mode);
7439 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7440 op1 = tmp;
7441 }
7442 }
7443 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7444 {
b069de3b
SS
7445#if TARGET_MACHO
7446 if (MACHOPIC_PURE)
7447 {
7448 rtx temp = ((reload_in_progress
7449 || ((op0 && GET_CODE (op0) == REG)
7450 && mode == Pmode))
7451 ? op0 : gen_reg_rtx (Pmode));
7452 op1 = machopic_indirect_data_reference (op1, temp);
7453 op1 = machopic_legitimize_pic_address (op1, mode,
7454 temp == op1 ? 0 : temp);
7455 }
7456 else
7457 {
7458 if (MACHOPIC_INDIRECT)
7459 op1 = machopic_indirect_data_reference (op1, 0);
7460 }
7461 if (op0 != op1)
7462 {
7463 insn = gen_rtx_SET (VOIDmode, op0, op1);
7464 emit_insn (insn);
7465 }
7466 return;
7467#endif /* TARGET_MACHO */
f996902d
RH
7468 if (GET_CODE (op0) == MEM)
7469 op1 = force_reg (Pmode, op1);
e075ae69 7470 else
32b5b1aa 7471 {
f996902d 7472 rtx temp = op0;
e075ae69
RH
7473 if (GET_CODE (temp) != REG)
7474 temp = gen_reg_rtx (Pmode);
f996902d
RH
7475 temp = legitimize_pic_address (op1, temp);
7476 if (temp == op0)
e075ae69 7477 return;
f996902d 7478 op1 = temp;
32b5b1aa 7479 }
e075ae69
RH
7480 }
7481 else
7482 {
f996902d 7483 if (GET_CODE (op0) == MEM
44cf5b6a 7484 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
f996902d
RH
7485 || !push_operand (op0, mode))
7486 && GET_CODE (op1) == MEM)
7487 op1 = force_reg (mode, op1);
e9a25f70 7488
f996902d
RH
7489 if (push_operand (op0, mode)
7490 && ! general_no_elim_operand (op1, mode))
7491 op1 = copy_to_mode_reg (mode, op1);
2c5a510c 7492
44cf5b6a
JH
7493 /* Force large constants in 64bit compilation into register
7494 to get them CSEed. */
7495 if (TARGET_64BIT && mode == DImode
f996902d
RH
7496 && immediate_operand (op1, mode)
7497 && !x86_64_zero_extended_value (op1)
7498 && !register_operand (op0, mode)
44cf5b6a 7499 && optimize && !reload_completed && !reload_in_progress)
f996902d 7500 op1 = copy_to_mode_reg (mode, op1);
44cf5b6a 7501
e075ae69 7502 if (FLOAT_MODE_P (mode))
32b5b1aa 7503 {
d7a29404
JH
7504 /* If we are loading a floating point constant to a register,
7505 force the value to memory now, since we'll get better code
7506 out the back end. */
e075ae69
RH
7507
7508 if (strict)
7509 ;
f996902d
RH
7510 else if (GET_CODE (op1) == CONST_DOUBLE
7511 && register_operand (op0, mode))
7512 op1 = validize_mem (force_const_mem (mode, op1));
32b5b1aa 7513 }
32b5b1aa 7514 }
e9a25f70 7515
f996902d 7516 insn = gen_rtx_SET (VOIDmode, op0, op1);
e9a25f70 7517
e075ae69
RH
7518 emit_insn (insn);
7519}
e9a25f70 7520
e37af218
RH
7521void
7522ix86_expand_vector_move (mode, operands)
7523 enum machine_mode mode;
7524 rtx operands[];
7525{
7526 /* Force constants other than zero into memory. We do not know how
7527 the instructions used to build constants modify the upper 64 bits
7528 of the register, once we have that information we may be able
7529 to handle some of them more efficiently. */
7530 if ((reload_in_progress | reload_completed) == 0
7531 && register_operand (operands[0], mode)
7532 && CONSTANT_P (operands[1]))
7533 {
7534 rtx addr = gen_reg_rtx (Pmode);
7535 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7536 operands[1] = gen_rtx_MEM (mode, addr);
7537 }
7538
7539 /* Make operand1 a register if it isn't already. */
7540 if ((reload_in_progress | reload_completed) == 0
7541 && !register_operand (operands[0], mode)
7542 && !register_operand (operands[1], mode)
7543 && operands[1] != CONST0_RTX (mode))
7544 {
59bef189 7545 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
e37af218
RH
7546 emit_move_insn (operands[0], temp);
7547 return;
7548 }
7549
7550 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
fce5a9f2 7551}
e37af218 7552
e075ae69
RH
7553/* Attempt to expand a binary operator. Make the expansion closer to the
7554 actual machine, then just general_operand, which will allow 3 separate
9d81fc27 7555 memory references (one output, two input) in a single insn. */
e9a25f70 7556
e075ae69
RH
7557void
7558ix86_expand_binary_operator (code, mode, operands)
7559 enum rtx_code code;
7560 enum machine_mode mode;
7561 rtx operands[];
7562{
7563 int matching_memory;
7564 rtx src1, src2, dst, op, clob;
7565
7566 dst = operands[0];
7567 src1 = operands[1];
7568 src2 = operands[2];
7569
7570 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7571 if (GET_RTX_CLASS (code) == 'c'
7572 && (rtx_equal_p (dst, src2)
7573 || immediate_operand (src1, mode)))
7574 {
7575 rtx temp = src1;
7576 src1 = src2;
7577 src2 = temp;
32b5b1aa 7578 }
e9a25f70 7579
e075ae69
RH
7580 /* If the destination is memory, and we do not have matching source
7581 operands, do things in registers. */
7582 matching_memory = 0;
7583 if (GET_CODE (dst) == MEM)
32b5b1aa 7584 {
e075ae69
RH
7585 if (rtx_equal_p (dst, src1))
7586 matching_memory = 1;
7587 else if (GET_RTX_CLASS (code) == 'c'
7588 && rtx_equal_p (dst, src2))
7589 matching_memory = 2;
7590 else
7591 dst = gen_reg_rtx (mode);
7592 }
0f290768 7593
e075ae69
RH
7594 /* Both source operands cannot be in memory. */
7595 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7596 {
7597 if (matching_memory != 2)
7598 src2 = force_reg (mode, src2);
7599 else
7600 src1 = force_reg (mode, src1);
32b5b1aa 7601 }
e9a25f70 7602
06a964de
JH
7603 /* If the operation is not commutable, source 1 cannot be a constant
7604 or non-matching memory. */
0f290768 7605 if ((CONSTANT_P (src1)
06a964de
JH
7606 || (!matching_memory && GET_CODE (src1) == MEM))
7607 && GET_RTX_CLASS (code) != 'c')
e075ae69 7608 src1 = force_reg (mode, src1);
0f290768 7609
e075ae69 7610 /* If optimizing, copy to regs to improve CSE */
fe577e58 7611 if (optimize && ! no_new_pseudos)
32b5b1aa 7612 {
e075ae69
RH
7613 if (GET_CODE (dst) == MEM)
7614 dst = gen_reg_rtx (mode);
7615 if (GET_CODE (src1) == MEM)
7616 src1 = force_reg (mode, src1);
7617 if (GET_CODE (src2) == MEM)
7618 src2 = force_reg (mode, src2);
32b5b1aa 7619 }
e9a25f70 7620
e075ae69
RH
7621 /* Emit the instruction. */
7622
7623 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7624 if (reload_in_progress)
7625 {
7626 /* Reload doesn't know about the flags register, and doesn't know that
7627 it doesn't want to clobber it. We can only do this with PLUS. */
7628 if (code != PLUS)
7629 abort ();
7630 emit_insn (op);
7631 }
7632 else
32b5b1aa 7633 {
e075ae69
RH
7634 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7635 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 7636 }
e9a25f70 7637
e075ae69
RH
7638 /* Fix up the destination if needed. */
7639 if (dst != operands[0])
7640 emit_move_insn (operands[0], dst);
7641}
7642
7643/* Return TRUE or FALSE depending on whether the binary operator meets the
7644 appropriate constraints. */
7645
7646int
7647ix86_binary_operator_ok (code, mode, operands)
7648 enum rtx_code code;
7649 enum machine_mode mode ATTRIBUTE_UNUSED;
7650 rtx operands[3];
7651{
7652 /* Both source operands cannot be in memory. */
7653 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7654 return 0;
7655 /* If the operation is not commutable, source 1 cannot be a constant. */
7656 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7657 return 0;
7658 /* If the destination is memory, we must have a matching source operand. */
7659 if (GET_CODE (operands[0]) == MEM
7660 && ! (rtx_equal_p (operands[0], operands[1])
7661 || (GET_RTX_CLASS (code) == 'c'
7662 && rtx_equal_p (operands[0], operands[2]))))
7663 return 0;
06a964de 7664 /* If the operation is not commutable and the source 1 is memory, we must
d6a7951f 7665 have a matching destination. */
06a964de
JH
7666 if (GET_CODE (operands[1]) == MEM
7667 && GET_RTX_CLASS (code) != 'c'
7668 && ! rtx_equal_p (operands[0], operands[1]))
7669 return 0;
e075ae69
RH
7670 return 1;
7671}
7672
7673/* Attempt to expand a unary operator. Make the expansion closer to the
7674 actual machine, then just general_operand, which will allow 2 separate
9d81fc27 7675 memory references (one output, one input) in a single insn. */
e075ae69 7676
9d81fc27 7677void
e075ae69
RH
7678ix86_expand_unary_operator (code, mode, operands)
7679 enum rtx_code code;
7680 enum machine_mode mode;
7681 rtx operands[];
7682{
06a964de
JH
7683 int matching_memory;
7684 rtx src, dst, op, clob;
7685
7686 dst = operands[0];
7687 src = operands[1];
e075ae69 7688
06a964de
JH
7689 /* If the destination is memory, and we do not have matching source
7690 operands, do things in registers. */
7691 matching_memory = 0;
7692 if (GET_CODE (dst) == MEM)
32b5b1aa 7693 {
06a964de
JH
7694 if (rtx_equal_p (dst, src))
7695 matching_memory = 1;
e075ae69 7696 else
06a964de 7697 dst = gen_reg_rtx (mode);
32b5b1aa 7698 }
e9a25f70 7699
06a964de
JH
7700 /* When source operand is memory, destination must match. */
7701 if (!matching_memory && GET_CODE (src) == MEM)
7702 src = force_reg (mode, src);
0f290768 7703
06a964de 7704 /* If optimizing, copy to regs to improve CSE */
fe577e58 7705 if (optimize && ! no_new_pseudos)
06a964de
JH
7706 {
7707 if (GET_CODE (dst) == MEM)
7708 dst = gen_reg_rtx (mode);
7709 if (GET_CODE (src) == MEM)
7710 src = force_reg (mode, src);
7711 }
7712
7713 /* Emit the instruction. */
7714
7715 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7716 if (reload_in_progress || code == NOT)
7717 {
7718 /* Reload doesn't know about the flags register, and doesn't know that
7719 it doesn't want to clobber it. */
7720 if (code != NOT)
7721 abort ();
7722 emit_insn (op);
7723 }
7724 else
7725 {
7726 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7727 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7728 }
7729
7730 /* Fix up the destination if needed. */
7731 if (dst != operands[0])
7732 emit_move_insn (operands[0], dst);
e075ae69
RH
7733}
7734
7735/* Return TRUE or FALSE depending on whether the unary operator meets the
7736 appropriate constraints. */
7737
7738int
7739ix86_unary_operator_ok (code, mode, operands)
7740 enum rtx_code code ATTRIBUTE_UNUSED;
7741 enum machine_mode mode ATTRIBUTE_UNUSED;
7742 rtx operands[2] ATTRIBUTE_UNUSED;
7743{
06a964de
JH
7744 /* If one of operands is memory, source and destination must match. */
7745 if ((GET_CODE (operands[0]) == MEM
7746 || GET_CODE (operands[1]) == MEM)
7747 && ! rtx_equal_p (operands[0], operands[1]))
7748 return FALSE;
e075ae69
RH
7749 return TRUE;
7750}
7751
16189740
RH
7752/* Return TRUE or FALSE depending on whether the first SET in INSN
7753 has source and destination with matching CC modes, and that the
7754 CC mode is at least as constrained as REQ_MODE. */
7755
7756int
7757ix86_match_ccmode (insn, req_mode)
7758 rtx insn;
7759 enum machine_mode req_mode;
7760{
7761 rtx set;
7762 enum machine_mode set_mode;
7763
7764 set = PATTERN (insn);
7765 if (GET_CODE (set) == PARALLEL)
7766 set = XVECEXP (set, 0, 0);
7767 if (GET_CODE (set) != SET)
7768 abort ();
9076b9c1
JH
7769 if (GET_CODE (SET_SRC (set)) != COMPARE)
7770 abort ();
16189740
RH
7771
7772 set_mode = GET_MODE (SET_DEST (set));
7773 switch (set_mode)
7774 {
9076b9c1
JH
7775 case CCNOmode:
7776 if (req_mode != CCNOmode
7777 && (req_mode != CCmode
7778 || XEXP (SET_SRC (set), 1) != const0_rtx))
7779 return 0;
7780 break;
16189740 7781 case CCmode:
9076b9c1 7782 if (req_mode == CCGCmode)
16189740
RH
7783 return 0;
7784 /* FALLTHRU */
9076b9c1
JH
7785 case CCGCmode:
7786 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7787 return 0;
7788 /* FALLTHRU */
7789 case CCGOCmode:
16189740
RH
7790 if (req_mode == CCZmode)
7791 return 0;
7792 /* FALLTHRU */
7793 case CCZmode:
7794 break;
7795
7796 default:
7797 abort ();
7798 }
7799
7800 return (GET_MODE (SET_SRC (set)) == set_mode);
7801}
7802
e075ae69
RH
7803/* Generate insn patterns to do an integer compare of OPERANDS. */
7804
7805static rtx
7806ix86_expand_int_compare (code, op0, op1)
7807 enum rtx_code code;
7808 rtx op0, op1;
7809{
7810 enum machine_mode cmpmode;
7811 rtx tmp, flags;
7812
7813 cmpmode = SELECT_CC_MODE (code, op0, op1);
7814 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7815
7816 /* This is very simple, but making the interface the same as in the
7817 FP case makes the rest of the code easier. */
7818 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7819 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7820
7821 /* Return the test that should be put into the flags user, i.e.
7822 the bcc, scc, or cmov instruction. */
7823 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7824}
7825
3a3677ff
RH
7826/* Figure out whether to use ordered or unordered fp comparisons.
7827 Return the appropriate mode to use. */
e075ae69 7828
b1cdafbb 7829enum machine_mode
3a3677ff 7830ix86_fp_compare_mode (code)
8752c357 7831 enum rtx_code code ATTRIBUTE_UNUSED;
e075ae69 7832{
9e7adcb3
JH
7833 /* ??? In order to make all comparisons reversible, we do all comparisons
7834 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7835 all forms trapping and nontrapping comparisons, we can make inequality
7836 comparisons trapping again, since it results in better code when using
7837 FCOM based compares. */
7838 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
7839}
7840
9076b9c1
JH
7841enum machine_mode
7842ix86_cc_mode (code, op0, op1)
7843 enum rtx_code code;
7844 rtx op0, op1;
7845{
7846 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7847 return ix86_fp_compare_mode (code);
7848 switch (code)
7849 {
7850 /* Only zero flag is needed. */
7851 case EQ: /* ZF=0 */
7852 case NE: /* ZF!=0 */
7853 return CCZmode;
7854 /* Codes needing carry flag. */
265dab10
JH
7855 case GEU: /* CF=0 */
7856 case GTU: /* CF=0 & ZF=0 */
7e08e190
JH
7857 case LTU: /* CF=1 */
7858 case LEU: /* CF=1 | ZF=1 */
265dab10 7859 return CCmode;
9076b9c1
JH
7860 /* Codes possibly doable only with sign flag when
7861 comparing against zero. */
7862 case GE: /* SF=OF or SF=0 */
7e08e190 7863 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
7864 if (op1 == const0_rtx)
7865 return CCGOCmode;
7866 else
7867 /* For other cases Carry flag is not required. */
7868 return CCGCmode;
7869 /* Codes doable only with sign flag when comparing
7870 against zero, but we miss jump instruction for it
7871 so we need to use relational tests agains overflow
7872 that thus needs to be zero. */
7873 case GT: /* ZF=0 & SF=OF */
7874 case LE: /* ZF=1 | SF<>OF */
7875 if (op1 == const0_rtx)
7876 return CCNOmode;
7877 else
7878 return CCGCmode;
7fcd7218
JH
7879 /* strcmp pattern do (use flags) and combine may ask us for proper
7880 mode. */
7881 case USE:
7882 return CCmode;
9076b9c1 7883 default:
0f290768 7884 abort ();
9076b9c1
JH
7885 }
7886}
7887
3a3677ff
RH
7888/* Return true if we should use an FCOMI instruction for this fp comparison. */
7889
a940d8bd 7890int
3a3677ff 7891ix86_use_fcomi_compare (code)
9e7adcb3 7892 enum rtx_code code ATTRIBUTE_UNUSED;
3a3677ff 7893{
9e7adcb3
JH
7894 enum rtx_code swapped_code = swap_condition (code);
7895 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7896 || (ix86_fp_comparison_cost (swapped_code)
7897 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
7898}
7899
0f290768 7900/* Swap, force into registers, or otherwise massage the two operands
3a3677ff
RH
7901 to a fp comparison. The operands are updated in place; the new
7902 comparsion code is returned. */
7903
7904static enum rtx_code
7905ix86_prepare_fp_compare_args (code, pop0, pop1)
7906 enum rtx_code code;
7907 rtx *pop0, *pop1;
7908{
7909 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7910 rtx op0 = *pop0, op1 = *pop1;
7911 enum machine_mode op_mode = GET_MODE (op0);
0644b628 7912 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
3a3677ff 7913
e075ae69 7914 /* All of the unordered compare instructions only work on registers.
3a3677ff
RH
7915 The same is true of the XFmode compare instructions. The same is
7916 true of the fcomi compare instructions. */
7917
0644b628
JH
7918 if (!is_sse
7919 && (fpcmp_mode == CCFPUmode
7920 || op_mode == XFmode
7921 || op_mode == TFmode
7922 || ix86_use_fcomi_compare (code)))
e075ae69 7923 {
3a3677ff
RH
7924 op0 = force_reg (op_mode, op0);
7925 op1 = force_reg (op_mode, op1);
e075ae69
RH
7926 }
7927 else
7928 {
7929 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7930 things around if they appear profitable, otherwise force op0
7931 into a register. */
7932
7933 if (standard_80387_constant_p (op0) == 0
7934 || (GET_CODE (op0) == MEM
7935 && ! (standard_80387_constant_p (op1) == 0
7936 || GET_CODE (op1) == MEM)))
32b5b1aa 7937 {
e075ae69
RH
7938 rtx tmp;
7939 tmp = op0, op0 = op1, op1 = tmp;
7940 code = swap_condition (code);
7941 }
7942
7943 if (GET_CODE (op0) != REG)
3a3677ff 7944 op0 = force_reg (op_mode, op0);
e075ae69
RH
7945
7946 if (CONSTANT_P (op1))
7947 {
7948 if (standard_80387_constant_p (op1))
3a3677ff 7949 op1 = force_reg (op_mode, op1);
e075ae69 7950 else
3a3677ff 7951 op1 = validize_mem (force_const_mem (op_mode, op1));
32b5b1aa
SC
7952 }
7953 }
e9a25f70 7954
9e7adcb3
JH
7955 /* Try to rearrange the comparison to make it cheaper. */
7956 if (ix86_fp_comparison_cost (code)
7957 > ix86_fp_comparison_cost (swap_condition (code))
558740bf 7958 && (GET_CODE (op1) == REG || !no_new_pseudos))
9e7adcb3
JH
7959 {
7960 rtx tmp;
7961 tmp = op0, op0 = op1, op1 = tmp;
7962 code = swap_condition (code);
7963 if (GET_CODE (op0) != REG)
7964 op0 = force_reg (op_mode, op0);
7965 }
7966
3a3677ff
RH
7967 *pop0 = op0;
7968 *pop1 = op1;
7969 return code;
7970}
7971
c0c102a9
JH
7972/* Convert comparison codes we use to represent FP comparison to integer
7973 code that will result in proper branch. Return UNKNOWN if no such code
7974 is available. */
7975static enum rtx_code
7976ix86_fp_compare_code_to_integer (code)
7977 enum rtx_code code;
7978{
7979 switch (code)
7980 {
7981 case GT:
7982 return GTU;
7983 case GE:
7984 return GEU;
7985 case ORDERED:
7986 case UNORDERED:
7987 return code;
7988 break;
7989 case UNEQ:
7990 return EQ;
7991 break;
7992 case UNLT:
7993 return LTU;
7994 break;
7995 case UNLE:
7996 return LEU;
7997 break;
7998 case LTGT:
7999 return NE;
8000 break;
8001 default:
8002 return UNKNOWN;
8003 }
8004}
8005
8006/* Split comparison code CODE into comparisons we can do using branch
8007 instructions. BYPASS_CODE is comparison code for branch that will
8008 branch around FIRST_CODE and SECOND_CODE. If some of branches
8009 is not required, set value to NIL.
8010 We never require more than two branches. */
8011static void
8012ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8013 enum rtx_code code, *bypass_code, *first_code, *second_code;
8014{
8015 *first_code = code;
8016 *bypass_code = NIL;
8017 *second_code = NIL;
8018
8019 /* The fcomi comparison sets flags as follows:
8020
8021 cmp ZF PF CF
8022 > 0 0 0
8023 < 0 0 1
8024 = 1 0 0
8025 un 1 1 1 */
8026
8027 switch (code)
8028 {
8029 case GT: /* GTU - CF=0 & ZF=0 */
8030 case GE: /* GEU - CF=0 */
8031 case ORDERED: /* PF=0 */
8032 case UNORDERED: /* PF=1 */
8033 case UNEQ: /* EQ - ZF=1 */
8034 case UNLT: /* LTU - CF=1 */
8035 case UNLE: /* LEU - CF=1 | ZF=1 */
8036 case LTGT: /* EQ - ZF=0 */
8037 break;
8038 case LT: /* LTU - CF=1 - fails on unordered */
8039 *first_code = UNLT;
8040 *bypass_code = UNORDERED;
8041 break;
8042 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8043 *first_code = UNLE;
8044 *bypass_code = UNORDERED;
8045 break;
8046 case EQ: /* EQ - ZF=1 - fails on unordered */
8047 *first_code = UNEQ;
8048 *bypass_code = UNORDERED;
8049 break;
8050 case NE: /* NE - ZF=0 - fails on unordered */
8051 *first_code = LTGT;
8052 *second_code = UNORDERED;
8053 break;
8054 case UNGE: /* GEU - CF=0 - fails on unordered */
8055 *first_code = GE;
8056 *second_code = UNORDERED;
8057 break;
8058 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8059 *first_code = GT;
8060 *second_code = UNORDERED;
8061 break;
8062 default:
8063 abort ();
8064 }
8065 if (!TARGET_IEEE_FP)
8066 {
8067 *second_code = NIL;
8068 *bypass_code = NIL;
8069 }
8070}
8071
9e7adcb3
JH
8072/* Return cost of comparison done fcom + arithmetics operations on AX.
8073 All following functions do use number of instructions as an cost metrics.
8074 In future this should be tweaked to compute bytes for optimize_size and
8075 take into account performance of various instructions on various CPUs. */
8076static int
8077ix86_fp_comparison_arithmetics_cost (code)
8078 enum rtx_code code;
8079{
8080 if (!TARGET_IEEE_FP)
8081 return 4;
8082 /* The cost of code output by ix86_expand_fp_compare. */
8083 switch (code)
8084 {
8085 case UNLE:
8086 case UNLT:
8087 case LTGT:
8088 case GT:
8089 case GE:
8090 case UNORDERED:
8091 case ORDERED:
8092 case UNEQ:
8093 return 4;
8094 break;
8095 case LT:
8096 case NE:
8097 case EQ:
8098 case UNGE:
8099 return 5;
8100 break;
8101 case LE:
8102 case UNGT:
8103 return 6;
8104 break;
8105 default:
8106 abort ();
8107 }
8108}
8109
8110/* Return cost of comparison done using fcomi operation.
8111 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8112static int
8113ix86_fp_comparison_fcomi_cost (code)
8114 enum rtx_code code;
8115{
8116 enum rtx_code bypass_code, first_code, second_code;
8117 /* Return arbitarily high cost when instruction is not supported - this
8118 prevents gcc from using it. */
8119 if (!TARGET_CMOVE)
8120 return 1024;
8121 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8122 return (bypass_code != NIL || second_code != NIL) + 2;
8123}
8124
8125/* Return cost of comparison done using sahf operation.
8126 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8127static int
8128ix86_fp_comparison_sahf_cost (code)
8129 enum rtx_code code;
8130{
8131 enum rtx_code bypass_code, first_code, second_code;
8132 /* Return arbitarily high cost when instruction is not preferred - this
8133 avoids gcc from using it. */
8134 if (!TARGET_USE_SAHF && !optimize_size)
8135 return 1024;
8136 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8137 return (bypass_code != NIL || second_code != NIL) + 3;
8138}
8139
8140/* Compute cost of the comparison done using any method.
8141 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8142static int
8143ix86_fp_comparison_cost (code)
8144 enum rtx_code code;
8145{
8146 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8147 int min;
8148
8149 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8150 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8151
8152 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8153 if (min > sahf_cost)
8154 min = sahf_cost;
8155 if (min > fcomi_cost)
8156 min = fcomi_cost;
8157 return min;
8158}
c0c102a9 8159
3a3677ff
RH
8160/* Generate insn patterns to do a floating point compare of OPERANDS. */
8161
9e7adcb3
JH
8162static rtx
8163ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
3a3677ff
RH
8164 enum rtx_code code;
8165 rtx op0, op1, scratch;
9e7adcb3
JH
8166 rtx *second_test;
8167 rtx *bypass_test;
3a3677ff
RH
8168{
8169 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 8170 rtx tmp, tmp2;
9e7adcb3 8171 int cost = ix86_fp_comparison_cost (code);
c0c102a9 8172 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
8173
8174 fpcmp_mode = ix86_fp_compare_mode (code);
8175 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8176
9e7adcb3
JH
8177 if (second_test)
8178 *second_test = NULL_RTX;
8179 if (bypass_test)
8180 *bypass_test = NULL_RTX;
8181
c0c102a9
JH
8182 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8183
9e7adcb3
JH
8184 /* Do fcomi/sahf based test when profitable. */
8185 if ((bypass_code == NIL || bypass_test)
8186 && (second_code == NIL || second_test)
8187 && ix86_fp_comparison_arithmetics_cost (code) > cost)
32b5b1aa 8188 {
c0c102a9
JH
8189 if (TARGET_CMOVE)
8190 {
8191 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8192 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8193 tmp);
8194 emit_insn (tmp);
8195 }
8196 else
8197 {
8198 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 8199 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
8200 if (!scratch)
8201 scratch = gen_reg_rtx (HImode);
c0c102a9
JH
8202 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8203 emit_insn (gen_x86_sahf_1 (scratch));
8204 }
e075ae69
RH
8205
8206 /* The FP codes work out to act like unsigned. */
9a915772 8207 intcmp_mode = fpcmp_mode;
9e7adcb3
JH
8208 code = first_code;
8209 if (bypass_code != NIL)
8210 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8211 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8212 const0_rtx);
8213 if (second_code != NIL)
8214 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8215 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8216 const0_rtx);
e075ae69
RH
8217 }
8218 else
8219 {
8220 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69 8221 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 8222 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
8223 if (!scratch)
8224 scratch = gen_reg_rtx (HImode);
3a3677ff 8225 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 8226
9a915772
JH
8227 /* In the unordered case, we have to check C2 for NaN's, which
8228 doesn't happen to work out to anything nice combination-wise.
8229 So do some bit twiddling on the value we've got in AH to come
8230 up with an appropriate set of condition codes. */
e075ae69 8231
9a915772
JH
8232 intcmp_mode = CCNOmode;
8233 switch (code)
32b5b1aa 8234 {
9a915772
JH
8235 case GT:
8236 case UNGT:
8237 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 8238 {
3a3677ff 8239 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 8240 code = EQ;
9a915772
JH
8241 }
8242 else
8243 {
8244 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8245 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8246 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8247 intcmp_mode = CCmode;
8248 code = GEU;
8249 }
8250 break;
8251 case LT:
8252 case UNLT:
8253 if (code == LT && TARGET_IEEE_FP)
8254 {
3a3677ff
RH
8255 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8256 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
8257 intcmp_mode = CCmode;
8258 code = EQ;
9a915772
JH
8259 }
8260 else
8261 {
8262 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8263 code = NE;
8264 }
8265 break;
8266 case GE:
8267 case UNGE:
8268 if (code == GE || !TARGET_IEEE_FP)
8269 {
3a3677ff 8270 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 8271 code = EQ;
9a915772
JH
8272 }
8273 else
8274 {
8275 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8276 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8277 GEN_INT (0x01)));
8278 code = NE;
8279 }
8280 break;
8281 case LE:
8282 case UNLE:
8283 if (code == LE && TARGET_IEEE_FP)
8284 {
3a3677ff
RH
8285 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8286 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8287 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
8288 intcmp_mode = CCmode;
8289 code = LTU;
9a915772
JH
8290 }
8291 else
8292 {
8293 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8294 code = NE;
8295 }
8296 break;
8297 case EQ:
8298 case UNEQ:
8299 if (code == EQ && TARGET_IEEE_FP)
8300 {
3a3677ff
RH
8301 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8302 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
8303 intcmp_mode = CCmode;
8304 code = EQ;
9a915772
JH
8305 }
8306 else
8307 {
3a3677ff
RH
8308 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8309 code = NE;
8310 break;
9a915772
JH
8311 }
8312 break;
8313 case NE:
8314 case LTGT:
8315 if (code == NE && TARGET_IEEE_FP)
8316 {
3a3677ff 8317 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9a915772
JH
8318 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8319 GEN_INT (0x40)));
3a3677ff 8320 code = NE;
9a915772
JH
8321 }
8322 else
8323 {
3a3677ff
RH
8324 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8325 code = EQ;
32b5b1aa 8326 }
9a915772
JH
8327 break;
8328
8329 case UNORDERED:
8330 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8331 code = NE;
8332 break;
8333 case ORDERED:
8334 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8335 code = EQ;
8336 break;
8337
8338 default:
8339 abort ();
32b5b1aa 8340 }
32b5b1aa 8341 }
e075ae69
RH
8342
8343 /* Return the test that should be put into the flags user, i.e.
8344 the bcc, scc, or cmov instruction. */
8345 return gen_rtx_fmt_ee (code, VOIDmode,
8346 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8347 const0_rtx);
8348}
8349
9e3e266c 8350rtx
a1b8572c 8351ix86_expand_compare (code, second_test, bypass_test)
e075ae69 8352 enum rtx_code code;
a1b8572c 8353 rtx *second_test, *bypass_test;
e075ae69
RH
8354{
8355 rtx op0, op1, ret;
8356 op0 = ix86_compare_op0;
8357 op1 = ix86_compare_op1;
8358
a1b8572c
JH
8359 if (second_test)
8360 *second_test = NULL_RTX;
8361 if (bypass_test)
8362 *bypass_test = NULL_RTX;
8363
e075ae69 8364 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 8365 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 8366 second_test, bypass_test);
32b5b1aa 8367 else
e075ae69
RH
8368 ret = ix86_expand_int_compare (code, op0, op1);
8369
8370 return ret;
8371}
8372
03598dea
JH
8373/* Return true if the CODE will result in nontrivial jump sequence. */
8374bool
8375ix86_fp_jump_nontrivial_p (code)
8376 enum rtx_code code;
8377{
8378 enum rtx_code bypass_code, first_code, second_code;
8379 if (!TARGET_CMOVE)
8380 return true;
8381 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8382 return bypass_code != NIL || second_code != NIL;
8383}
8384
e075ae69 8385void
3a3677ff 8386ix86_expand_branch (code, label)
e075ae69 8387 enum rtx_code code;
e075ae69
RH
8388 rtx label;
8389{
3a3677ff 8390 rtx tmp;
e075ae69 8391
3a3677ff 8392 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 8393 {
3a3677ff
RH
8394 case QImode:
8395 case HImode:
8396 case SImode:
0d7d98ee 8397 simple:
a1b8572c 8398 tmp = ix86_expand_compare (code, NULL, NULL);
e075ae69
RH
8399 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8400 gen_rtx_LABEL_REF (VOIDmode, label),
8401 pc_rtx);
8402 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 8403 return;
e075ae69 8404
3a3677ff
RH
8405 case SFmode:
8406 case DFmode:
0f290768 8407 case XFmode:
2b589241 8408 case TFmode:
3a3677ff
RH
8409 {
8410 rtvec vec;
8411 int use_fcomi;
03598dea 8412 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
8413
8414 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8415 &ix86_compare_op1);
fce5a9f2 8416
03598dea
JH
8417 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8418
8419 /* Check whether we will use the natural sequence with one jump. If
8420 so, we can expand jump early. Otherwise delay expansion by
8421 creating compound insn to not confuse optimizers. */
8422 if (bypass_code == NIL && second_code == NIL
8423 && TARGET_CMOVE)
8424 {
8425 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8426 gen_rtx_LABEL_REF (VOIDmode, label),
8427 pc_rtx, NULL_RTX);
8428 }
8429 else
8430 {
8431 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8432 ix86_compare_op0, ix86_compare_op1);
8433 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8434 gen_rtx_LABEL_REF (VOIDmode, label),
8435 pc_rtx);
8436 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8437
8438 use_fcomi = ix86_use_fcomi_compare (code);
8439 vec = rtvec_alloc (3 + !use_fcomi);
8440 RTVEC_ELT (vec, 0) = tmp;
8441 RTVEC_ELT (vec, 1)
8442 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8443 RTVEC_ELT (vec, 2)
8444 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8445 if (! use_fcomi)
8446 RTVEC_ELT (vec, 3)
8447 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8448
8449 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8450 }
3a3677ff
RH
8451 return;
8452 }
32b5b1aa 8453
3a3677ff 8454 case DImode:
0d7d98ee
JH
8455 if (TARGET_64BIT)
8456 goto simple;
3a3677ff
RH
8457 /* Expand DImode branch into multiple compare+branch. */
8458 {
8459 rtx lo[2], hi[2], label2;
8460 enum rtx_code code1, code2, code3;
32b5b1aa 8461
3a3677ff
RH
8462 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8463 {
8464 tmp = ix86_compare_op0;
8465 ix86_compare_op0 = ix86_compare_op1;
8466 ix86_compare_op1 = tmp;
8467 code = swap_condition (code);
8468 }
8469 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8470 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 8471
3a3677ff
RH
8472 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8473 avoid two branches. This costs one extra insn, so disable when
8474 optimizing for size. */
32b5b1aa 8475
3a3677ff
RH
8476 if ((code == EQ || code == NE)
8477 && (!optimize_size
8478 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8479 {
8480 rtx xor0, xor1;
32b5b1aa 8481
3a3677ff
RH
8482 xor1 = hi[0];
8483 if (hi[1] != const0_rtx)
8484 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8485 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 8486
3a3677ff
RH
8487 xor0 = lo[0];
8488 if (lo[1] != const0_rtx)
8489 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8490 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 8491
3a3677ff
RH
8492 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8493 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 8494
3a3677ff
RH
8495 ix86_compare_op0 = tmp;
8496 ix86_compare_op1 = const0_rtx;
8497 ix86_expand_branch (code, label);
8498 return;
8499 }
e075ae69 8500
1f9124e4
JJ
8501 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8502 op1 is a constant and the low word is zero, then we can just
8503 examine the high word. */
32b5b1aa 8504
1f9124e4
JJ
8505 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8506 switch (code)
8507 {
8508 case LT: case LTU: case GE: case GEU:
8509 ix86_compare_op0 = hi[0];
8510 ix86_compare_op1 = hi[1];
8511 ix86_expand_branch (code, label);
8512 return;
8513 default:
8514 break;
8515 }
e075ae69 8516
3a3677ff 8517 /* Otherwise, we need two or three jumps. */
e075ae69 8518
3a3677ff 8519 label2 = gen_label_rtx ();
e075ae69 8520
3a3677ff
RH
8521 code1 = code;
8522 code2 = swap_condition (code);
8523 code3 = unsigned_condition (code);
e075ae69 8524
3a3677ff
RH
8525 switch (code)
8526 {
8527 case LT: case GT: case LTU: case GTU:
8528 break;
e075ae69 8529
3a3677ff
RH
8530 case LE: code1 = LT; code2 = GT; break;
8531 case GE: code1 = GT; code2 = LT; break;
8532 case LEU: code1 = LTU; code2 = GTU; break;
8533 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 8534
3a3677ff
RH
8535 case EQ: code1 = NIL; code2 = NE; break;
8536 case NE: code2 = NIL; break;
e075ae69 8537
3a3677ff
RH
8538 default:
8539 abort ();
8540 }
e075ae69 8541
3a3677ff
RH
8542 /*
8543 * a < b =>
8544 * if (hi(a) < hi(b)) goto true;
8545 * if (hi(a) > hi(b)) goto false;
8546 * if (lo(a) < lo(b)) goto true;
8547 * false:
8548 */
8549
8550 ix86_compare_op0 = hi[0];
8551 ix86_compare_op1 = hi[1];
8552
8553 if (code1 != NIL)
8554 ix86_expand_branch (code1, label);
8555 if (code2 != NIL)
8556 ix86_expand_branch (code2, label2);
8557
8558 ix86_compare_op0 = lo[0];
8559 ix86_compare_op1 = lo[1];
8560 ix86_expand_branch (code3, label);
8561
8562 if (code2 != NIL)
8563 emit_label (label2);
8564 return;
8565 }
e075ae69 8566
3a3677ff
RH
8567 default:
8568 abort ();
8569 }
32b5b1aa 8570}
e075ae69 8571
9e7adcb3
JH
8572/* Split branch based on floating point condition. */
8573void
03598dea
JH
8574ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8575 enum rtx_code code;
8576 rtx op1, op2, target1, target2, tmp;
9e7adcb3
JH
8577{
8578 rtx second, bypass;
8579 rtx label = NULL_RTX;
03598dea 8580 rtx condition;
6b24c259
JH
8581 int bypass_probability = -1, second_probability = -1, probability = -1;
8582 rtx i;
9e7adcb3
JH
8583
8584 if (target2 != pc_rtx)
8585 {
8586 rtx tmp = target2;
8587 code = reverse_condition_maybe_unordered (code);
8588 target2 = target1;
8589 target1 = tmp;
8590 }
8591
8592 condition = ix86_expand_fp_compare (code, op1, op2,
8593 tmp, &second, &bypass);
6b24c259
JH
8594
8595 if (split_branch_probability >= 0)
8596 {
8597 /* Distribute the probabilities across the jumps.
8598 Assume the BYPASS and SECOND to be always test
8599 for UNORDERED. */
8600 probability = split_branch_probability;
8601
d6a7951f 8602 /* Value of 1 is low enough to make no need for probability
6b24c259
JH
8603 to be updated. Later we may run some experiments and see
8604 if unordered values are more frequent in practice. */
8605 if (bypass)
8606 bypass_probability = 1;
8607 if (second)
8608 second_probability = 1;
8609 }
9e7adcb3
JH
8610 if (bypass != NULL_RTX)
8611 {
8612 label = gen_label_rtx ();
6b24c259
JH
8613 i = emit_jump_insn (gen_rtx_SET
8614 (VOIDmode, pc_rtx,
8615 gen_rtx_IF_THEN_ELSE (VOIDmode,
8616 bypass,
8617 gen_rtx_LABEL_REF (VOIDmode,
8618 label),
8619 pc_rtx)));
8620 if (bypass_probability >= 0)
8621 REG_NOTES (i)
8622 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8623 GEN_INT (bypass_probability),
8624 REG_NOTES (i));
8625 }
8626 i = emit_jump_insn (gen_rtx_SET
9e7adcb3
JH
8627 (VOIDmode, pc_rtx,
8628 gen_rtx_IF_THEN_ELSE (VOIDmode,
6b24c259
JH
8629 condition, target1, target2)));
8630 if (probability >= 0)
8631 REG_NOTES (i)
8632 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8633 GEN_INT (probability),
8634 REG_NOTES (i));
8635 if (second != NULL_RTX)
9e7adcb3 8636 {
6b24c259
JH
8637 i = emit_jump_insn (gen_rtx_SET
8638 (VOIDmode, pc_rtx,
8639 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8640 target2)));
8641 if (second_probability >= 0)
8642 REG_NOTES (i)
8643 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8644 GEN_INT (second_probability),
8645 REG_NOTES (i));
9e7adcb3 8646 }
9e7adcb3
JH
8647 if (label != NULL_RTX)
8648 emit_label (label);
8649}
8650
32b5b1aa 8651int
3a3677ff 8652ix86_expand_setcc (code, dest)
e075ae69 8653 enum rtx_code code;
e075ae69 8654 rtx dest;
32b5b1aa 8655{
a1b8572c
JH
8656 rtx ret, tmp, tmpreg;
8657 rtx second_test, bypass_test;
e075ae69 8658
885a70fd
JH
8659 if (GET_MODE (ix86_compare_op0) == DImode
8660 && !TARGET_64BIT)
e075ae69
RH
8661 return 0; /* FAIL */
8662
b932f770
JH
8663 if (GET_MODE (dest) != QImode)
8664 abort ();
e075ae69 8665
a1b8572c 8666 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
8667 PUT_MODE (ret, QImode);
8668
8669 tmp = dest;
a1b8572c 8670 tmpreg = dest;
32b5b1aa 8671
e075ae69 8672 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
8673 if (bypass_test || second_test)
8674 {
8675 rtx test = second_test;
8676 int bypass = 0;
8677 rtx tmp2 = gen_reg_rtx (QImode);
8678 if (bypass_test)
8679 {
8680 if (second_test)
b531087a 8681 abort ();
a1b8572c
JH
8682 test = bypass_test;
8683 bypass = 1;
8684 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8685 }
8686 PUT_MODE (test, QImode);
8687 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8688
8689 if (bypass)
8690 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8691 else
8692 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8693 }
e075ae69 8694
e075ae69 8695 return 1; /* DONE */
32b5b1aa 8696}
e075ae69 8697
32b5b1aa 8698int
e075ae69
RH
8699ix86_expand_int_movcc (operands)
8700 rtx operands[];
32b5b1aa 8701{
e075ae69
RH
8702 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8703 rtx compare_seq, compare_op;
a1b8572c 8704 rtx second_test, bypass_test;
635559ab 8705 enum machine_mode mode = GET_MODE (operands[0]);
32b5b1aa 8706
36583fea
JH
8707 /* When the compare code is not LTU or GEU, we can not use sbbl case.
8708 In case comparsion is done with immediate, we can convert it to LTU or
8709 GEU by altering the integer. */
8710
8711 if ((code == LEU || code == GTU)
8712 && GET_CODE (ix86_compare_op1) == CONST_INT
635559ab 8713 && mode != HImode
261376e7
RH
8714 && INTVAL (ix86_compare_op1) != -1
8715 /* For x86-64, the immediate field in the instruction is 32-bit
8716 signed, so we can't increment a DImode value above 0x7fffffff. */
74411039
JH
8717 && (!TARGET_64BIT
8718 || GET_MODE (ix86_compare_op0) != DImode
261376e7 8719 || INTVAL (ix86_compare_op1) != 0x7fffffff)
0f290768 8720 && GET_CODE (operands[2]) == CONST_INT
36583fea
JH
8721 && GET_CODE (operands[3]) == CONST_INT)
8722 {
8723 if (code == LEU)
8724 code = LTU;
8725 else
8726 code = GEU;
261376e7
RH
8727 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8728 GET_MODE (ix86_compare_op0));
36583fea 8729 }
3a3677ff 8730
e075ae69 8731 start_sequence ();
a1b8572c 8732 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 8733 compare_seq = get_insns ();
e075ae69
RH
8734 end_sequence ();
8735
8736 compare_code = GET_CODE (compare_op);
8737
8738 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8739 HImode insns, we'd be swallowed in word prefix ops. */
8740
635559ab
JH
8741 if (mode != HImode
8742 && (mode != DImode || TARGET_64BIT)
0f290768 8743 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
8744 && GET_CODE (operands[3]) == CONST_INT)
8745 {
8746 rtx out = operands[0];
8747 HOST_WIDE_INT ct = INTVAL (operands[2]);
8748 HOST_WIDE_INT cf = INTVAL (operands[3]);
8749 HOST_WIDE_INT diff;
8750
a1b8572c
JH
8751 if ((compare_code == LTU || compare_code == GEU)
8752 && !second_test && !bypass_test)
e075ae69 8753 {
e075ae69
RH
8754 /* Detect overlap between destination and compare sources. */
8755 rtx tmp = out;
8756
0f290768 8757 /* To simplify rest of code, restrict to the GEU case. */
36583fea
JH
8758 if (compare_code == LTU)
8759 {
8760 int tmp = ct;
8761 ct = cf;
8762 cf = tmp;
8763 compare_code = reverse_condition (compare_code);
8764 code = reverse_condition (code);
8765 }
8766 diff = ct - cf;
8767
e075ae69 8768 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
a500c31b 8769 || reg_overlap_mentioned_p (out, ix86_compare_op1))
635559ab 8770 tmp = gen_reg_rtx (mode);
e075ae69
RH
8771
8772 emit_insn (compare_seq);
635559ab 8773 if (mode == DImode)
14f73b5a
JH
8774 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8775 else
8776 emit_insn (gen_x86_movsicc_0_m1 (tmp));
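 /* Sketch of the idiom just emitted (added for exposition):
  *	cmpl	op1, op0
  *	sbbl	%reg, %reg
  * computes reg = reg - reg - CF, i.e. -1 when the unsigned compare
  * sets the carry (LTU true) and 0 otherwise; the constant cases
  * below turn this 0/-1 mask into ct/cf. */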
e075ae69 8777
36583fea
JH
8778 if (diff == 1)
8779 {
8780 /*
8781 * cmpl op0,op1
8782 * sbbl dest,dest
8783 * [addl dest, ct]
8784 *
8785 * Size 5 - 8.
8786 */
8787 if (ct)
635559ab
JH
8788 tmp = expand_simple_binop (mode, PLUS,
8789 tmp, GEN_INT (ct),
8790 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8791 }
8792 else if (cf == -1)
8793 {
8794 /*
8795 * cmpl op0,op1
8796 * sbbl dest,dest
8797 * orl $ct, dest
8798 *
8799 * Size 8.
8800 */
635559ab
JH
8801 tmp = expand_simple_binop (mode, IOR,
8802 tmp, GEN_INT (ct),
8803 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8804 }
8805 else if (diff == -1 && ct)
8806 {
8807 /*
8808 * cmpl op0,op1
8809 * sbbl dest,dest
06ec023f 8810 * notl dest
36583fea
JH
8811 * [addl dest, cf]
8812 *
8813 * Size 8 - 11.
8814 */
635559ab
JH
8815 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8816 if (cf)
8817 tmp = expand_simple_binop (mode, PLUS,
8818 tmp, GEN_INT (cf),
8819 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8820 }
8821 else
8822 {
8823 /*
8824 * cmpl op0,op1
8825 * sbbl dest,dest
06ec023f 8826 * [notl dest]
36583fea
JH
8827 * andl cf - ct, dest
8828 * [addl dest, ct]
8829 *
8830 * Size 8 - 11.
8831 */
06ec023f
RB
8832
8833 if (cf == 0)
8834 {
8835 cf = ct;
8836 ct = 0;
8837 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8838 }
8839
635559ab
JH
8840 tmp = expand_simple_binop (mode, AND,
8841 tmp,
d8bf17f9 8842 gen_int_mode (cf - ct, mode),
635559ab
JH
8843 tmp, 1, OPTAB_DIRECT);
8844 if (ct)
8845 tmp = expand_simple_binop (mode, PLUS,
8846 tmp, GEN_INT (ct),
8847 tmp, 1, OPTAB_DIRECT);
36583fea 8848 }
e075ae69
RH
8849
8850 if (tmp != out)
8851 emit_move_insn (out, tmp);
8852
8853 return 1; /* DONE */
8854 }
8855
8856 diff = ct - cf;
8857 if (diff < 0)
8858 {
8859 HOST_WIDE_INT tmp;
8860 tmp = ct, ct = cf, cf = tmp;
8861 diff = -diff;
734dba19
JH
8862 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8863 {
 8864 /* We may be reversing an unordered compare to a normal compare, which
 8865 is not valid in general (we may convert a non-trapping condition
 8866 into a trapping one); however, on i386 we currently emit all
 8867 comparisons unordered. */
8868 compare_code = reverse_condition_maybe_unordered (compare_code);
8869 code = reverse_condition_maybe_unordered (code);
8870 }
8871 else
8872 {
8873 compare_code = reverse_condition (compare_code);
8874 code = reverse_condition (code);
8875 }
e075ae69 8876 }
0f2a3457
JJ
8877
8878 compare_code = NIL;
8879 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8880 && GET_CODE (ix86_compare_op1) == CONST_INT)
8881 {
8882 if (ix86_compare_op1 == const0_rtx
8883 && (code == LT || code == GE))
8884 compare_code = code;
8885 else if (ix86_compare_op1 == constm1_rtx)
8886 {
8887 if (code == LE)
8888 compare_code = LT;
8889 else if (code == GT)
8890 compare_code = GE;
8891 }
8892 }
8893
8894 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8895 if (compare_code != NIL
8896 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8897 && (cf == -1 || ct == -1))
8898 {
8899 /* If lea code below could be used, only optimize
8900 if it results in a 2 insn sequence. */
8901
8902 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8903 || diff == 3 || diff == 5 || diff == 9)
8904 || (compare_code == LT && ct == -1)
8905 || (compare_code == GE && cf == -1))
8906 {
8907 /*
8908 * notl op1 (if necessary)
8909 * sarl $31, op1
8910 * orl cf, op1
8911 */
8912 if (ct != -1)
8913 {
8914 cf = ct;
8915 ct = -1;
8916 code = reverse_condition (code);
8917 }
8918
8919 out = emit_store_flag (out, code, ix86_compare_op0,
8920 ix86_compare_op1, VOIDmode, 0, -1);
8921
8922 out = expand_simple_binop (mode, IOR,
8923 out, GEN_INT (cf),
8924 out, 1, OPTAB_DIRECT);
8925 if (out != operands[0])
8926 emit_move_insn (operands[0], out);
8927
8928 return 1; /* DONE */
8929 }
8930 }
8931
635559ab
JH
8932 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8933 || diff == 3 || diff == 5 || diff == 9)
8934 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
e075ae69
RH
8935 {
8936 /*
8937 * xorl dest,dest
8938 * cmpl op1,op2
8939 * setcc dest
8940 * lea cf(dest*(ct-cf)),dest
8941 *
8942 * Size 14.
8943 *
8944 * This also catches the degenerate setcc-only case.
8945 */
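 /* Added note: diff in {1, 2, 4, 8} maps onto the lea scale directly,
    while {3, 5, 9} uses the same register as base and index,
    e.g. diff == 5 gives "lea cf(out,out,4), out". */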
8946
8947 rtx tmp;
8948 int nops;
8949
8950 out = emit_store_flag (out, code, ix86_compare_op0,
8951 ix86_compare_op1, VOIDmode, 0, 1);
8952
8953 nops = 0;
97f51ac4
RB
 8954 /* On x86_64 the lea instruction operates on Pmode, so we need
 8955 to do the arithmetic in the matching mode. */
e075ae69 8956 if (diff == 1)
14f73b5a 8957 tmp = out;
e075ae69
RH
8958 else
8959 {
885a70fd 8960 rtx out1;
14f73b5a 8961 out1 = out;
635559ab 8962 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
8963 nops++;
8964 if (diff & 1)
8965 {
635559ab 8966 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
8967 nops++;
8968 }
8969 }
8970 if (cf != 0)
8971 {
635559ab 8972 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
8973 nops++;
8974 }
885a70fd
JH
8975 if (tmp != out
8976 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
e075ae69 8977 {
14f73b5a 8978 if (nops == 1)
e075ae69
RH
8979 {
8980 rtx clob;
8981
8982 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8983 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8984
8985 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8986 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8987 emit_insn (tmp);
8988 }
8989 else
8990 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8991 }
8992 if (out != operands[0])
8993 emit_move_insn (operands[0], out);
8994
8995 return 1; /* DONE */
8996 }
8997
8998 /*
8999 * General case: Jumpful:
9000 * xorl dest,dest cmpl op1, op2
9001 * cmpl op1, op2 movl ct, dest
9002 * setcc dest jcc 1f
9003 * decl dest movl cf, dest
9004 * andl (cf-ct),dest 1:
9005 * addl ct,dest
0f290768 9006 *
e075ae69
RH
9007 * Size 20. Size 14.
9008 *
9009 * This is reasonably steep, but branch mispredict costs are
9010 * high on modern cpus, so consider failing only if optimizing
9011 * for space.
9012 *
9013 * %%% Parameterize branch_cost on the tuning architecture, then
9014 * use that. The 80386 couldn't care less about mispredicts.
9015 */
9016
9017 if (!optimize_size && !TARGET_CMOVE)
9018 {
97f51ac4 9019 if (cf == 0)
e075ae69 9020 {
97f51ac4
RB
9021 cf = ct;
9022 ct = 0;
734dba19 9023 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
0f2a3457
JJ
 9024 /* We may be reversing an unordered compare to a normal compare,
 9025 which is not valid in general (we may convert a non-trapping
 9026 condition into a trapping one); however, on i386 we currently
 9027 emit all comparisons unordered. */
9028 code = reverse_condition_maybe_unordered (code);
9029 else
9030 {
9031 code = reverse_condition (code);
9032 if (compare_code != NIL)
9033 compare_code = reverse_condition (compare_code);
9034 }
9035 }
9036
9037 if (compare_code != NIL)
9038 {
9039 /* notl op1 (if needed)
9040 sarl $31, op1
9041 andl (cf-ct), op1
9042 addl ct, op1
9043
9044 For x < 0 (resp. x <= -1) there will be no notl,
9045 so if possible swap the constants to get rid of the
9046 complement.
9047 True/false will be -1/0 while code below (store flag
9048 followed by decrement) is 0/-1, so the constants need
9049 to be exchanged once more. */
9050
9051 if (compare_code == GE || !cf)
734dba19 9052 {
0f2a3457
JJ
9053 code = reverse_condition (code);
9054 compare_code = LT;
734dba19
JH
9055 }
9056 else
9057 {
0f2a3457
JJ
9058 HOST_WIDE_INT tmp = cf;
9059 cf = ct;
9060 ct = tmp;
734dba19 9061 }
0f2a3457
JJ
9062
9063 out = emit_store_flag (out, code, ix86_compare_op0,
9064 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 9065 }
0f2a3457
JJ
9066 else
9067 {
9068 out = emit_store_flag (out, code, ix86_compare_op0,
9069 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 9070
97f51ac4 9071 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
0f2a3457
JJ
9072 out, 1, OPTAB_DIRECT);
9073 }
e075ae69 9074
97f51ac4 9075 out = expand_simple_binop (mode, AND, out,
d8bf17f9 9076 gen_int_mode (cf - ct, mode),
635559ab 9077 out, 1, OPTAB_DIRECT);
97f51ac4
RB
9078 if (ct)
9079 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9080 out, 1, OPTAB_DIRECT);
e075ae69
RH
9081 if (out != operands[0])
9082 emit_move_insn (operands[0], out);
9083
9084 return 1; /* DONE */
9085 }
9086 }
9087
9088 if (!TARGET_CMOVE)
9089 {
 9090 /* Try a few more things with specific constants and a variable. */
9091
78a0d70c 9092 optab op;
e075ae69
RH
9093 rtx var, orig_out, out, tmp;
9094
9095 if (optimize_size)
9096 return 0; /* FAIL */
9097
0f290768 9098 /* If one of the two operands is an interesting constant, recurse to
e075ae69 9099 load a 0/-1 mask and then combine the variable in with AND or IOR. */
0f290768 9100
e075ae69
RH
9101 if (GET_CODE (operands[2]) == CONST_INT)
9102 {
9103 var = operands[3];
9104 if (INTVAL (operands[2]) == 0)
9105 operands[3] = constm1_rtx, op = and_optab;
9106 else if (INTVAL (operands[2]) == -1)
9107 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
9108 else
9109 return 0; /* FAIL */
e075ae69
RH
9110 }
9111 else if (GET_CODE (operands[3]) == CONST_INT)
9112 {
9113 var = operands[2];
9114 if (INTVAL (operands[3]) == 0)
9115 operands[2] = constm1_rtx, op = and_optab;
9116 else if (INTVAL (operands[3]) == -1)
9117 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
9118 else
9119 return 0; /* FAIL */
e075ae69 9120 }
78a0d70c 9121 else
e075ae69
RH
9122 return 0; /* FAIL */
9123
9124 orig_out = operands[0];
635559ab 9125 tmp = gen_reg_rtx (mode);
e075ae69
RH
9126 operands[0] = tmp;
9127
9128 /* Recurse to get the constant loaded. */
9129 if (ix86_expand_int_movcc (operands) == 0)
9130 return 0; /* FAIL */
9131
9132 /* Mask in the interesting variable. */
635559ab 9133 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69
RH
9134 OPTAB_WIDEN);
9135 if (out != orig_out)
9136 emit_move_insn (orig_out, out);
9137
9138 return 1; /* DONE */
9139 }
9140
9141 /*
9142 * For comparison with above,
9143 *
9144 * movl cf,dest
9145 * movl ct,tmp
9146 * cmpl op1,op2
9147 * cmovcc tmp,dest
9148 *
9149 * Size 15.
9150 */
9151
635559ab
JH
9152 if (! nonimmediate_operand (operands[2], mode))
9153 operands[2] = force_reg (mode, operands[2]);
9154 if (! nonimmediate_operand (operands[3], mode))
9155 operands[3] = force_reg (mode, operands[3]);
e075ae69 9156
a1b8572c
JH
9157 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9158 {
635559ab 9159 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9160 emit_move_insn (tmp, operands[3]);
9161 operands[3] = tmp;
9162 }
9163 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9164 {
635559ab 9165 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9166 emit_move_insn (tmp, operands[2]);
9167 operands[2] = tmp;
9168 }
c9682caf
JH
9169 if (! register_operand (operands[2], VOIDmode)
9170 && ! register_operand (operands[3], VOIDmode))
635559ab 9171 operands[2] = force_reg (mode, operands[2]);
a1b8572c 9172
e075ae69
RH
9173 emit_insn (compare_seq);
9174 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9175 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
9176 compare_op, operands[2],
9177 operands[3])));
a1b8572c
JH
9178 if (bypass_test)
9179 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9180 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c
JH
9181 bypass_test,
9182 operands[3],
9183 operands[0])));
9184 if (second_test)
9185 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9186 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c
JH
9187 second_test,
9188 operands[2],
9189 operands[0])));
e075ae69
RH
9190
9191 return 1; /* DONE */
e9a25f70 9192}
e075ae69 9193
32b5b1aa 9194int
e075ae69
RH
9195ix86_expand_fp_movcc (operands)
9196 rtx operands[];
32b5b1aa 9197{
e075ae69 9198 enum rtx_code code;
e075ae69 9199 rtx tmp;
a1b8572c 9200 rtx compare_op, second_test, bypass_test;
32b5b1aa 9201
0073023d
JH
 9202 /* For SF/DFmode conditional moves based on comparisons
 9203 in the same mode, we may want to use SSE min/max instructions. */
965f5423
JH
9204 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9205 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
0073023d 9206 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
fa9f36a1
JH
 9207 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9208 && (!TARGET_IEEE_FP
9209 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
0073023d
JH
9210 /* We may be called from the post-reload splitter. */
9211 && (!REG_P (operands[0])
9212 || SSE_REG_P (operands[0])
52a661a6 9213 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
0073023d
JH
9214 {
9215 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9216 code = GET_CODE (operands[1]);
9217
 9218 /* See if we have a (cross) match between the comparison operands
 9219 and the conditional move operands. */
9220 if (rtx_equal_p (operands[2], op1))
9221 {
9222 rtx tmp = op0;
9223 op0 = op1;
9224 op1 = tmp;
9225 code = reverse_condition_maybe_unordered (code);
9226 }
9227 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9228 {
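 /* Added note: here operands[0] = (op0 < op1 ? op0 : op1) maps
    directly onto SSE minss/minsd, and the GT form onto maxss/maxsd;
    their asymmetric handling of NaN operands is why the
    TARGET_IEEE_FP restrictions above are needed. */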
9229 /* Check for min operation. */
9230 if (code == LT)
9231 {
9232 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9233 if (memory_operand (op0, VOIDmode))
9234 op0 = force_reg (GET_MODE (operands[0]), op0);
9235 if (GET_MODE (operands[0]) == SFmode)
9236 emit_insn (gen_minsf3 (operands[0], op0, op1));
9237 else
9238 emit_insn (gen_mindf3 (operands[0], op0, op1));
9239 return 1;
9240 }
9241 /* Check for max operation. */
9242 if (code == GT)
9243 {
9244 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9245 if (memory_operand (op0, VOIDmode))
9246 op0 = force_reg (GET_MODE (operands[0]), op0);
9247 if (GET_MODE (operands[0]) == SFmode)
9248 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9249 else
9250 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9251 return 1;
9252 }
9253 }
 9254 /* Force the condition to be an sse_comparison_operator. In non-IEEE
 9255 mode, also try to canonicalize the destination operand to be
 9256 first in the comparison - this helps reload to avoid extra
 9257 moves. */
9258 if (!sse_comparison_operator (operands[1], VOIDmode)
9259 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9260 {
9261 rtx tmp = ix86_compare_op0;
9262 ix86_compare_op0 = ix86_compare_op1;
9263 ix86_compare_op1 = tmp;
9264 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9265 VOIDmode, ix86_compare_op0,
9266 ix86_compare_op1);
9267 }
 9268 /* Similarly, try to make the result the first operand of the
fa9f36a1
JH
 9269 conditional move. We also don't support the NE comparison on SSE,
 9270 so try to avoid it. */
037f20f1
JH
9271 if ((rtx_equal_p (operands[0], operands[3])
9272 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9273 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
0073023d
JH
9274 {
9275 rtx tmp = operands[2];
9276 operands[2] = operands[3];
92d0fb09 9277 operands[3] = tmp;
0073023d
JH
9278 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9279 (GET_CODE (operands[1])),
9280 VOIDmode, ix86_compare_op0,
9281 ix86_compare_op1);
9282 }
9283 if (GET_MODE (operands[0]) == SFmode)
9284 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9285 operands[2], operands[3],
9286 ix86_compare_op0, ix86_compare_op1));
9287 else
9288 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9289 operands[2], operands[3],
9290 ix86_compare_op0, ix86_compare_op1));
9291 return 1;
9292 }
9293
e075ae69 9294 /* The floating point conditional move instructions don't directly
0f290768 9295 support conditions resulting from a signed integer comparison. */
32b5b1aa 9296
e075ae69 9297 code = GET_CODE (operands[1]);
a1b8572c 9298 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9e7adcb3
JH
9299
9302
a1b8572c 9303 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 9304 {
a1b8572c 9305 if (second_test != NULL || bypass_test != NULL)
b531087a 9306 abort ();
e075ae69 9307 tmp = gen_reg_rtx (QImode);
3a3677ff 9308 ix86_expand_setcc (code, tmp);
e075ae69
RH
9309 code = NE;
9310 ix86_compare_op0 = tmp;
9311 ix86_compare_op1 = const0_rtx;
a1b8572c
JH
9312 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9313 }
9314 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9315 {
9316 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9317 emit_move_insn (tmp, operands[3]);
9318 operands[3] = tmp;
9319 }
9320 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9321 {
9322 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9323 emit_move_insn (tmp, operands[2]);
9324 operands[2] = tmp;
e075ae69 9325 }
e9a25f70 9326
e075ae69
RH
9327 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9328 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9e7adcb3 9329 compare_op,
e075ae69
RH
9330 operands[2],
9331 operands[3])));
a1b8572c
JH
9332 if (bypass_test)
9333 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9334 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9335 bypass_test,
9336 operands[3],
9337 operands[0])));
9338 if (second_test)
9339 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9340 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9341 second_test,
9342 operands[2],
9343 operands[0])));
32b5b1aa 9344
e075ae69 9345 return 1;
32b5b1aa
SC
9346}
9347
2450a057
JH
 9348/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
 9349 works for floating point parameters and non-offsettable memories.
 9350 For pushes, it returns just stack offsets; the values will be saved
 9351 in the right order. At most three parts are generated. */
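/* Added illustration: on ia32, DImode and DFmode split into two SImode
   parts and XFmode/TFmode into three; on x86-64, XFmode/TFmode split
   into a DImode part plus an SImode part, as computed below. */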
9352
2b589241 9353static int
2450a057
JH
9354ix86_split_to_parts (operand, parts, mode)
9355 rtx operand;
9356 rtx *parts;
9357 enum machine_mode mode;
32b5b1aa 9358{
26e5b205
JH
9359 int size;
9360
9361 if (!TARGET_64BIT)
9362 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9363 else
9364 size = (GET_MODE_SIZE (mode) + 4) / 8;
2450a057 9365
a7180f70
BS
9366 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9367 abort ();
2450a057
JH
9368 if (size < 2 || size > 3)
9369 abort ();
9370
f996902d
RH
 9371 /* Optimize constant pool references into immediates. This is used by fp
 9372 moves, which force all constants to memory to allow combining. */
9373 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9374 {
9375 rtx tmp = maybe_get_pool_constant (operand);
9376 if (tmp)
9377 operand = tmp;
9378 }
d7a29404 9379
2450a057 9380 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
e075ae69 9381 {
2450a057
JH
 9382 /* The only non-offsettable memories we handle are pushes. */
9383 if (! push_operand (operand, VOIDmode))
9384 abort ();
9385
26e5b205
JH
9386 operand = copy_rtx (operand);
9387 PUT_MODE (operand, Pmode);
2450a057
JH
9388 parts[0] = parts[1] = parts[2] = operand;
9389 }
26e5b205 9390 else if (!TARGET_64BIT)
2450a057
JH
9391 {
9392 if (mode == DImode)
9393 split_di (&operand, 1, &parts[0], &parts[1]);
9394 else
e075ae69 9395 {
2450a057
JH
9396 if (REG_P (operand))
9397 {
9398 if (!reload_completed)
9399 abort ();
9400 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9401 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9402 if (size == 3)
9403 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9404 }
9405 else if (offsettable_memref_p (operand))
9406 {
f4ef873c 9407 operand = adjust_address (operand, SImode, 0);
2450a057 9408 parts[0] = operand;
b72f00af 9409 parts[1] = adjust_address (operand, SImode, 4);
2450a057 9410 if (size == 3)
b72f00af 9411 parts[2] = adjust_address (operand, SImode, 8);
2450a057
JH
9412 }
9413 else if (GET_CODE (operand) == CONST_DOUBLE)
9414 {
9415 REAL_VALUE_TYPE r;
2b589241 9416 long l[4];
2450a057
JH
9417
9418 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9419 switch (mode)
9420 {
9421 case XFmode:
2b589241 9422 case TFmode:
2450a057 9423 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
d8bf17f9 9424 parts[2] = gen_int_mode (l[2], SImode);
2450a057
JH
9425 break;
9426 case DFmode:
9427 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9428 break;
9429 default:
9430 abort ();
9431 }
d8bf17f9
LB
9432 parts[1] = gen_int_mode (l[1], SImode);
9433 parts[0] = gen_int_mode (l[0], SImode);
2450a057
JH
9434 }
9435 else
9436 abort ();
e075ae69 9437 }
2450a057 9438 }
26e5b205
JH
9439 else
9440 {
44cf5b6a
JH
9441 if (mode == TImode)
9442 split_ti (&operand, 1, &parts[0], &parts[1]);
26e5b205
JH
9443 if (mode == XFmode || mode == TFmode)
9444 {
9445 if (REG_P (operand))
9446 {
9447 if (!reload_completed)
9448 abort ();
9449 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9450 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9451 }
9452 else if (offsettable_memref_p (operand))
9453 {
b72f00af 9454 operand = adjust_address (operand, DImode, 0);
26e5b205 9455 parts[0] = operand;
b72f00af 9456 parts[1] = adjust_address (operand, SImode, 8);
26e5b205
JH
9457 }
9458 else if (GET_CODE (operand) == CONST_DOUBLE)
9459 {
9460 REAL_VALUE_TYPE r;
9461 long l[3];
9462
9463 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9464 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
 9465 /* Do not shift by 32 in one step, to avoid a warning on 32-bit hosts. */
9466 if (HOST_BITS_PER_WIDE_INT >= 64)
523fbd9d 9467 parts[0]
d8bf17f9 9468 = gen_int_mode
44cf5b6a 9469 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
b531087a 9470 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
d8bf17f9 9471 DImode);
26e5b205
JH
9472 else
9473 parts[0] = immed_double_const (l[0], l[1], DImode);
d8bf17f9 9474 parts[1] = gen_int_mode (l[2], SImode);
26e5b205
JH
9475 }
9476 else
9477 abort ();
9478 }
9479 }
2450a057 9480
2b589241 9481 return size;
2450a057
JH
9482}
9483
 9484/* Emit insns to perform a move or push of DI, DF, and XF values.
 9485 All required insns are emitted here. Operands 2-4 receive the
 9486 input values in the correct order; operands 5-7 receive the
 9487 output values. */
9488
26e5b205
JH
9489void
9490ix86_split_long_move (operands)
9491 rtx operands[];
2450a057
JH
9492{
9493 rtx part[2][3];
26e5b205 9494 int nparts;
2450a057
JH
9495 int push = 0;
9496 int collisions = 0;
26e5b205
JH
9497 enum machine_mode mode = GET_MODE (operands[0]);
9498
 9499 /* The DFmode expanders may ask us to move a double.
 9500 For a 64-bit target this is a single move. By hiding that fact
 9501 here we simplify the i386.md splitters. */
9502 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9503 {
8cdfa312
RH
 9504 /* Optimize constant pool references into immediates. This is used
 9505 by fp moves, which force all constants to memory to allow combining. */
26e5b205
JH
9506
9507 if (GET_CODE (operands[1]) == MEM
9508 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9509 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9510 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9511 if (push_operand (operands[0], VOIDmode))
b47b4f21
JH
9512 {
9513 operands[0] = copy_rtx (operands[0]);
9514 PUT_MODE (operands[0], Pmode);
9515 }
26e5b205
JH
9516 else
9517 operands[0] = gen_lowpart (DImode, operands[0]);
9518 operands[1] = gen_lowpart (DImode, operands[1]);
9519 emit_move_insn (operands[0], operands[1]);
9520 return;
9521 }
2450a057 9522
2450a057
JH
 9523 /* The only non-offsettable memory we handle is a push. */
9524 if (push_operand (operands[0], VOIDmode))
9525 push = 1;
9526 else if (GET_CODE (operands[0]) == MEM
9527 && ! offsettable_memref_p (operands[0]))
9528 abort ();
9529
26e5b205
JH
9530 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9531 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
2450a057
JH
9532
 9533 /* When emitting a push, take care with source operands that live on the stack. */
9534 if (push && GET_CODE (operands[1]) == MEM
9535 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9536 {
26e5b205 9537 if (nparts == 3)
886cbb88
JH
9538 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9539 XEXP (part[1][2], 0));
9540 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9541 XEXP (part[1][1], 0));
2450a057
JH
9542 }
9543
0f290768 9544 /* We need to do the copy in the right order in case an address register
2450a057
JH
 9545 of the source overlaps the destination. */
9546 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9547 {
9548 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9549 collisions++;
9550 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9551 collisions++;
26e5b205 9552 if (nparts == 3
2450a057
JH
9553 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9554 collisions++;
9555
 9556 /* A collision in the middle part can be handled by reordering. */
26e5b205 9557 if (collisions == 1 && nparts == 3
2450a057 9558 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
e075ae69 9559 {
2450a057
JH
9560 rtx tmp;
9561 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9562 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9563 }
e075ae69 9564
2450a057
JH
 9565 /* If there are more collisions, we can't handle them by reordering.
 9566 Do an lea to the last part and use only one colliding move. */
9567 else if (collisions > 1)
9568 {
9569 collisions = 1;
26e5b205 9570 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
2450a057 9571 XEXP (part[1][0], 0)));
26e5b205
JH
9572 part[1][0] = change_address (part[1][0],
9573 TARGET_64BIT ? DImode : SImode,
9574 part[0][nparts - 1]);
b72f00af 9575 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
26e5b205 9576 if (nparts == 3)
b72f00af 9577 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
2450a057
JH
9578 }
9579 }
9580
9581 if (push)
9582 {
26e5b205 9583 if (!TARGET_64BIT)
2b589241 9584 {
26e5b205
JH
9585 if (nparts == 3)
9586 {
9587 /* We use only first 12 bytes of TFmode value, but for pushing we
9588 are required to adjust stack as if we were pushing real 16byte
9589 value. */
9590 if (mode == TFmode && !TARGET_64BIT)
9591 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9592 GEN_INT (-4)));
9593 emit_move_insn (part[0][2], part[1][2]);
9594 }
2b589241 9595 }
26e5b205
JH
9596 else
9597 {
9598 /* In 64bit mode we don't have 32bit push available. In case this is
9599 register, it is OK - we will just use larger counterpart. We also
9600 retype memory - these comes from attempt to avoid REX prefix on
9601 moving of second half of TFmode value. */
9602 if (GET_MODE (part[1][1]) == SImode)
9603 {
9604 if (GET_CODE (part[1][1]) == MEM)
f4ef873c 9605 part[1][1] = adjust_address (part[1][1], DImode, 0);
26e5b205
JH
9606 else if (REG_P (part[1][1]))
9607 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9608 else
b531087a 9609 abort ();
886cbb88
JH
9610 if (GET_MODE (part[1][0]) == SImode)
9611 part[1][0] = part[1][1];
26e5b205
JH
9612 }
9613 }
9614 emit_move_insn (part[0][1], part[1][1]);
9615 emit_move_insn (part[0][0], part[1][0]);
9616 return;
2450a057
JH
9617 }
9618
 9619 /* Choose the correct order so we do not overwrite the source before it is copied. */
9620 if ((REG_P (part[0][0])
9621 && REG_P (part[1][1])
9622 && (REGNO (part[0][0]) == REGNO (part[1][1])
26e5b205 9623 || (nparts == 3
2450a057
JH
9624 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9625 || (collisions > 0
9626 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9627 {
26e5b205 9628 if (nparts == 3)
2450a057 9629 {
26e5b205
JH
9630 operands[2] = part[0][2];
9631 operands[3] = part[0][1];
9632 operands[4] = part[0][0];
9633 operands[5] = part[1][2];
9634 operands[6] = part[1][1];
9635 operands[7] = part[1][0];
2450a057
JH
9636 }
9637 else
9638 {
26e5b205
JH
9639 operands[2] = part[0][1];
9640 operands[3] = part[0][0];
9641 operands[5] = part[1][1];
9642 operands[6] = part[1][0];
2450a057
JH
9643 }
9644 }
9645 else
9646 {
26e5b205 9647 if (nparts == 3)
2450a057 9648 {
26e5b205
JH
9649 operands[2] = part[0][0];
9650 operands[3] = part[0][1];
9651 operands[4] = part[0][2];
9652 operands[5] = part[1][0];
9653 operands[6] = part[1][1];
9654 operands[7] = part[1][2];
2450a057
JH
9655 }
9656 else
9657 {
26e5b205
JH
9658 operands[2] = part[0][0];
9659 operands[3] = part[0][1];
9660 operands[5] = part[1][0];
9661 operands[6] = part[1][1];
e075ae69
RH
9662 }
9663 }
26e5b205
JH
9664 emit_move_insn (operands[2], operands[5]);
9665 emit_move_insn (operands[3], operands[6]);
9666 if (nparts == 3)
9667 emit_move_insn (operands[4], operands[7]);
32b5b1aa 9668
26e5b205 9669 return;
32b5b1aa 9670}
32b5b1aa 9671
e075ae69
RH
9672void
9673ix86_split_ashldi (operands, scratch)
9674 rtx *operands, scratch;
32b5b1aa 9675{
e075ae69
RH
9676 rtx low[2], high[2];
9677 int count;
b985a30f 9678
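 /* Sketch of the strategy (added for exposition): a constant count
    >= 32 is a word move plus one 32-bit shift; a smaller constant
    uses shld to carry bits from the low into the high word; a
    variable count emits shld/shl and then fixes up the >= 32 case
    with a conditional move or an extra jump pattern. */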
e075ae69
RH
9679 if (GET_CODE (operands[2]) == CONST_INT)
9680 {
9681 split_di (operands, 2, low, high);
9682 count = INTVAL (operands[2]) & 63;
32b5b1aa 9683
e075ae69
RH
9684 if (count >= 32)
9685 {
9686 emit_move_insn (high[0], low[1]);
9687 emit_move_insn (low[0], const0_rtx);
b985a30f 9688
e075ae69
RH
9689 if (count > 32)
9690 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9691 }
9692 else
9693 {
9694 if (!rtx_equal_p (operands[0], operands[1]))
9695 emit_move_insn (operands[0], operands[1]);
9696 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9697 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9698 }
9699 }
9700 else
9701 {
9702 if (!rtx_equal_p (operands[0], operands[1]))
9703 emit_move_insn (operands[0], operands[1]);
b985a30f 9704
e075ae69 9705 split_di (operands, 1, low, high);
b985a30f 9706
e075ae69
RH
9707 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9708 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 9709
fe577e58 9710 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 9711 {
fe577e58 9712 if (! no_new_pseudos)
e075ae69
RH
9713 scratch = force_reg (SImode, const0_rtx);
9714 else
9715 emit_move_insn (scratch, const0_rtx);
9716
9717 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9718 scratch));
9719 }
9720 else
9721 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9722 }
e9a25f70 9723}
32b5b1aa 9724
e075ae69
RH
9725void
9726ix86_split_ashrdi (operands, scratch)
9727 rtx *operands, scratch;
32b5b1aa 9728{
e075ae69
RH
9729 rtx low[2], high[2];
9730 int count;
32b5b1aa 9731
e075ae69
RH
9732 if (GET_CODE (operands[2]) == CONST_INT)
9733 {
9734 split_di (operands, 2, low, high);
9735 count = INTVAL (operands[2]) & 63;
32b5b1aa 9736
e075ae69
RH
9737 if (count >= 32)
9738 {
9739 emit_move_insn (low[0], high[1]);
32b5b1aa 9740
e075ae69
RH
9741 if (! reload_completed)
9742 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9743 else
9744 {
9745 emit_move_insn (high[0], low[0]);
9746 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9747 }
9748
9749 if (count > 32)
9750 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9751 }
9752 else
9753 {
9754 if (!rtx_equal_p (operands[0], operands[1]))
9755 emit_move_insn (operands[0], operands[1]);
9756 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9757 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9758 }
9759 }
9760 else
32b5b1aa 9761 {
e075ae69
RH
9762 if (!rtx_equal_p (operands[0], operands[1]))
9763 emit_move_insn (operands[0], operands[1]);
9764
9765 split_di (operands, 1, low, high);
9766
9767 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9768 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9769
fe577e58 9770 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 9771 {
fe577e58 9772 if (! no_new_pseudos)
e075ae69
RH
9773 scratch = gen_reg_rtx (SImode);
9774 emit_move_insn (scratch, high[0]);
9775 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9776 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9777 scratch));
9778 }
9779 else
9780 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 9781 }
e075ae69 9782}
32b5b1aa 9783
e075ae69
RH
9784void
9785ix86_split_lshrdi (operands, scratch)
9786 rtx *operands, scratch;
9787{
9788 rtx low[2], high[2];
9789 int count;
32b5b1aa 9790
e075ae69 9791 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 9792 {
e075ae69
RH
9793 split_di (operands, 2, low, high);
9794 count = INTVAL (operands[2]) & 63;
9795
9796 if (count >= 32)
c7271385 9797 {
e075ae69
RH
9798 emit_move_insn (low[0], high[1]);
9799 emit_move_insn (high[0], const0_rtx);
32b5b1aa 9800
e075ae69
RH
9801 if (count > 32)
9802 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9803 }
9804 else
9805 {
9806 if (!rtx_equal_p (operands[0], operands[1]))
9807 emit_move_insn (operands[0], operands[1]);
9808 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9809 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9810 }
32b5b1aa 9811 }
e075ae69
RH
9812 else
9813 {
9814 if (!rtx_equal_p (operands[0], operands[1]))
9815 emit_move_insn (operands[0], operands[1]);
32b5b1aa 9816
e075ae69
RH
9817 split_di (operands, 1, low, high);
9818
9819 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9820 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9821
9822 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 9823 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 9824 {
fe577e58 9825 if (! no_new_pseudos)
e075ae69
RH
9826 scratch = force_reg (SImode, const0_rtx);
9827 else
9828 emit_move_insn (scratch, const0_rtx);
9829
9830 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9831 scratch));
9832 }
9833 else
9834 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9835 }
32b5b1aa 9836}
3f803cd9 9837
0407c02b 9838/* Helper function for the string operations below. Test whether VARIABLE
0945b39d
JH
 9839 is aligned with respect to the VALUE bit mask; if so, jump to the label that is returned. */
9840static rtx
9841ix86_expand_aligntest (variable, value)
9842 rtx variable;
9843 int value;
9844{
9845 rtx label = gen_label_rtx ();
9846 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9847 if (GET_MODE (variable) == DImode)
9848 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9849 else
9850 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9851 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
d43e0b7d 9852 1, label);
0945b39d
JH
9853 return label;
9854}
9855
 9856/* Decrement COUNTREG by VALUE. */
9857static void
9858ix86_adjust_counter (countreg, value)
9859 rtx countreg;
9860 HOST_WIDE_INT value;
9861{
9862 if (GET_MODE (countreg) == DImode)
9863 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9864 else
9865 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9866}
9867
 9868/* Zero extend the possibly-SImode EXP into a Pmode register. */
d24b3457 9869rtx
0945b39d
JH
9870ix86_zero_extend_to_Pmode (exp)
9871 rtx exp;
9872{
9873 rtx r;
9874 if (GET_MODE (exp) == VOIDmode)
9875 return force_reg (Pmode, exp);
9876 if (GET_MODE (exp) == Pmode)
9877 return copy_to_mode_reg (Pmode, exp);
9878 r = gen_reg_rtx (Pmode);
9879 emit_insn (gen_zero_extendsidi2 (r, exp));
9880 return r;
9881}
9882
9883/* Expand string move (memcpy) operation. Use i386 string operations when
9884 profitable. expand_clrstr contains similar code. */
9885int
9886ix86_expand_movstr (dst, src, count_exp, align_exp)
9887 rtx dst, src, count_exp, align_exp;
9888{
9889 rtx srcreg, destreg, countreg;
9890 enum machine_mode counter_mode;
9891 HOST_WIDE_INT align = 0;
9892 unsigned HOST_WIDE_INT count = 0;
9893 rtx insns;
9894
9895 start_sequence ();
9896
9897 if (GET_CODE (align_exp) == CONST_INT)
9898 align = INTVAL (align_exp);
9899
5519a4f9 9900 /* This simple hack avoids all inlining code and simplifies the code below. */
0945b39d
JH
9901 if (!TARGET_ALIGN_STRINGOPS)
9902 align = 64;
9903
9904 if (GET_CODE (count_exp) == CONST_INT)
9905 count = INTVAL (count_exp);
9906
 9907 /* Figure out the proper mode for the counter. For 32-bit targets it is
 9908 always SImode; for 64-bit targets use SImode when possible, otherwise
 9909 DImode. Set count to the number of bytes copied when known at compile time. */
9910 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9911 || x86_64_zero_extended_value (count_exp))
9912 counter_mode = SImode;
9913 else
9914 counter_mode = DImode;
9915
9916 if (counter_mode != SImode && counter_mode != DImode)
9917 abort ();
9918
9919 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9920 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9921
9922 emit_insn (gen_cld ());
9923
 9924 /* When optimizing for size, emit a simple "rep ; movsb" instruction for
 9925 counts not divisible by 4. */
9926
9927 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9928 {
9929 countreg = ix86_zero_extend_to_Pmode (count_exp);
9930 if (TARGET_64BIT)
9931 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9932 destreg, srcreg, countreg));
9933 else
9934 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9935 destreg, srcreg, countreg));
9936 }
9937
 9938 /* For constant aligned (or small unaligned) copies use rep movsl
 9939 followed by code that copies the rest. For PentiumPro ensure 8-byte
 9940 alignment to allow rep movsl acceleration. */
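 /* Added worked example: count == 23 with size == 4 emits "rep movsl"
    for 23 >> 2 = 5 words, then one strmovhi for bytes 20-21
    (count & 2) and one strmovqi for byte 22 (count & 1). */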
9941
9942 else if (count != 0
9943 && (align >= 8
9944 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 9945 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
9946 {
9947 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9948 if (count & ~(size - 1))
9949 {
9950 countreg = copy_to_mode_reg (counter_mode,
9951 GEN_INT ((count >> (size == 4 ? 2 : 3))
9952 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9953 countreg = ix86_zero_extend_to_Pmode (countreg);
9954 if (size == 4)
9955 {
9956 if (TARGET_64BIT)
9957 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9958 destreg, srcreg, countreg));
9959 else
9960 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9961 destreg, srcreg, countreg));
9962 }
9963 else
9964 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9965 destreg, srcreg, countreg));
9966 }
9967 if (size == 8 && (count & 0x04))
9968 emit_insn (gen_strmovsi (destreg, srcreg));
9969 if (count & 0x02)
9970 emit_insn (gen_strmovhi (destreg, srcreg));
9971 if (count & 0x01)
9972 emit_insn (gen_strmovqi (destreg, srcreg));
9973 }
9974 /* The generic code based on the glibc implementation:
9975 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9976 allowing accelerated copying there)
9977 - copy the data using rep movsl
9978 - copy the rest. */
9979 else
9980 {
9981 rtx countreg2;
9982 rtx label = NULL;
37ad04a5
JH
9983 int desired_alignment = (TARGET_PENTIUMPRO
9984 && (count == 0 || count >= (unsigned int) 260)
9985 ? 8 : UNITS_PER_WORD);
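 /* Added note: 8-byte alignment is requested on PentiumPro for large
    (>= 260 bytes) or unknown counts, since rep movsl is accelerated
    there only at 8-byte alignment; otherwise the natural word size
    is enough. */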
0945b39d
JH
9986
 9987 /* In case we don't know anything about the alignment, default to the
 9988 library version, since it is usually equally fast and results in
 9989 shorter code. */
9990 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9991 {
9992 end_sequence ();
9993 return 0;
9994 }
9995
9996 if (TARGET_SINGLE_STRINGOP)
9997 emit_insn (gen_cld ());
9998
9999 countreg2 = gen_reg_rtx (Pmode);
10000 countreg = copy_to_mode_reg (counter_mode, count_exp);
10001
 10002 /* We don't use loops to align the destination or to copy parts smaller
 10003 than 4 bytes, because gcc is able to optimize such code better (when
 10004 the destination or the count really is aligned, gcc is often able to
 10005 predict the branches) and it is also friendlier to the
a4f31c00 10006 hardware branch predictor.
0945b39d
JH
 10007
 10008 Using loops is beneficial for the generic case, because we can
 10009 handle small counts with them. Many CPUs (such as the Athlon)
 10010 have large REP prefix setup costs.
 10011
 10012 This is quite costly. Maybe we can revisit this decision later or
 10013 add some customizability to this code. */
10014
37ad04a5 10015 if (count == 0 && align < desired_alignment)
0945b39d
JH
10016 {
10017 label = gen_label_rtx ();
aaae0bb9 10018 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 10019 LEU, 0, counter_mode, 1, label);
0945b39d
JH
10020 }
10021 if (align <= 1)
10022 {
10023 rtx label = ix86_expand_aligntest (destreg, 1);
10024 emit_insn (gen_strmovqi (destreg, srcreg));
10025 ix86_adjust_counter (countreg, 1);
10026 emit_label (label);
10027 LABEL_NUSES (label) = 1;
10028 }
10029 if (align <= 2)
10030 {
10031 rtx label = ix86_expand_aligntest (destreg, 2);
10032 emit_insn (gen_strmovhi (destreg, srcreg));
10033 ix86_adjust_counter (countreg, 2);
10034 emit_label (label);
10035 LABEL_NUSES (label) = 1;
10036 }
37ad04a5 10037 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
10038 {
10039 rtx label = ix86_expand_aligntest (destreg, 4);
10040 emit_insn (gen_strmovsi (destreg, srcreg));
10041 ix86_adjust_counter (countreg, 4);
10042 emit_label (label);
10043 LABEL_NUSES (label) = 1;
10044 }
10045
37ad04a5
JH
10046 if (label && desired_alignment > 4 && !TARGET_64BIT)
10047 {
10048 emit_label (label);
10049 LABEL_NUSES (label) = 1;
10050 label = NULL_RTX;
10051 }
0945b39d
JH
10052 if (!TARGET_SINGLE_STRINGOP)
10053 emit_insn (gen_cld ());
10054 if (TARGET_64BIT)
10055 {
10056 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10057 GEN_INT (3)));
10058 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10059 destreg, srcreg, countreg2));
10060 }
10061 else
10062 {
10063 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10064 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10065 destreg, srcreg, countreg2));
10066 }
10067
10068 if (label)
10069 {
10070 emit_label (label);
10071 LABEL_NUSES (label) = 1;
10072 }
10073 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10074 emit_insn (gen_strmovsi (destreg, srcreg));
10075 if ((align <= 4 || count == 0) && TARGET_64BIT)
10076 {
10077 rtx label = ix86_expand_aligntest (countreg, 4);
10078 emit_insn (gen_strmovsi (destreg, srcreg));
10079 emit_label (label);
10080 LABEL_NUSES (label) = 1;
10081 }
10082 if (align > 2 && count != 0 && (count & 2))
10083 emit_insn (gen_strmovhi (destreg, srcreg));
10084 if (align <= 2 || count == 0)
10085 {
10086 rtx label = ix86_expand_aligntest (countreg, 2);
10087 emit_insn (gen_strmovhi (destreg, srcreg));
10088 emit_label (label);
10089 LABEL_NUSES (label) = 1;
10090 }
10091 if (align > 1 && count != 0 && (count & 1))
10092 emit_insn (gen_strmovqi (destreg, srcreg));
10093 if (align <= 1 || count == 0)
10094 {
10095 rtx label = ix86_expand_aligntest (countreg, 1);
10096 emit_insn (gen_strmovqi (destreg, srcreg));
10097 emit_label (label);
10098 LABEL_NUSES (label) = 1;
10099 }
10100 }
10101
10102 insns = get_insns ();
10103 end_sequence ();
10104
10105 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
2f937369 10106 emit_insn (insns);
0945b39d
JH
10107 return 1;
10108}
10109
10110/* Expand string clear operation (bzero). Use i386 string operations when
10111 profitable. expand_movstr contains similar code. */
10112int
10113ix86_expand_clrstr (src, count_exp, align_exp)
10114 rtx src, count_exp, align_exp;
10115{
10116 rtx destreg, zeroreg, countreg;
10117 enum machine_mode counter_mode;
10118 HOST_WIDE_INT align = 0;
10119 unsigned HOST_WIDE_INT count = 0;
10120
10121 if (GET_CODE (align_exp) == CONST_INT)
10122 align = INTVAL (align_exp);
10123
5519a4f9 10124 /* This simple hack avoids all inlining code and simplifies the code below. */
0945b39d
JH
10125 if (!TARGET_ALIGN_STRINGOPS)
10126 align = 32;
10127
10128 if (GET_CODE (count_exp) == CONST_INT)
10129 count = INTVAL (count_exp);
 10130 /* Figure out the proper mode for the counter. For 32-bit targets it is
 10131 always SImode; for 64-bit targets use SImode when possible, otherwise
 10132 DImode. Set count to the number of bytes cleared when known at compile time. */
10133 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10134 || x86_64_zero_extended_value (count_exp))
10135 counter_mode = SImode;
10136 else
10137 counter_mode = DImode;
10138
10139 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10140
10141 emit_insn (gen_cld ());
10142
 10143 /* When optimizing for size, emit a simple "rep ; stosb" instruction for
 10144 counts not divisible by 4. */
10145
10146 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10147 {
10148 countreg = ix86_zero_extend_to_Pmode (count_exp);
10149 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10150 if (TARGET_64BIT)
10151 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10152 destreg, countreg));
10153 else
10154 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10155 destreg, countreg));
10156 }
10157 else if (count != 0
10158 && (align >= 8
10159 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 10160 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
10161 {
10162 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10163 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10164 if (count & ~(size - 1))
10165 {
10166 countreg = copy_to_mode_reg (counter_mode,
10167 GEN_INT ((count >> (size == 4 ? 2 : 3))
10168 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10169 countreg = ix86_zero_extend_to_Pmode (countreg);
10170 if (size == 4)
10171 {
10172 if (TARGET_64BIT)
10173 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10174 destreg, countreg));
10175 else
10176 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10177 destreg, countreg));
10178 }
10179 else
10180 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10181 destreg, countreg));
10182 }
10183 if (size == 8 && (count & 0x04))
10184 emit_insn (gen_strsetsi (destreg,
10185 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10186 if (count & 0x02)
10187 emit_insn (gen_strsethi (destreg,
10188 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10189 if (count & 0x01)
10190 emit_insn (gen_strsetqi (destreg,
10191 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10192 }
10193 else
10194 {
10195 rtx countreg2;
10196 rtx label = NULL;
37ad04a5
JH
10197 /* Compute desired alignment of the string operation. */
10198 int desired_alignment = (TARGET_PENTIUMPRO
10199 && (count == 0 || count >= (unsigned int) 260)
10200 ? 8 : UNITS_PER_WORD);
0945b39d
JH
10201
 10202 /* In case we don't know anything about the alignment, default to the
 10203 library version, since it is usually equally fast and results in
 10204 shorter code. */
10205 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10206 return 0;
10207
10208 if (TARGET_SINGLE_STRINGOP)
10209 emit_insn (gen_cld ());
10210
10211 countreg2 = gen_reg_rtx (Pmode);
10212 countreg = copy_to_mode_reg (counter_mode, count_exp);
10213 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10214
37ad04a5 10215 if (count == 0 && align < desired_alignment)
0945b39d
JH
10216 {
10217 label = gen_label_rtx ();
37ad04a5 10218 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 10219 LEU, 0, counter_mode, 1, label);
0945b39d
JH
10220 }
10221 if (align <= 1)
10222 {
10223 rtx label = ix86_expand_aligntest (destreg, 1);
10224 emit_insn (gen_strsetqi (destreg,
10225 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10226 ix86_adjust_counter (countreg, 1);
10227 emit_label (label);
10228 LABEL_NUSES (label) = 1;
10229 }
10230 if (align <= 2)
10231 {
10232 rtx label = ix86_expand_aligntest (destreg, 2);
10233 emit_insn (gen_strsethi (destreg,
10234 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10235 ix86_adjust_counter (countreg, 2);
10236 emit_label (label);
10237 LABEL_NUSES (label) = 1;
10238 }
37ad04a5 10239 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
10240 {
10241 rtx label = ix86_expand_aligntest (destreg, 4);
10242 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10243 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10244 : zeroreg)));
10245 ix86_adjust_counter (countreg, 4);
10246 emit_label (label);
10247 LABEL_NUSES (label) = 1;
10248 }
10249
37ad04a5
JH
10250 if (label && desired_alignment > 4 && !TARGET_64BIT)
10251 {
10252 emit_label (label);
10253 LABEL_NUSES (label) = 1;
10254 label = NULL_RTX;
10255 }
10256
0945b39d
JH
10257 if (!TARGET_SINGLE_STRINGOP)
10258 emit_insn (gen_cld ());
10259 if (TARGET_64BIT)
10260 {
10261 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10262 GEN_INT (3)));
10263 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10264 destreg, countreg2));
10265 }
10266 else
10267 {
10268 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10269 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10270 destreg, countreg2));
10271 }
0945b39d
JH
10272 if (label)
10273 {
10274 emit_label (label);
10275 LABEL_NUSES (label) = 1;
10276 }
37ad04a5 10277
0945b39d
JH
10278 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10279 emit_insn (gen_strsetsi (destreg,
10280 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10281 if (TARGET_64BIT && (align <= 4 || count == 0))
10282 {
79258dce 10283 rtx label = ix86_expand_aligntest (countreg, 4);
0945b39d
JH
10284 emit_insn (gen_strsetsi (destreg,
10285 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10286 emit_label (label);
10287 LABEL_NUSES (label) = 1;
10288 }
10289 if (align > 2 && count != 0 && (count & 2))
10290 emit_insn (gen_strsethi (destreg,
10291 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10292 if (align <= 2 || count == 0)
10293 {
74411039 10294 rtx label = ix86_expand_aligntest (countreg, 2);
0945b39d
JH
10295 emit_insn (gen_strsethi (destreg,
10296 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10297 emit_label (label);
10298 LABEL_NUSES (label) = 1;
10299 }
10300 if (align > 1 && count != 0 && (count & 1))
10301 emit_insn (gen_strsetqi (destreg,
10302 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10303 if (align <= 1 || count == 0)
10304 {
74411039 10305 rtx label = ix86_expand_aligntest (countreg, 1);
0945b39d
JH
10306 emit_insn (gen_strsetqi (destreg,
10307 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10308 emit_label (label);
10309 LABEL_NUSES (label) = 1;
10310 }
10311 }
10312 return 1;
10313}
10314/* Expand strlen. */
10315int
10316ix86_expand_strlen (out, src, eoschar, align)
10317 rtx out, src, eoschar, align;
10318{
10319 rtx addr, scratch1, scratch2, scratch3, scratch4;
10320
 10321 /* The generic case of the strlen expander is long. Avoid expanding it
 10322 unless TARGET_INLINE_ALL_STRINGOPS. */
10323
10324 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10325 && !TARGET_INLINE_ALL_STRINGOPS
10326 && !optimize_size
10327 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10328 return 0;
10329
10330 addr = force_reg (Pmode, XEXP (src, 0));
10331 scratch1 = gen_reg_rtx (Pmode);
10332
10333 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10334 && !optimize_size)
10335 {
 10336 /* It seems that some optimizers do not combine calls like
 10337 foo (strlen (bar), strlen (bar));
 10338 when the move and the subtraction are done here. The length is
 10339 calculated just once when these instructions are done inside
 10340 output_strlen_unroll(). But since &bar[strlen (bar)] is often
 10341 used, and this uses one fewer register for the lifetime of
 10342 output_strlen_unroll(), this is better. */
10343
10344 emit_move_insn (out, addr);
10345
10346 ix86_expand_strlensi_unroll_1 (out, align);
10347
10348 /* strlensi_unroll_1 returns the address of the zero at the end of
10349 the string, like memchr(), so compute the length by subtracting
10350 the start address. */
10351 if (TARGET_64BIT)
10352 emit_insn (gen_subdi3 (out, out, addr));
10353 else
10354 emit_insn (gen_subsi3 (out, out, addr));
10355 }
10356 else
10357 {
10358 scratch2 = gen_reg_rtx (Pmode);
10359 scratch3 = gen_reg_rtx (Pmode);
10360 scratch4 = force_reg (Pmode, constm1_rtx);
10361
10362 emit_move_insn (scratch3, addr);
10363 eoschar = force_reg (QImode, eoschar);
10364
10365 emit_insn (gen_cld ());
10366 if (TARGET_64BIT)
10367 {
10368 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10369 align, scratch4, scratch3));
10370 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10371 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10372 }
10373 else
10374 {
10375 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10376 align, scratch4, scratch3));
10377 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10378 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10379 }
10380 }
10381 return 1;
10382}
10383
e075ae69
RH
10384/* Expand the appropriate insns for doing strlen if not just doing
10385 repnz; scasb
10386
10387 out = result, initialized with the start address
10388 align_rtx = alignment of the address.
 10389 scratch = scratch register, initialized with the start address when
77ebd435 10390 not aligned, otherwise undefined
3f803cd9
SC
 10391
 10392 This is just the body. It needs the initializations mentioned above
 10393 and some address computation at the end. These things are done in i386.md. */
10394
0945b39d
JH
10395static void
10396ix86_expand_strlensi_unroll_1 (out, align_rtx)
10397 rtx out, align_rtx;
3f803cd9 10398{
e075ae69
RH
10399 int align;
10400 rtx tmp;
10401 rtx align_2_label = NULL_RTX;
10402 rtx align_3_label = NULL_RTX;
10403 rtx align_4_label = gen_label_rtx ();
10404 rtx end_0_label = gen_label_rtx ();
e075ae69 10405 rtx mem;
e2e52e1b 10406 rtx tmpreg = gen_reg_rtx (SImode);
0945b39d 10407 rtx scratch = gen_reg_rtx (SImode);
e075ae69
RH
10408
10409 align = 0;
10410 if (GET_CODE (align_rtx) == CONST_INT)
10411 align = INTVAL (align_rtx);
3f803cd9 10412
e9a25f70 10413 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 10414
e9a25f70 10415 /* Is there a known alignment and is it less than 4? */
e075ae69 10416 if (align < 4)
3f803cd9 10417 {
0945b39d
JH
10418 rtx scratch1 = gen_reg_rtx (Pmode);
10419 emit_move_insn (scratch1, out);
e9a25f70 10420 /* Is there a known alignment and is it not 2? */
e075ae69 10421 if (align != 2)
3f803cd9 10422 {
e075ae69
RH
10423 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10424 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10425
10426 /* Leave just the 3 lower bits. */
0945b39d 10427 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
e075ae69
RH
10428 NULL_RTX, 0, OPTAB_WIDEN);
10429
9076b9c1 10430 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 10431 Pmode, 1, align_4_label);
9076b9c1 10432 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
d43e0b7d 10433 Pmode, 1, align_2_label);
9076b9c1 10434 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
d43e0b7d 10435 Pmode, 1, align_3_label);
3f803cd9
SC
10436 }
10437 else
10438 {
e9a25f70
JL
 10439 /* Since the alignment is 2, we have to check 0 or 2 bytes, and then
 10440 check whether the pointer is aligned to 4 bytes. */
e9a25f70 10441
0945b39d 10442 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
e075ae69
RH
10443 NULL_RTX, 0, OPTAB_WIDEN);
10444
9076b9c1 10445 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 10446 Pmode, 1, align_4_label);
3f803cd9
SC
10447 }
10448
e075ae69 10449 mem = gen_rtx_MEM (QImode, out);
e9a25f70 10450
e075ae69 10451 /* Now compare the bytes. */
e9a25f70 10452
0f290768 10453 /* Compare the first n unaligned bytes on a byte-per-byte basis. */
9076b9c1 10454 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
d43e0b7d 10455 QImode, 1, end_0_label);
3f803cd9 10456
0f290768 10457 /* Increment the address. */
0945b39d
JH
10458 if (TARGET_64BIT)
10459 emit_insn (gen_adddi3 (out, out, const1_rtx));
10460 else
10461 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 10462
e075ae69
RH
10463 /* Not needed with an alignment of 2 */
10464 if (align != 2)
10465 {
10466 emit_label (align_2_label);
3f803cd9 10467
d43e0b7d
RK
10468 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10469 end_0_label);
e075ae69 10470
0945b39d
JH
10471 if (TARGET_64BIT)
10472 emit_insn (gen_adddi3 (out, out, const1_rtx));
10473 else
10474 emit_insn (gen_addsi3 (out, out, const1_rtx));
e075ae69
RH
10475
10476 emit_label (align_3_label);
10477 }
10478
d43e0b7d
RK
10479 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10480 end_0_label);
e075ae69 10481
0945b39d
JH
10482 if (TARGET_64BIT)
10483 emit_insn (gen_adddi3 (out, out, const1_rtx));
10484 else
10485 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
10486 }
10487
e075ae69
RH
 10488 /* Generate a loop to check 4 bytes at a time. It is not a good idea
 10489 to align this loop; doing so only makes programs huge and does not
 10490 help speed. */
10491 emit_label (align_4_label);
3f803cd9 10492
e075ae69
RH
10493 mem = gen_rtx_MEM (SImode, out);
10494 emit_move_insn (scratch, mem);
0945b39d
JH
10495 if (TARGET_64BIT)
10496 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10497 else
10498 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 10499
e2e52e1b
JH
10500 /* This formula yields a nonzero result iff one of the bytes is zero.
10501 This saves three branches inside loop and many cycles. */
10502
10503 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10504 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10505 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
7471a1f0 10506 emit_insn (gen_andsi3 (tmpreg, tmpreg,
d8bf17f9 10507 gen_int_mode (0x80808080, SImode)));
d43e0b7d
RK
10508 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10509 align_4_label);
e2e52e1b
JH
10510
10511 if (TARGET_CMOVE)
10512 {
10513 rtx reg = gen_reg_rtx (SImode);
0945b39d 10514 rtx reg2 = gen_reg_rtx (Pmode);
e2e52e1b
JH
10515 emit_move_insn (reg, tmpreg);
10516 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10517
0f290768 10518 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 10519 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
10520 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10521 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10522 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10523 gen_rtx_IF_THEN_ELSE (SImode, tmp,
77ebd435
AJ
10524 reg,
10525 tmpreg)));
e2e52e1b 10526 /* Emit lea manually to avoid clobbering of flags. */
0945b39d
JH
10527 emit_insn (gen_rtx_SET (SImode, reg2,
10528 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
e2e52e1b
JH
10529
10530 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10531 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10532 emit_insn (gen_rtx_SET (VOIDmode, out,
0945b39d 10533 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
a4f31c00
AJ
10534 reg2,
10535 out)));
e2e52e1b
JH
10536
10537 }
10538 else
10539 {
10540 rtx end_2_label = gen_label_rtx ();
10541 /* Is zero in the first two bytes? */
10542
16189740 10543 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
10544 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10545 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10546 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10547 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10548 pc_rtx);
10549 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10550 JUMP_LABEL (tmp) = end_2_label;
10551
0f290768 10552 /* Not in the first two. Move two bytes forward. */
e2e52e1b 10553 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
0945b39d
JH
10554 if (TARGET_64BIT)
10555 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10556 else
10557 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
e2e52e1b
JH
10558
10559 emit_label (end_2_label);
10560
10561 }
10562
0f290768 10563 /* Avoid branch in fixing the byte. */
e2e52e1b 10564 tmpreg = gen_lowpart (QImode, tmpreg);
7e08e190 10565 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
0945b39d
JH
10566 if (TARGET_64BIT)
10567 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10568 else
10569 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
e075ae69
RH
10570
10571 emit_label (end_0_label);
10572}
0e07aff3
RH
10573
10574void
10575ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10576 rtx retval, fnaddr, callarg1, callarg2, pop;
10577{
10578 rtx use = NULL, call;
10579
10580 if (pop == const0_rtx)
10581 pop = NULL;
10582 if (TARGET_64BIT && pop)
10583 abort ();
10584
b069de3b
SS
10585#if TARGET_MACHO
10586 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10587 fnaddr = machopic_indirect_call_target (fnaddr);
10588#else
0e07aff3
RH
10589 /* Static functions and indirect calls don't need the pic register. */
10590 if (! TARGET_64BIT && flag_pic
10591 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10592 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
66edd3b4 10593 use_reg (&use, pic_offset_table_rtx);
0e07aff3
RH
10594
10595 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10596 {
10597 rtx al = gen_rtx_REG (QImode, 0);
10598 emit_move_insn (al, callarg2);
10599 use_reg (&use, al);
10600 }
b069de3b 10601#endif /* TARGET_MACHO */
0e07aff3
RH
10602
10603 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10604 {
10605 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10606 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10607 }
10608
10609 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10610 if (retval)
10611 call = gen_rtx_SET (VOIDmode, retval, call);
10612 if (pop)
10613 {
10614 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10615 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10616 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10617 }
10618
10619 call = emit_call_insn (call);
10620 if (use)
10621 CALL_INSN_FUNCTION_USAGE (call) = use;
10622}
fce5a9f2 10623
e075ae69 10624\f
e075ae69
RH
10625/* Clear stack slot assignments remembered from previous functions.
10626 This is called from INIT_EXPANDERS once before RTL is emitted for each
10627 function. */
10628
e2500fed
GK
10629static struct machine_function *
10630ix86_init_machine_status ()
37b15744 10631{
e2500fed 10632 return ggc_alloc_cleared (sizeof (struct machine_function));
1526a060
BS
10633}
10634
e075ae69
RH
10635/* Return a MEM corresponding to a stack slot with mode MODE.
10636 Allocate a new slot if necessary.
10637
10638 The RTL for a function can have several slots available: N is
10639 which slot to use. */
10640
10641rtx
10642assign_386_stack_local (mode, n)
10643 enum machine_mode mode;
10644 int n;
10645{
10646 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10647 abort ();
10648
10649 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10650 ix86_stack_locals[(int) mode][n]
10651 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10652
10653 return ix86_stack_locals[(int) mode][n];
10654}
f996902d
RH
10655
10656/* Construct the SYMBOL_REF for the tls_get_addr function. */
10657
e2500fed 10658static GTY(()) rtx ix86_tls_symbol;
f996902d
RH
10659rtx
10660ix86_tls_get_addr ()
10661{
f996902d 10662
e2500fed 10663 if (!ix86_tls_symbol)
f996902d 10664 {
e2500fed 10665 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
f996902d
RH
10666 ? "___tls_get_addr"
10667 : "__tls_get_addr"));
f996902d
RH
10668 }
10669
e2500fed 10670 return ix86_tls_symbol;
f996902d 10671}
e075ae69
RH
10672\f
10673/* Calculate the length of the memory address in the instruction
10674 encoding. Does not include the one-byte modrm, opcode, or prefix. */
10675
10676static int
10677memory_address_length (addr)
10678 rtx addr;
10679{
10680 struct ix86_address parts;
10681 rtx base, index, disp;
10682 int len;
10683
10684 if (GET_CODE (addr) == PRE_DEC
0d7d98ee
JH
10685 || GET_CODE (addr) == POST_INC
10686 || GET_CODE (addr) == PRE_MODIFY
10687 || GET_CODE (addr) == POST_MODIFY)
e075ae69 10688 return 0;
3f803cd9 10689
e075ae69
RH
10690 if (! ix86_decompose_address (addr, &parts))
10691 abort ();
3f803cd9 10692
e075ae69
RH
10693 base = parts.base;
10694 index = parts.index;
10695 disp = parts.disp;
10696 len = 0;
3f803cd9 10697
e075ae69
RH
10698 /* Register Indirect. */
10699 if (base && !index && !disp)
10700 {
10701 /* Special cases: ebp and esp need the two-byte modrm form. */
10702 if (addr == stack_pointer_rtx
10703 || addr == arg_pointer_rtx
564d80f4
JH
10704 || addr == frame_pointer_rtx
10705 || addr == hard_frame_pointer_rtx)
e075ae69 10706 len = 1;
3f803cd9 10707 }
e9a25f70 10708
e075ae69
RH
10709 /* Direct Addressing. */
10710 else if (disp && !base && !index)
10711 len = 4;
10712
3f803cd9
SC
10713 else
10714 {
e075ae69
RH
10715 /* Find the length of the displacement constant. */
10716 if (disp)
10717 {
10718 if (GET_CODE (disp) == CONST_INT
10719 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10720 len = 1;
10721 else
10722 len = 4;
10723 }
3f803cd9 10724
e075ae69
RH
10725 /* An index requires the two-byte modrm form. */
10726 if (index)
10727 len += 1;
3f803cd9
SC
10728 }
10729
e075ae69
RH
10730 return len;
10731}
79325812 10732
5bf0ebab
RH
10733/* Compute default value for "length_immediate" attribute. When SHORTFORM
10734 is set, expect that insn have 8bit immediate alternative. */
e075ae69 10735int
6ef67412 10736ix86_attr_length_immediate_default (insn, shortform)
e075ae69 10737 rtx insn;
6ef67412 10738 int shortform;
e075ae69 10739{
6ef67412
JH
10740 int len = 0;
10741 int i;
6c698a6d 10742 extract_insn_cached (insn);
6ef67412
JH
10743 for (i = recog_data.n_operands - 1; i >= 0; --i)
10744 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 10745 {
6ef67412 10746 if (len)
3071fab5 10747 abort ();
6ef67412
JH
10748 if (shortform
10749 && GET_CODE (recog_data.operand[i]) == CONST_INT
10750 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10751 len = 1;
10752 else
10753 {
10754 switch (get_attr_mode (insn))
10755 {
10756 case MODE_QI:
10757 len+=1;
10758 break;
10759 case MODE_HI:
10760 len+=2;
10761 break;
10762 case MODE_SI:
10763 len+=4;
10764 break;
14f73b5a
JH
10765 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
10766 case MODE_DI:
10767 len+=4;
10768 break;
6ef67412 10769 default:
c725bd79 10770 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
10771 }
10772 }
3071fab5 10773 }
6ef67412
JH
10774 return len;
10775}
10776/* Compute default value for "length_address" attribute. */
10777int
10778ix86_attr_length_address_default (insn)
10779 rtx insn;
10780{
10781 int i;
6c698a6d 10782 extract_insn_cached (insn);
1ccbefce
RH
10783 for (i = recog_data.n_operands - 1; i >= 0; --i)
10784 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 10785 {
6ef67412 10786 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
10787 break;
10788 }
6ef67412 10789 return 0;
3f803cd9 10790}
e075ae69
RH
10791\f
10792/* Return the maximum number of instructions a cpu can issue. */
b657fc39 10793
c237e94a 10794static int
e075ae69 10795ix86_issue_rate ()
b657fc39 10796{
e075ae69 10797 switch (ix86_cpu)
b657fc39 10798 {
e075ae69
RH
10799 case PROCESSOR_PENTIUM:
10800 case PROCESSOR_K6:
10801 return 2;
79325812 10802
e075ae69 10803 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
10804 case PROCESSOR_PENTIUM4:
10805 case PROCESSOR_ATHLON:
e075ae69 10806 return 3;
b657fc39 10807
b657fc39 10808 default:
e075ae69 10809 return 1;
b657fc39 10810 }
b657fc39
L
10811}
10812
e075ae69
RH
10813/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10814 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 10815
e075ae69
RH
10816static int
10817ix86_flags_dependant (insn, dep_insn, insn_type)
10818 rtx insn, dep_insn;
10819 enum attr_type insn_type;
10820{
10821 rtx set, set2;
b657fc39 10822
e075ae69
RH
10823 /* Simplify the test for uninteresting insns. */
10824 if (insn_type != TYPE_SETCC
10825 && insn_type != TYPE_ICMOV
10826 && insn_type != TYPE_FCMOV
10827 && insn_type != TYPE_IBR)
10828 return 0;
b657fc39 10829
e075ae69
RH
10830 if ((set = single_set (dep_insn)) != 0)
10831 {
10832 set = SET_DEST (set);
10833 set2 = NULL_RTX;
10834 }
10835 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10836 && XVECLEN (PATTERN (dep_insn), 0) == 2
10837 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10838 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10839 {
10840 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10841 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10842 }
78a0d70c
ZW
10843 else
10844 return 0;
b657fc39 10845
78a0d70c
ZW
10846 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10847 return 0;
b657fc39 10848
f5143c46 10849 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
10850 not any other potentially set register. */
10851 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10852 return 0;
10853
10854 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10855 return 0;
10856
10857 return 1;
e075ae69 10858}
b657fc39 10859
e075ae69
RH
10860/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10861 address with operands set by DEP_INSN. */
10862
10863static int
10864ix86_agi_dependant (insn, dep_insn, insn_type)
10865 rtx insn, dep_insn;
10866 enum attr_type insn_type;
10867{
10868 rtx addr;
10869
6ad48e84
JH
10870 if (insn_type == TYPE_LEA
10871 && TARGET_PENTIUM)
5fbdde42
RH
10872 {
10873 addr = PATTERN (insn);
10874 if (GET_CODE (addr) == SET)
10875 ;
10876 else if (GET_CODE (addr) == PARALLEL
10877 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10878 addr = XVECEXP (addr, 0, 0);
10879 else
10880 abort ();
10881 addr = SET_SRC (addr);
10882 }
e075ae69
RH
10883 else
10884 {
10885 int i;
6c698a6d 10886 extract_insn_cached (insn);
1ccbefce
RH
10887 for (i = recog_data.n_operands - 1; i >= 0; --i)
10888 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 10889 {
1ccbefce 10890 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
10891 goto found;
10892 }
10893 return 0;
10894 found:;
b657fc39
L
10895 }
10896
e075ae69 10897 return modified_in_p (addr, dep_insn);
b657fc39 10898}
a269a03c 10899
c237e94a 10900static int
e075ae69 10901ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
10902 rtx insn, link, dep_insn;
10903 int cost;
10904{
e075ae69 10905 enum attr_type insn_type, dep_insn_type;
6ad48e84 10906 enum attr_memory memory, dep_memory;
e075ae69 10907 rtx set, set2;
9b00189f 10908 int dep_insn_code_number;
a269a03c 10909
309ada50 10910 /* Anti and output depenancies have zero cost on all CPUs. */
e075ae69 10911 if (REG_NOTE_KIND (link) != 0)
309ada50 10912 return 0;
a269a03c 10913
9b00189f
JH
10914 dep_insn_code_number = recog_memoized (dep_insn);
10915
e075ae69 10916 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 10917 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 10918 return cost;
a269a03c 10919
1c71e60e
JH
10920 insn_type = get_attr_type (insn);
10921 dep_insn_type = get_attr_type (dep_insn);
9b00189f 10922
a269a03c
JC
10923 switch (ix86_cpu)
10924 {
10925 case PROCESSOR_PENTIUM:
e075ae69
RH
10926 /* Address Generation Interlock adds a cycle of latency. */
10927 if (ix86_agi_dependant (insn, dep_insn, insn_type))
10928 cost += 1;
10929
10930 /* ??? Compares pair with jump/setcc. */
10931 if (ix86_flags_dependant (insn, dep_insn, insn_type))
10932 cost = 0;
10933
10934 /* Floating point stores require value to be ready one cycle ealier. */
0f290768 10935 if (insn_type == TYPE_FMOV
e075ae69
RH
10936 && get_attr_memory (insn) == MEMORY_STORE
10937 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10938 cost += 1;
10939 break;
a269a03c 10940
e075ae69 10941 case PROCESSOR_PENTIUMPRO:
6ad48e84
JH
10942 memory = get_attr_memory (insn);
10943 dep_memory = get_attr_memory (dep_insn);
10944
0f290768 10945 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
10946 increase the cost here for non-imov insns. */
10947 if (dep_insn_type != TYPE_IMOV
6ad48e84
JH
10948 && dep_insn_type != TYPE_FMOV
10949 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
e075ae69
RH
10950 cost += 1;
10951
10952 /* INT->FP conversion is expensive. */
10953 if (get_attr_fp_int_src (dep_insn))
10954 cost += 5;
10955
10956 /* There is one cycle extra latency between an FP op and a store. */
10957 if (insn_type == TYPE_FMOV
10958 && (set = single_set (dep_insn)) != NULL_RTX
10959 && (set2 = single_set (insn)) != NULL_RTX
10960 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10961 && GET_CODE (SET_DEST (set2)) == MEM)
10962 cost += 1;
6ad48e84
JH
10963
10964 /* Show ability of reorder buffer to hide latency of load by executing
10965 in parallel with previous instruction in case
10966 previous instruction is not needed to compute the address. */
10967 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10968 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10969 {
10970 /* Claim moves to take one cycle, as core can issue one load
10971 at time and the next load can start cycle later. */
10972 if (dep_insn_type == TYPE_IMOV
10973 || dep_insn_type == TYPE_FMOV)
10974 cost = 1;
10975 else if (cost > 1)
10976 cost--;
10977 }
e075ae69 10978 break;
a269a03c 10979
e075ae69 10980 case PROCESSOR_K6:
6ad48e84
JH
10981 memory = get_attr_memory (insn);
10982 dep_memory = get_attr_memory (dep_insn);
e075ae69
RH
10983 /* The esp dependency is resolved before the instruction is really
10984 finished. */
10985 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10986 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10987 return 1;
a269a03c 10988
0f290768 10989 /* Since we can't represent delayed latencies of load+operation,
e075ae69 10990 increase the cost here for non-imov insns. */
6ad48e84 10991 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
e075ae69
RH
10992 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10993
10994 /* INT->FP conversion is expensive. */
10995 if (get_attr_fp_int_src (dep_insn))
10996 cost += 5;
6ad48e84
JH
10997
10998 /* Show ability of reorder buffer to hide latency of load by executing
10999 in parallel with previous instruction in case
11000 previous instruction is not needed to compute the address. */
11001 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11002 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11003 {
11004 /* Claim moves to take one cycle, as core can issue one load
11005 at time and the next load can start cycle later. */
11006 if (dep_insn_type == TYPE_IMOV
11007 || dep_insn_type == TYPE_FMOV)
11008 cost = 1;
11009 else if (cost > 2)
11010 cost -= 2;
11011 else
11012 cost = 1;
11013 }
a14003ee 11014 break;
e075ae69 11015
309ada50 11016 case PROCESSOR_ATHLON:
6ad48e84
JH
11017 memory = get_attr_memory (insn);
11018 dep_memory = get_attr_memory (dep_insn);
11019
11020 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
0b5107cf
JH
11021 {
11022 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11023 cost += 2;
11024 else
11025 cost += 3;
11026 }
6ad48e84
JH
11027 /* Show ability of reorder buffer to hide latency of load by executing
11028 in parallel with previous instruction in case
11029 previous instruction is not needed to compute the address. */
11030 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11031 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11032 {
11033 /* Claim moves to take one cycle, as core can issue one load
11034 at time and the next load can start cycle later. */
11035 if (dep_insn_type == TYPE_IMOV
11036 || dep_insn_type == TYPE_FMOV)
11037 cost = 0;
11038 else if (cost >= 3)
11039 cost -= 3;
11040 else
11041 cost = 0;
11042 }
309ada50 11043
a269a03c 11044 default:
a269a03c
JC
11045 break;
11046 }
11047
11048 return cost;
11049}
0a726ef1 11050
e075ae69
RH
11051static union
11052{
11053 struct ppro_sched_data
11054 {
11055 rtx decode[3];
11056 int issued_this_cycle;
11057 } ppro;
11058} ix86_sched_data;
0a726ef1 11059
e075ae69
RH
11060static enum attr_ppro_uops
11061ix86_safe_ppro_uops (insn)
11062 rtx insn;
11063{
11064 if (recog_memoized (insn) >= 0)
11065 return get_attr_ppro_uops (insn);
11066 else
11067 return PPRO_UOPS_MANY;
11068}
0a726ef1 11069
e075ae69
RH
11070static void
11071ix86_dump_ppro_packet (dump)
11072 FILE *dump;
0a726ef1 11073{
e075ae69 11074 if (ix86_sched_data.ppro.decode[0])
0a726ef1 11075 {
e075ae69
RH
11076 fprintf (dump, "PPRO packet: %d",
11077 INSN_UID (ix86_sched_data.ppro.decode[0]));
11078 if (ix86_sched_data.ppro.decode[1])
11079 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11080 if (ix86_sched_data.ppro.decode[2])
11081 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11082 fputc ('\n', dump);
11083 }
11084}
0a726ef1 11085
e075ae69 11086/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 11087
c237e94a
ZW
11088static void
11089ix86_sched_init (dump, sched_verbose, veclen)
e075ae69
RH
11090 FILE *dump ATTRIBUTE_UNUSED;
11091 int sched_verbose ATTRIBUTE_UNUSED;
c237e94a 11092 int veclen ATTRIBUTE_UNUSED;
e075ae69
RH
11093{
11094 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11095}
11096
11097/* Shift INSN to SLOT, and shift everything else down. */
11098
11099static void
11100ix86_reorder_insn (insnp, slot)
11101 rtx *insnp, *slot;
11102{
11103 if (insnp != slot)
11104 {
11105 rtx insn = *insnp;
0f290768 11106 do
e075ae69
RH
11107 insnp[0] = insnp[1];
11108 while (++insnp != slot);
11109 *insnp = insn;
0a726ef1 11110 }
e075ae69
RH
11111}
11112
c6991660 11113static void
78a0d70c
ZW
11114ix86_sched_reorder_ppro (ready, e_ready)
11115 rtx *ready;
11116 rtx *e_ready;
11117{
11118 rtx decode[3];
11119 enum attr_ppro_uops cur_uops;
11120 int issued_this_cycle;
11121 rtx *insnp;
11122 int i;
e075ae69 11123
0f290768 11124 /* At this point .ppro.decode contains the state of the three
78a0d70c 11125 decoders from last "cycle". That is, those insns that were
0f290768 11126 actually independent. But here we're scheduling for the
78a0d70c
ZW
11127 decoder, and we may find things that are decodable in the
11128 same cycle. */
e075ae69 11129
0f290768 11130 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
78a0d70c 11131 issued_this_cycle = 0;
e075ae69 11132
78a0d70c
ZW
11133 insnp = e_ready;
11134 cur_uops = ix86_safe_ppro_uops (*insnp);
0a726ef1 11135
78a0d70c
ZW
11136 /* If the decoders are empty, and we've a complex insn at the
11137 head of the priority queue, let it issue without complaint. */
11138 if (decode[0] == NULL)
11139 {
11140 if (cur_uops == PPRO_UOPS_MANY)
11141 {
11142 decode[0] = *insnp;
11143 goto ppro_done;
11144 }
11145
11146 /* Otherwise, search for a 2-4 uop unsn to issue. */
11147 while (cur_uops != PPRO_UOPS_FEW)
11148 {
11149 if (insnp == ready)
11150 break;
11151 cur_uops = ix86_safe_ppro_uops (*--insnp);
11152 }
11153
11154 /* If so, move it to the head of the line. */
11155 if (cur_uops == PPRO_UOPS_FEW)
11156 ix86_reorder_insn (insnp, e_ready);
0a726ef1 11157
78a0d70c
ZW
11158 /* Issue the head of the queue. */
11159 issued_this_cycle = 1;
11160 decode[0] = *e_ready--;
11161 }
fb693d44 11162
78a0d70c
ZW
11163 /* Look for simple insns to fill in the other two slots. */
11164 for (i = 1; i < 3; ++i)
11165 if (decode[i] == NULL)
11166 {
a151daf0 11167 if (ready > e_ready)
78a0d70c 11168 goto ppro_done;
fb693d44 11169
e075ae69
RH
11170 insnp = e_ready;
11171 cur_uops = ix86_safe_ppro_uops (*insnp);
78a0d70c
ZW
11172 while (cur_uops != PPRO_UOPS_ONE)
11173 {
11174 if (insnp == ready)
11175 break;
11176 cur_uops = ix86_safe_ppro_uops (*--insnp);
11177 }
fb693d44 11178
78a0d70c
ZW
11179 /* Found one. Move it to the head of the queue and issue it. */
11180 if (cur_uops == PPRO_UOPS_ONE)
e075ae69 11181 {
78a0d70c
ZW
11182 ix86_reorder_insn (insnp, e_ready);
11183 decode[i] = *e_ready--;
11184 issued_this_cycle++;
11185 continue;
11186 }
fb693d44 11187
78a0d70c
ZW
11188 /* ??? Didn't find one. Ideally, here we would do a lazy split
11189 of 2-uop insns, issue one and queue the other. */
11190 }
fb693d44 11191
78a0d70c
ZW
11192 ppro_done:
11193 if (issued_this_cycle == 0)
11194 issued_this_cycle = 1;
11195 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11196}
fb693d44 11197
0f290768 11198/* We are about to being issuing insns for this clock cycle.
78a0d70c 11199 Override the default sort algorithm to better slot instructions. */
c237e94a
ZW
11200static int
11201ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
78a0d70c
ZW
11202 FILE *dump ATTRIBUTE_UNUSED;
11203 int sched_verbose ATTRIBUTE_UNUSED;
11204 rtx *ready;
c237e94a 11205 int *n_readyp;
78a0d70c
ZW
11206 int clock_var ATTRIBUTE_UNUSED;
11207{
c237e94a 11208 int n_ready = *n_readyp;
78a0d70c 11209 rtx *e_ready = ready + n_ready - 1;
fb693d44 11210
fce5a9f2 11211 /* Make sure to go ahead and initialize key items in
a151daf0
JL
11212 ix86_sched_data if we are not going to bother trying to
11213 reorder the ready queue. */
78a0d70c 11214 if (n_ready < 2)
a151daf0
JL
11215 {
11216 ix86_sched_data.ppro.issued_this_cycle = 1;
11217 goto out;
11218 }
e075ae69 11219
78a0d70c
ZW
11220 switch (ix86_cpu)
11221 {
11222 default:
11223 break;
e075ae69 11224
78a0d70c
ZW
11225 case PROCESSOR_PENTIUMPRO:
11226 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 11227 break;
fb693d44
RH
11228 }
11229
e075ae69
RH
11230out:
11231 return ix86_issue_rate ();
11232}
fb693d44 11233
e075ae69
RH
11234/* We are about to issue INSN. Return the number of insns left on the
11235 ready queue that can be issued this cycle. */
b222082e 11236
c237e94a 11237static int
e075ae69
RH
11238ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11239 FILE *dump;
11240 int sched_verbose;
11241 rtx insn;
11242 int can_issue_more;
11243{
11244 int i;
11245 switch (ix86_cpu)
fb693d44 11246 {
e075ae69
RH
11247 default:
11248 return can_issue_more - 1;
fb693d44 11249
e075ae69
RH
11250 case PROCESSOR_PENTIUMPRO:
11251 {
11252 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
fb693d44 11253
e075ae69
RH
11254 if (uops == PPRO_UOPS_MANY)
11255 {
11256 if (sched_verbose)
11257 ix86_dump_ppro_packet (dump);
11258 ix86_sched_data.ppro.decode[0] = insn;
11259 ix86_sched_data.ppro.decode[1] = NULL;
11260 ix86_sched_data.ppro.decode[2] = NULL;
11261 if (sched_verbose)
11262 ix86_dump_ppro_packet (dump);
11263 ix86_sched_data.ppro.decode[0] = NULL;
11264 }
11265 else if (uops == PPRO_UOPS_FEW)
11266 {
11267 if (sched_verbose)
11268 ix86_dump_ppro_packet (dump);
11269 ix86_sched_data.ppro.decode[0] = insn;
11270 ix86_sched_data.ppro.decode[1] = NULL;
11271 ix86_sched_data.ppro.decode[2] = NULL;
11272 }
11273 else
11274 {
11275 for (i = 0; i < 3; ++i)
11276 if (ix86_sched_data.ppro.decode[i] == NULL)
11277 {
11278 ix86_sched_data.ppro.decode[i] = insn;
11279 break;
11280 }
11281 if (i == 3)
11282 abort ();
11283 if (i == 2)
11284 {
11285 if (sched_verbose)
11286 ix86_dump_ppro_packet (dump);
11287 ix86_sched_data.ppro.decode[0] = NULL;
11288 ix86_sched_data.ppro.decode[1] = NULL;
11289 ix86_sched_data.ppro.decode[2] = NULL;
11290 }
11291 }
11292 }
11293 return --ix86_sched_data.ppro.issued_this_cycle;
11294 }
fb693d44 11295}
9b690711
RH
11296
11297static int
11298ia32_use_dfa_pipeline_interface ()
11299{
11300 if (ix86_cpu == PROCESSOR_PENTIUM)
11301 return 1;
11302 return 0;
11303}
11304
11305/* How many alternative schedules to try. This should be as wide as the
11306 scheduling freedom in the DFA, but no wider. Making this value too
11307 large results extra work for the scheduler. */
11308
11309static int
11310ia32_multipass_dfa_lookahead ()
11311{
11312 if (ix86_cpu == PROCESSOR_PENTIUM)
11313 return 2;
11314 else
11315 return 0;
11316}
11317
a7180f70 11318\f
0e4970d7
RK
11319/* Walk through INSNS and look for MEM references whose address is DSTREG or
11320 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11321 appropriate. */
11322
11323void
11324ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11325 rtx insns;
11326 rtx dstref, srcref, dstreg, srcreg;
11327{
11328 rtx insn;
11329
11330 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11331 if (INSN_P (insn))
11332 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11333 dstreg, srcreg);
11334}
11335
11336/* Subroutine of above to actually do the updating by recursively walking
11337 the rtx. */
11338
11339static void
11340ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11341 rtx x;
11342 rtx dstref, srcref, dstreg, srcreg;
11343{
11344 enum rtx_code code = GET_CODE (x);
11345 const char *format_ptr = GET_RTX_FORMAT (code);
11346 int i, j;
11347
11348 if (code == MEM && XEXP (x, 0) == dstreg)
11349 MEM_COPY_ATTRIBUTES (x, dstref);
11350 else if (code == MEM && XEXP (x, 0) == srcreg)
11351 MEM_COPY_ATTRIBUTES (x, srcref);
11352
11353 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11354 {
11355 if (*format_ptr == 'e')
11356 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11357 dstreg, srcreg);
11358 else if (*format_ptr == 'E')
11359 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
397be6cf 11360 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
0e4970d7
RK
11361 dstreg, srcreg);
11362 }
11363}
11364\f
a7180f70
BS
11365/* Compute the alignment given to a constant that is being placed in memory.
11366 EXP is the constant and ALIGN is the alignment that the object would
11367 ordinarily have.
11368 The value of this function is used instead of that alignment to align
11369 the object. */
11370
11371int
11372ix86_constant_alignment (exp, align)
11373 tree exp;
11374 int align;
11375{
11376 if (TREE_CODE (exp) == REAL_CST)
11377 {
11378 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11379 return 64;
11380 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11381 return 128;
11382 }
11383 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11384 && align < 256)
11385 return 256;
11386
11387 return align;
11388}
11389
11390/* Compute the alignment for a static variable.
11391 TYPE is the data type, and ALIGN is the alignment that
11392 the object would ordinarily have. The value of this function is used
11393 instead of that alignment to align the object. */
11394
11395int
11396ix86_data_alignment (type, align)
11397 tree type;
11398 int align;
11399{
11400 if (AGGREGATE_TYPE_P (type)
11401 && TYPE_SIZE (type)
11402 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11403 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11404 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11405 return 256;
11406
0d7d98ee
JH
11407 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11408 to 16byte boundary. */
11409 if (TARGET_64BIT)
11410 {
11411 if (AGGREGATE_TYPE_P (type)
11412 && TYPE_SIZE (type)
11413 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11414 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11415 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11416 return 128;
11417 }
11418
a7180f70
BS
11419 if (TREE_CODE (type) == ARRAY_TYPE)
11420 {
11421 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11422 return 64;
11423 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11424 return 128;
11425 }
11426 else if (TREE_CODE (type) == COMPLEX_TYPE)
11427 {
0f290768 11428
a7180f70
BS
11429 if (TYPE_MODE (type) == DCmode && align < 64)
11430 return 64;
11431 if (TYPE_MODE (type) == XCmode && align < 128)
11432 return 128;
11433 }
11434 else if ((TREE_CODE (type) == RECORD_TYPE
11435 || TREE_CODE (type) == UNION_TYPE
11436 || TREE_CODE (type) == QUAL_UNION_TYPE)
11437 && TYPE_FIELDS (type))
11438 {
11439 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11440 return 64;
11441 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11442 return 128;
11443 }
11444 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11445 || TREE_CODE (type) == INTEGER_TYPE)
11446 {
11447 if (TYPE_MODE (type) == DFmode && align < 64)
11448 return 64;
11449 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11450 return 128;
11451 }
11452
11453 return align;
11454}
11455
11456/* Compute the alignment for a local variable.
11457 TYPE is the data type, and ALIGN is the alignment that
11458 the object would ordinarily have. The value of this macro is used
11459 instead of that alignment to align the object. */
11460
11461int
11462ix86_local_alignment (type, align)
11463 tree type;
11464 int align;
11465{
0d7d98ee
JH
11466 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11467 to 16byte boundary. */
11468 if (TARGET_64BIT)
11469 {
11470 if (AGGREGATE_TYPE_P (type)
11471 && TYPE_SIZE (type)
11472 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11473 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11474 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11475 return 128;
11476 }
a7180f70
BS
11477 if (TREE_CODE (type) == ARRAY_TYPE)
11478 {
11479 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11480 return 64;
11481 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11482 return 128;
11483 }
11484 else if (TREE_CODE (type) == COMPLEX_TYPE)
11485 {
11486 if (TYPE_MODE (type) == DCmode && align < 64)
11487 return 64;
11488 if (TYPE_MODE (type) == XCmode && align < 128)
11489 return 128;
11490 }
11491 else if ((TREE_CODE (type) == RECORD_TYPE
11492 || TREE_CODE (type) == UNION_TYPE
11493 || TREE_CODE (type) == QUAL_UNION_TYPE)
11494 && TYPE_FIELDS (type))
11495 {
11496 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11497 return 64;
11498 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11499 return 128;
11500 }
11501 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11502 || TREE_CODE (type) == INTEGER_TYPE)
11503 {
0f290768 11504
a7180f70
BS
11505 if (TYPE_MODE (type) == DFmode && align < 64)
11506 return 64;
11507 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11508 return 128;
11509 }
11510 return align;
11511}
0ed08620
JH
11512\f
11513/* Emit RTL insns to initialize the variable parts of a trampoline.
11514 FNADDR is an RTX for the address of the function's pure code.
11515 CXT is an RTX for the static chain value for the function. */
11516void
11517x86_initialize_trampoline (tramp, fnaddr, cxt)
11518 rtx tramp, fnaddr, cxt;
11519{
11520 if (!TARGET_64BIT)
11521 {
11522 /* Compute offset from the end of the jmp to the target function. */
11523 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11524 plus_constant (tramp, 10),
11525 NULL_RTX, 1, OPTAB_DIRECT);
11526 emit_move_insn (gen_rtx_MEM (QImode, tramp),
d8bf17f9 11527 gen_int_mode (0xb9, QImode));
0ed08620
JH
11528 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11529 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
d8bf17f9 11530 gen_int_mode (0xe9, QImode));
0ed08620
JH
11531 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11532 }
11533 else
11534 {
11535 int offset = 0;
11536 /* Try to load address using shorter movl instead of movabs.
11537 We may want to support movq for kernel mode, but kernel does not use
11538 trampolines at the moment. */
11539 if (x86_64_zero_extended_value (fnaddr))
11540 {
11541 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11542 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11543 gen_int_mode (0xbb41, HImode));
0ed08620
JH
11544 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11545 gen_lowpart (SImode, fnaddr));
11546 offset += 6;
11547 }
11548 else
11549 {
11550 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11551 gen_int_mode (0xbb49, HImode));
0ed08620
JH
11552 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11553 fnaddr);
11554 offset += 10;
11555 }
11556 /* Load static chain using movabs to r10. */
11557 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11558 gen_int_mode (0xba49, HImode));
0ed08620
JH
11559 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11560 cxt);
11561 offset += 10;
11562 /* Jump to the r11 */
11563 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11564 gen_int_mode (0xff49, HImode));
0ed08620 11565 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
d8bf17f9 11566 gen_int_mode (0xe3, QImode));
0ed08620
JH
11567 offset += 3;
11568 if (offset > TRAMPOLINE_SIZE)
b531087a 11569 abort ();
0ed08620
JH
11570 }
11571}
eeb06b1b 11572\f
6a2dd09a
RS
11573#define def_builtin(MASK, NAME, TYPE, CODE) \
11574do { \
11575 if ((MASK) & target_flags) \
11576 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11577 NULL, NULL_TREE); \
eeb06b1b 11578} while (0)
bd793c65 11579
bd793c65
BS
11580struct builtin_description
11581{
8b60264b
KG
11582 const unsigned int mask;
11583 const enum insn_code icode;
11584 const char *const name;
11585 const enum ix86_builtins code;
11586 const enum rtx_code comparison;
11587 const unsigned int flag;
bd793c65
BS
11588};
11589
fbe5eb6d
BS
11590/* Used for builtins that are enabled both by -msse and -msse2. */
11591#define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11592
8b60264b 11593static const struct builtin_description bdesc_comi[] =
bd793c65 11594{
fbe5eb6d
BS
11595 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
11596 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
11597 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
11598 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
11599 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
11600 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
11601 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
11602 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
11603 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
11604 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
11605 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
11606 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
11607 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
11608 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
11609 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
11610 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
11611 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
11612 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
11613 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
11614 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
11615 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
11616 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
11617 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
11618 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
bd793c65
BS
11619};
11620
8b60264b 11621static const struct builtin_description bdesc_2arg[] =
bd793c65
BS
11622{
11623 /* SSE */
fbe5eb6d
BS
11624 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11625 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11626 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11627 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11628 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11629 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11630 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11631 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11632
11633 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11634 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11635 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11636 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11637 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11638 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11639 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11640 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11641 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11642 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11643 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11644 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11645 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11646 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11647 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11648 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
11649 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
11650 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11651 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11652 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11653 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11654 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
11655 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
11656 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11657
11658 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11659 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11660 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11661 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11662
11663 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11664 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11665 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11666 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11667 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
bd793c65
BS
11668
11669 /* MMX */
eeb06b1b
BS
11670 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11671 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11672 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11673 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11674 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11675 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11676
11677 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11678 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11679 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11680 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11681 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11682 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11683 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11684 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11685
11686 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11687 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
fbe5eb6d 11688 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
eeb06b1b
BS
11689
11690 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11691 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11692 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11693 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11694
fbe5eb6d
BS
11695 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11696 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
eeb06b1b
BS
11697
11698 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11699 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11700 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11701 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11702 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11703 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11704
fbe5eb6d
BS
11705 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11706 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11707 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11708 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
eeb06b1b
BS
11709
11710 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11711 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11712 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11713 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11714 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11715 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
bd793c65
BS
11716
11717 /* Special. */
eeb06b1b
BS
11718 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11719 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11720 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11721
fbe5eb6d
BS
11722 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11723 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
eeb06b1b
BS
11724
11725 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11726 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11727 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11728 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11729 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11730 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11731
11732 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11733 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11734 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11735 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11736 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11737 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11738
11739 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11740 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11741 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11742 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11743
fbe5eb6d
BS
11744 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11745 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11746
11747 /* SSE2 */
11748 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11749 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11750 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11751 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11752 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11753 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11754 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11755 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11756
11757 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11758 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11759 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11760 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11761 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11762 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11763 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11764 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11765 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11766 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11767 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11768 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11769 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11770 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11771 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11772 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11773 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11774 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11775 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11776 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11777 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11778 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11779 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11780 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11781
11782 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11783 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11784 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11785 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11786
11787 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11788 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11789 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11790 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11791
11792 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11793 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11794 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11795
11796 /* SSE2 MMX */
11797 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11798 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11799 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11800 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11801 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11802 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11803 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11804 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11805
11806 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11807 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11808 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11809 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11810 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11811 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11812 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11813 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11814
11815 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11816 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11817 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11818 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11819
916b60b7
BS
11820 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11821 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11822 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11823 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
fbe5eb6d
BS
11824
11825 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11826 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11827
11828 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11829 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11830 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11831 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11832 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11833 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11834
11835 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11836 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11837 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11838 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11839
11840 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11841 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11842 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11843 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11844 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11845 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11846
916b60b7
BS
11847 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11848 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11849 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11850
11851 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11852 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11853
11854 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11855 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11856 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11857 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11858 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11859 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11860
11861 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11862 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11863 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11864 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11865 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11866 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11867
11868 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11869 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11870 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11871 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11872
11873 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11874
fbe5eb6d
BS
11875 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11876 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11877 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
bd793c65
BS
11878};
11879
8b60264b 11880static const struct builtin_description bdesc_1arg[] =
bd793c65 11881{
fbe5eb6d
BS
11882 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11883 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11884
11885 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11886 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11887 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
11888
11889 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11890 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11891 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11892 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
11893
11894 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
11895 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
11896 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
11897
11898 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
11899
11900 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
11901 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
bd793c65 11902
fbe5eb6d
BS
11903 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
11904 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
11905 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
11906 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
11907 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
bd793c65 11908
fbe5eb6d 11909 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
bd793c65 11910
fbe5eb6d
BS
11911 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
11912 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
11913
11914 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
11915 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
11916 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
bd793c65
BS
11917};
11918
f6155fda
SS
11919void
11920ix86_init_builtins ()
11921{
11922 if (TARGET_MMX)
11923 ix86_init_mmx_sse_builtins ();
11924}
11925
11926/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
bd793c65
BS
11927 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
11928 builtins. */
e37af218 11929static void
f6155fda 11930ix86_init_mmx_sse_builtins ()
bd793c65 11931{
8b60264b 11932 const struct builtin_description * d;
77ebd435 11933 size_t i;
bd793c65
BS
11934
11935 tree pchar_type_node = build_pointer_type (char_type_node);
11936 tree pfloat_type_node = build_pointer_type (float_type_node);
11937 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
916b60b7 11938 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
bd793c65
BS
11939 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
11940
11941 /* Comparisons. */
11942 tree int_ftype_v4sf_v4sf
b4de2f7d
AH
11943 = build_function_type_list (integer_type_node,
11944 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 11945 tree v4si_ftype_v4sf_v4sf
b4de2f7d
AH
11946 = build_function_type_list (V4SI_type_node,
11947 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 11948 /* MMX/SSE/integer conversions. */
bd793c65 11949 tree int_ftype_v4sf
b4de2f7d
AH
11950 = build_function_type_list (integer_type_node,
11951 V4SF_type_node, NULL_TREE);
bd793c65 11952 tree int_ftype_v8qi
b4de2f7d 11953 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
bd793c65 11954 tree v4sf_ftype_v4sf_int
b4de2f7d
AH
11955 = build_function_type_list (V4SF_type_node,
11956 V4SF_type_node, integer_type_node, NULL_TREE);
bd793c65 11957 tree v4sf_ftype_v4sf_v2si
b4de2f7d
AH
11958 = build_function_type_list (V4SF_type_node,
11959 V4SF_type_node, V2SI_type_node, NULL_TREE);
bd793c65 11960 tree int_ftype_v4hi_int
b4de2f7d
AH
11961 = build_function_type_list (integer_type_node,
11962 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 11963 tree v4hi_ftype_v4hi_int_int
e7a60f56 11964 = build_function_type_list (V4HI_type_node, V4HI_type_node,
b4de2f7d
AH
11965 integer_type_node, integer_type_node,
11966 NULL_TREE);
bd793c65
BS
11967 /* Miscellaneous. */
11968 tree v8qi_ftype_v4hi_v4hi
b4de2f7d
AH
11969 = build_function_type_list (V8QI_type_node,
11970 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 11971 tree v4hi_ftype_v2si_v2si
b4de2f7d
AH
11972 = build_function_type_list (V4HI_type_node,
11973 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 11974 tree v4sf_ftype_v4sf_v4sf_int
b4de2f7d
AH
11975 = build_function_type_list (V4SF_type_node,
11976 V4SF_type_node, V4SF_type_node,
11977 integer_type_node, NULL_TREE);
bd793c65 11978 tree v2si_ftype_v4hi_v4hi
b4de2f7d
AH
11979 = build_function_type_list (V2SI_type_node,
11980 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 11981 tree v4hi_ftype_v4hi_int
b4de2f7d 11982 = build_function_type_list (V4HI_type_node,
e7a60f56 11983 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 11984 tree v4hi_ftype_v4hi_di
b4de2f7d
AH
11985 = build_function_type_list (V4HI_type_node,
11986 V4HI_type_node, long_long_unsigned_type_node,
11987 NULL_TREE);
bd793c65 11988 tree v2si_ftype_v2si_di
b4de2f7d
AH
11989 = build_function_type_list (V2SI_type_node,
11990 V2SI_type_node, long_long_unsigned_type_node,
11991 NULL_TREE);
bd793c65 11992 tree void_ftype_void
b4de2f7d 11993 = build_function_type (void_type_node, void_list_node);
bd793c65 11994 tree void_ftype_unsigned
b4de2f7d 11995 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
bd793c65 11996 tree unsigned_ftype_void
b4de2f7d 11997 = build_function_type (unsigned_type_node, void_list_node);
bd793c65 11998 tree di_ftype_void
b4de2f7d 11999 = build_function_type (long_long_unsigned_type_node, void_list_node);
e37af218 12000 tree v4sf_ftype_void
b4de2f7d 12001 = build_function_type (V4SF_type_node, void_list_node);
bd793c65 12002 tree v2si_ftype_v4sf
b4de2f7d 12003 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12004 /* Loads/stores. */
bd793c65 12005 tree void_ftype_v8qi_v8qi_pchar
b4de2f7d
AH
12006 = build_function_type_list (void_type_node,
12007 V8QI_type_node, V8QI_type_node,
12008 pchar_type_node, NULL_TREE);
bd793c65 12009 tree v4sf_ftype_pfloat
b4de2f7d 12010 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
bd793c65
BS
12011 /* @@@ the type is bogus */
12012 tree v4sf_ftype_v4sf_pv2si
b4de2f7d
AH
12013 = build_function_type_list (V4SF_type_node,
12014 V4SF_type_node, pv2si_type_node, NULL_TREE);
1255c85c 12015 tree void_ftype_pv2si_v4sf
b4de2f7d
AH
12016 = build_function_type_list (void_type_node,
12017 pv2si_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12018 tree void_ftype_pfloat_v4sf
b4de2f7d
AH
12019 = build_function_type_list (void_type_node,
12020 pfloat_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12021 tree void_ftype_pdi_di
b4de2f7d
AH
12022 = build_function_type_list (void_type_node,
12023 pdi_type_node, long_long_unsigned_type_node,
12024 NULL_TREE);
916b60b7 12025 tree void_ftype_pv2di_v2di
b4de2f7d
AH
12026 = build_function_type_list (void_type_node,
12027 pv2di_type_node, V2DI_type_node, NULL_TREE);
bd793c65
BS
12028 /* Normal vector unops. */
12029 tree v4sf_ftype_v4sf
b4de2f7d 12030 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
0f290768 12031
bd793c65
BS
12032 /* Normal vector binops. */
12033 tree v4sf_ftype_v4sf_v4sf
b4de2f7d
AH
12034 = build_function_type_list (V4SF_type_node,
12035 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12036 tree v8qi_ftype_v8qi_v8qi
b4de2f7d
AH
12037 = build_function_type_list (V8QI_type_node,
12038 V8QI_type_node, V8QI_type_node, NULL_TREE);
bd793c65 12039 tree v4hi_ftype_v4hi_v4hi
b4de2f7d
AH
12040 = build_function_type_list (V4HI_type_node,
12041 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12042 tree v2si_ftype_v2si_v2si
b4de2f7d
AH
12043 = build_function_type_list (V2SI_type_node,
12044 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12045 tree di_ftype_di_di
b4de2f7d
AH
12046 = build_function_type_list (long_long_unsigned_type_node,
12047 long_long_unsigned_type_node,
12048 long_long_unsigned_type_node, NULL_TREE);
bd793c65 12049
47f339cf 12050 tree v2si_ftype_v2sf
ae3aa00d 12051 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12052 tree v2sf_ftype_v2si
b4de2f7d 12053 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
47f339cf 12054 tree v2si_ftype_v2si
b4de2f7d 12055 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
47f339cf 12056 tree v2sf_ftype_v2sf
b4de2f7d 12057 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12058 tree v2sf_ftype_v2sf_v2sf
b4de2f7d
AH
12059 = build_function_type_list (V2SF_type_node,
12060 V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12061 tree v2si_ftype_v2sf_v2sf
b4de2f7d
AH
12062 = build_function_type_list (V2SI_type_node,
12063 V2SF_type_node, V2SF_type_node, NULL_TREE);
fbe5eb6d
BS
12064 tree pint_type_node = build_pointer_type (integer_type_node);
12065 tree pdouble_type_node = build_pointer_type (double_type_node);
12066 tree int_ftype_v2df_v2df
b4de2f7d
AH
12067 = build_function_type_list (integer_type_node,
12068 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d
BS
12069
12070 tree ti_ftype_void
b4de2f7d 12071 = build_function_type (intTI_type_node, void_list_node);
fbe5eb6d 12072 tree ti_ftype_ti_ti
b4de2f7d
AH
12073 = build_function_type_list (intTI_type_node,
12074 intTI_type_node, intTI_type_node, NULL_TREE);
fbe5eb6d 12075 tree void_ftype_pvoid
b4de2f7d 12076 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
fbe5eb6d 12077 tree v2di_ftype_di
b4de2f7d
AH
12078 = build_function_type_list (V2DI_type_node,
12079 long_long_unsigned_type_node, NULL_TREE);
fbe5eb6d 12080 tree v4sf_ftype_v4si
b4de2f7d 12081 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12082 tree v4si_ftype_v4sf
b4de2f7d 12083 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12084 tree v2df_ftype_v4si
b4de2f7d 12085 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12086 tree v4si_ftype_v2df
b4de2f7d 12087 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12088 tree v2si_ftype_v2df
b4de2f7d 12089 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12090 tree v4sf_ftype_v2df
b4de2f7d 12091 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12092 tree v2df_ftype_v2si
b4de2f7d 12093 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 12094 tree v2df_ftype_v4sf
b4de2f7d 12095 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12096 tree int_ftype_v2df
b4de2f7d 12097 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12098 tree v2df_ftype_v2df_int
b4de2f7d
AH
12099 = build_function_type_list (V2DF_type_node,
12100 V2DF_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12101 tree v4sf_ftype_v4sf_v2df
b4de2f7d
AH
12102 = build_function_type_list (V4SF_type_node,
12103 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12104 tree v2df_ftype_v2df_v4sf
b4de2f7d
AH
12105 = build_function_type_list (V2DF_type_node,
12106 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12107 tree v2df_ftype_v2df_v2df_int
b4de2f7d
AH
12108 = build_function_type_list (V2DF_type_node,
12109 V2DF_type_node, V2DF_type_node,
12110 integer_type_node,
12111 NULL_TREE);
fbe5eb6d 12112 tree v2df_ftype_v2df_pv2si
b4de2f7d
AH
12113 = build_function_type_list (V2DF_type_node,
12114 V2DF_type_node, pv2si_type_node, NULL_TREE);
fbe5eb6d 12115 tree void_ftype_pv2si_v2df
b4de2f7d
AH
12116 = build_function_type_list (void_type_node,
12117 pv2si_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12118 tree void_ftype_pdouble_v2df
b4de2f7d
AH
12119 = build_function_type_list (void_type_node,
12120 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12121 tree void_ftype_pint_int
b4de2f7d
AH
12122 = build_function_type_list (void_type_node,
12123 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12124 tree void_ftype_v16qi_v16qi_pchar
b4de2f7d
AH
12125 = build_function_type_list (void_type_node,
12126 V16QI_type_node, V16QI_type_node,
12127 pchar_type_node, NULL_TREE);
fbe5eb6d 12128 tree v2df_ftype_pdouble
b4de2f7d 12129 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
fbe5eb6d 12130 tree v2df_ftype_v2df_v2df
b4de2f7d
AH
12131 = build_function_type_list (V2DF_type_node,
12132 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12133 tree v16qi_ftype_v16qi_v16qi
b4de2f7d
AH
12134 = build_function_type_list (V16QI_type_node,
12135 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 12136 tree v8hi_ftype_v8hi_v8hi
b4de2f7d
AH
12137 = build_function_type_list (V8HI_type_node,
12138 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 12139 tree v4si_ftype_v4si_v4si
b4de2f7d
AH
12140 = build_function_type_list (V4SI_type_node,
12141 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12142 tree v2di_ftype_v2di_v2di
b4de2f7d
AH
12143 = build_function_type_list (V2DI_type_node,
12144 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 12145 tree v2di_ftype_v2df_v2df
b4de2f7d
AH
12146 = build_function_type_list (V2DI_type_node,
12147 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12148 tree v2df_ftype_v2df
b4de2f7d 12149 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12150 tree v2df_ftype_double
b4de2f7d 12151 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
fbe5eb6d 12152 tree v2df_ftype_double_double
b4de2f7d
AH
12153 = build_function_type_list (V2DF_type_node,
12154 double_type_node, double_type_node, NULL_TREE);
fbe5eb6d 12155 tree int_ftype_v8hi_int
b4de2f7d
AH
12156 = build_function_type_list (integer_type_node,
12157 V8HI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12158 tree v8hi_ftype_v8hi_int_int
b4de2f7d
AH
12159 = build_function_type_list (V8HI_type_node,
12160 V8HI_type_node, integer_type_node,
12161 integer_type_node, NULL_TREE);
916b60b7 12162 tree v2di_ftype_v2di_int
b4de2f7d
AH
12163 = build_function_type_list (V2DI_type_node,
12164 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12165 tree v4si_ftype_v4si_int
b4de2f7d
AH
12166 = build_function_type_list (V4SI_type_node,
12167 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12168 tree v8hi_ftype_v8hi_int
b4de2f7d
AH
12169 = build_function_type_list (V8HI_type_node,
12170 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 12171 tree v8hi_ftype_v8hi_v2di
b4de2f7d
AH
12172 = build_function_type_list (V8HI_type_node,
12173 V8HI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 12174 tree v4si_ftype_v4si_v2di
b4de2f7d
AH
12175 = build_function_type_list (V4SI_type_node,
12176 V4SI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 12177 tree v4si_ftype_v8hi_v8hi
b4de2f7d
AH
12178 = build_function_type_list (V4SI_type_node,
12179 V8HI_type_node, V8HI_type_node, NULL_TREE);
916b60b7 12180 tree di_ftype_v8qi_v8qi
b4de2f7d
AH
12181 = build_function_type_list (long_long_unsigned_type_node,
12182 V8QI_type_node, V8QI_type_node, NULL_TREE);
916b60b7 12183 tree v2di_ftype_v16qi_v16qi
b4de2f7d
AH
12184 = build_function_type_list (V2DI_type_node,
12185 V16QI_type_node, V16QI_type_node, NULL_TREE);
916b60b7 12186 tree int_ftype_v16qi
b4de2f7d 12187 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
47f339cf 12188
bd793c65
BS
12189 /* Add all builtins that are more or less simple operations on two
12190 operands. */
ca7558fc 12191 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
12192 {
12193 /* Use one of the operands; the target can have a different mode for
12194 mask-generating compares. */
12195 enum machine_mode mode;
12196 tree type;
12197
12198 if (d->name == 0)
12199 continue;
12200 mode = insn_data[d->icode].operand[1].mode;
12201
bd793c65
BS
12202 switch (mode)
12203 {
fbe5eb6d
BS
12204 case V16QImode:
12205 type = v16qi_ftype_v16qi_v16qi;
12206 break;
12207 case V8HImode:
12208 type = v8hi_ftype_v8hi_v8hi;
12209 break;
12210 case V4SImode:
12211 type = v4si_ftype_v4si_v4si;
12212 break;
12213 case V2DImode:
12214 type = v2di_ftype_v2di_v2di;
12215 break;
12216 case V2DFmode:
12217 type = v2df_ftype_v2df_v2df;
12218 break;
12219 case TImode:
12220 type = ti_ftype_ti_ti;
12221 break;
bd793c65
BS
12222 case V4SFmode:
12223 type = v4sf_ftype_v4sf_v4sf;
12224 break;
12225 case V8QImode:
12226 type = v8qi_ftype_v8qi_v8qi;
12227 break;
12228 case V4HImode:
12229 type = v4hi_ftype_v4hi_v4hi;
12230 break;
12231 case V2SImode:
12232 type = v2si_ftype_v2si_v2si;
12233 break;
bd793c65
BS
12234 case DImode:
12235 type = di_ftype_di_di;
12236 break;
12237
12238 default:
12239 abort ();
12240 }
0f290768 12241
bd793c65
BS
12242 /* Override for comparisons. */
12243 if (d->icode == CODE_FOR_maskcmpv4sf3
12244 || d->icode == CODE_FOR_maskncmpv4sf3
12245 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12246 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12247 type = v4si_ftype_v4sf_v4sf;
12248
fbe5eb6d
BS
12249 if (d->icode == CODE_FOR_maskcmpv2df3
12250 || d->icode == CODE_FOR_maskncmpv2df3
12251 || d->icode == CODE_FOR_vmmaskcmpv2df3
12252 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12253 type = v2di_ftype_v2df_v2df;
12254
eeb06b1b 12255 def_builtin (d->mask, d->name, type, d->code);
bd793c65
BS
12256 }
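/* Editor's note (an illustration, not original text): for a typical
   bdesc_2arg entry such as

	{ MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps",
	  IX86_BUILTIN_ADDPS, 0, 0 },

   insn_data[CODE_FOR_addv4sf3].operand[1].mode is V4SFmode, so the
   switch above picks v4sf_ftype_v4sf_v4sf and the builtin is
   registered with that two-operand V4SF signature.  */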
12257
12258 /* Add the remaining MMX insns with somewhat more complicated types. */
eeb06b1b
BS
12259 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12260 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12261 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12262 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12263 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12264 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12265 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12266
12267 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12268 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12269 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12270
12271 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12272 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12273
12274 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12275 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 12276
bd793c65 12277 /* comi/ucomi insns. */
ca7558fc 12278 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
fbe5eb6d
BS
12279 if (d->mask == MASK_SSE2)
12280 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12281 else
12282 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 12283
1255c85c
BS
12284 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12285 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12286 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 12287
fbe5eb6d
BS
12288 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12289 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12290 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12291 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12292 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12293 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
bd793c65 12294
fbe5eb6d
BS
12295 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
12296 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
12297 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
12298 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
e37af218 12299
fbe5eb6d
BS
12300 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12301 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
bd793c65 12302
fbe5eb6d 12303 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
bd793c65 12304
fbe5eb6d
BS
12305 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12306 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12307 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12308 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12309 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12310 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
bd793c65 12311
fbe5eb6d
BS
12312 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12313 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12314 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12315 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
bd793c65 12316
fbe5eb6d
BS
12317 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12318 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12319 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12320 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
bd793c65 12321
fbe5eb6d 12322 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
bd793c65 12323
916b60b7 12324 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
bd793c65 12325
fbe5eb6d
BS
12326 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12327 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12328 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12329 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12330 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12331 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
bd793c65 12332
fbe5eb6d 12333 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
bd793c65 12334
47f339cf
BS
12335 /* Original 3DNow! */
12336 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12337 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12338 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12339 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12340 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12341 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12342 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12343 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12344 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12345 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12346 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12347 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12348 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12349 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12350 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12351 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12352 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12353 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12354 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12355 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
47f339cf
BS
12356
12357 /* 3DNow! extension as used in the Athlon CPU. */
12358 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12359 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12360 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12361 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12362 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12363 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12364
fbe5eb6d
BS
12365 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12366
12367 /* SSE2 */
12368 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12369 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12370
12371 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12372 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12373
12374 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12375 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12376 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12377 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12378 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12379 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12380
12381 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12382 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12383 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12384 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12385
12386 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
916b60b7 12387 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
fbe5eb6d
BS
12388 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12389 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
916b60b7 12390 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
fbe5eb6d
BS
12391
12392 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12393 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12394 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
916b60b7 12395 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
fbe5eb6d
BS
12396
12397 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12398 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12399
12400 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12401
12402 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
916b60b7 12403 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
fbe5eb6d
BS
12404
12405 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12406 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12407 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12408 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12409 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12410
12411 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12412
12413 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12414 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12415
12416 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12417 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12418 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12419
12420 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12421 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12422 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12423
12424 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12425 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12426 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12427 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12428 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12429 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12430 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12431
12432 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12433 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12434 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
916b60b7
BS
12435
12436 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12437 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12438 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12439
12440 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12441 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12442 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12443
12444 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12445 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12446
12447 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12448 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12449 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12450
12451 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12452 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12453 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12454
12455 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12456 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12457
12458 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
bd793c65
BS
12459}
12460
12461/* Errors in the source file can cause expand_expr to return const0_rtx
12462 where we expect a vector. To avoid crashing, use one of the vector
12463 clear instructions. */
12464static rtx
12465safe_vector_operand (x, mode)
12466 rtx x;
12467 enum machine_mode mode;
12468{
12469 if (x != const0_rtx)
12470 return x;
12471 x = gen_reg_rtx (mode);
12472
47f339cf 12473 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
bd793c65
BS
12474 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12475 : gen_rtx_SUBREG (DImode, x, 0)));
12476 else
e37af218
RH
12477 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12478 : gen_rtx_SUBREG (V4SFmode, x, 0)));
bd793c65
BS
12479 return x;
12480}
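/* Editor's note (an assumption about a typical failure mode, not
   original text): if the user passes an erroneous expression -- say
   an undeclared variable -- to a vector builtin, expand_expr returns
   const0_rtx for it; the helper above then substitutes a freshly
   cleared MMX or SSE register so the expanders below still see an
   operand of the right mode.  */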
12481
12482/* Subroutine of ix86_expand_builtin to take care of binop insns. */
12483
12484static rtx
12485ix86_expand_binop_builtin (icode, arglist, target)
12486 enum insn_code icode;
12487 tree arglist;
12488 rtx target;
12489{
12490 rtx pat;
12491 tree arg0 = TREE_VALUE (arglist);
12492 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12493 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12494 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12495 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12496 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12497 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12498
12499 if (VECTOR_MODE_P (mode0))
12500 op0 = safe_vector_operand (op0, mode0);
12501 if (VECTOR_MODE_P (mode1))
12502 op1 = safe_vector_operand (op1, mode1);
12503
12504 if (! target
12505 || GET_MODE (target) != tmode
12506 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12507 target = gen_reg_rtx (tmode);
12508
12509 /* In case the insn wants input operands in modes different from
12510 the result, abort. */
12511 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12512 abort ();
12513
12514 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12515 op0 = copy_to_mode_reg (mode0, op0);
12516 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12517 op1 = copy_to_mode_reg (mode1, op1);
12518
59bef189
RH
12519 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12520 yet one of the two must not be a memory. This is normally enforced
12521 by expanders, but we didn't bother to create one here. */
12522 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12523 op0 = copy_to_mode_reg (mode0, op0);
12524
bd793c65
BS
12525 pat = GEN_FCN (icode) (target, op0, op1);
12526 if (! pat)
12527 return 0;
12528 emit_insn (pat);
12529 return target;
12530}
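/* Editor's sketch of the data flow (not part of the original
   sources): for __builtin_ia32_paddsw the icode would be
   CODE_FOR_ssaddv4hi3, so tmode, mode0 and mode1 are all V4HImode;
   operands failing their predicates are copied into registers and
   GEN_FCN emits the ss_plus:V4HI pattern into TARGET.  */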
12531
fce5a9f2 12532/* In type_for_mode we restrict the ability to create TImode types
e37af218
RH
12533 to hosts with a 64-bit HOST_WIDE_INT. So we've defined the SSE logicals
12534 to have a V4SFmode signature. Convert them in-place to TImode. */
12535
12536static rtx
12537ix86_expand_timode_binop_builtin (icode, arglist, target)
12538 enum insn_code icode;
12539 tree arglist;
12540 rtx target;
12541{
12542 rtx pat;
12543 tree arg0 = TREE_VALUE (arglist);
12544 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12545 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12546 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12547
12548 op0 = gen_lowpart (TImode, op0);
12549 op1 = gen_lowpart (TImode, op1);
12550 target = gen_reg_rtx (TImode);
12551
12552 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
12553 op0 = copy_to_mode_reg (TImode, op0);
12554 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
12555 op1 = copy_to_mode_reg (TImode, op1);
12556
59bef189
RH
12557 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12558 yet one of the two must not be a memory. This is normally enforced
12559 by expanders, but we didn't bother to create one here. */
12560 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12561 op0 = copy_to_mode_reg (TImode, op0);
12562
e37af218
RH
12563 pat = GEN_FCN (icode) (target, op0, op1);
12564 if (! pat)
12565 return 0;
12566 emit_insn (pat);
12567
12568 return gen_lowpart (V4SFmode, target);
12569}
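/* Editor's illustration (a sketch, not part of the original file):
   this is how the V4SFmode logicals registered above reach the TImode
   patterns, e.g. __builtin_ia32_andps expands through sse_andti3:

	typedef float v4sf __attribute__ ((mode (V4SF)));

	v4sf
	mask_and (v4sf a, v4sf b)
	{
	  return __builtin_ia32_andps (a, b);
	}
*/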
12570
bd793c65
BS
12571/* Subroutine of ix86_expand_builtin to take care of stores. */
12572
12573static rtx
e37af218 12574ix86_expand_store_builtin (icode, arglist)
bd793c65
BS
12575 enum insn_code icode;
12576 tree arglist;
bd793c65
BS
12577{
12578 rtx pat;
12579 tree arg0 = TREE_VALUE (arglist);
12580 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12581 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12582 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12583 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12584 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12585
12586 if (VECTOR_MODE_P (mode1))
12587 op1 = safe_vector_operand (op1, mode1);
12588
12589 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
59bef189
RH
12590
12591 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12592 op1 = copy_to_mode_reg (mode1, op1);
12593
bd793c65
BS
12594 pat = GEN_FCN (icode) (op0, op1);
12595 if (pat)
12596 emit_insn (pat);
12597 return 0;
12598}
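/* Editor's illustration (not part of the original file): store
   builtins take the destination pointer first, e.g.:

	typedef float v4sf __attribute__ ((mode (V4SF)));

	void
	save (float *p, v4sf x)
	{
	  __builtin_ia32_storeaps (p, x);
	}

   matching the void_ftype_pfloat_v4sf signature registered above.  */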
12599
12600/* Subroutine of ix86_expand_builtin to take care of unop insns. */
12601
12602static rtx
12603ix86_expand_unop_builtin (icode, arglist, target, do_load)
12604 enum insn_code icode;
12605 tree arglist;
12606 rtx target;
12607 int do_load;
12608{
12609 rtx pat;
12610 tree arg0 = TREE_VALUE (arglist);
12611 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12612 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12613 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12614
12615 if (! target
12616 || GET_MODE (target) != tmode
12617 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12618 target = gen_reg_rtx (tmode);
12619 if (do_load)
12620 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12621 else
12622 {
12623 if (VECTOR_MODE_P (mode0))
12624 op0 = safe_vector_operand (op0, mode0);
12625
12626 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12627 op0 = copy_to_mode_reg (mode0, op0);
12628 }
12629
12630 pat = GEN_FCN (icode) (target, op0);
12631 if (! pat)
12632 return 0;
12633 emit_insn (pat);
12634 return target;
12635}
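/* Editor's note (not part of the original file): DO_LOAD selects
   between the two flavors visible below -- nonzero for pointer
   arguments such as __builtin_ia32_loadups, where the helper wraps
   the address in a MEM of the insn's input mode, and zero for plain
   vector arguments such as __builtin_ia32_pf2id.  */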
12636
12637/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12638 sqrtss, rsqrtss, rcpss. */
12639
12640static rtx
12641ix86_expand_unop1_builtin (icode, arglist, target)
12642 enum insn_code icode;
12643 tree arglist;
12644 rtx target;
12645{
12646 rtx pat;
12647 tree arg0 = TREE_VALUE (arglist);
59bef189 12648 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
bd793c65
BS
12649 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12650 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12651
12652 if (! target
12653 || GET_MODE (target) != tmode
12654 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12655 target = gen_reg_rtx (tmode);
12656
12657 if (VECTOR_MODE_P (mode0))
12658 op0 = safe_vector_operand (op0, mode0);
12659
12660 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12661 op0 = copy_to_mode_reg (mode0, op0);
fce5a9f2 12662
59bef189
RH
12663 op1 = op0;
12664 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12665 op1 = copy_to_mode_reg (mode0, op1);
fce5a9f2 12666
59bef189 12667 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
12668 if (! pat)
12669 return 0;
12670 emit_insn (pat);
12671 return target;
12672}
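/* Editor's note (an observation about the vm patterns, not original
   text): sqrtss, rsqrtss and rcpss combine the scalar result with the
   untouched upper elements of their input, so the insn takes the same
   value twice; that is why op0 is duplicated into op1 above.  E.g.
   __builtin_ia32_sqrtss (x) computes the square root of element 0 of
   X and passes elements 1-3 through unchanged.  */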
12673
12674/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12675
12676static rtx
12677ix86_expand_sse_compare (d, arglist, target)
8b60264b 12678 const struct builtin_description *d;
bd793c65
BS
12679 tree arglist;
12680 rtx target;
12681{
12682 rtx pat;
12683 tree arg0 = TREE_VALUE (arglist);
12684 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12685 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12686 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12687 rtx op2;
12688 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12689 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12690 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12691 enum rtx_code comparison = d->comparison;
12692
12693 if (VECTOR_MODE_P (mode0))
12694 op0 = safe_vector_operand (op0, mode0);
12695 if (VECTOR_MODE_P (mode1))
12696 op1 = safe_vector_operand (op1, mode1);
12697
12698 /* Swap operands if we have a comparison that isn't available in
12699 hardware. */
12700 if (d->flag)
12701 {
21e1b5f1
BS
12702 rtx tmp = gen_reg_rtx (mode1);
12703 emit_move_insn (tmp, op1);
bd793c65 12704 op1 = op0;
21e1b5f1 12705 op0 = tmp;
bd793c65 12706 }
21e1b5f1
BS
12707
12708 if (! target
12709 || GET_MODE (target) != tmode
12710 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
bd793c65
BS
12711 target = gen_reg_rtx (tmode);
12712
12713 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12714 op0 = copy_to_mode_reg (mode0, op0);
12715 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12716 op1 = copy_to_mode_reg (mode1, op1);
12717
12718 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12719 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12720 if (! pat)
12721 return 0;
12722 emit_insn (pat);
12723 return target;
12724}
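/* Editor's note (a sketch, not original text): the hardware provides
   cmpltps but no cmpgtps, so the table entry for
   __builtin_ia32_cmpgtps carries the LT comparison with d->flag set;
   the code above then swaps the operands -- copying op1 through a
   fresh register first -- and emits the available pattern with the
   mask landing in TARGET.  */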
12725
12726/* Subroutine of ix86_expand_builtin to take care of comi insns. */
12727
12728static rtx
12729ix86_expand_sse_comi (d, arglist, target)
8b60264b 12730 const struct builtin_description *d;
bd793c65
BS
12731 tree arglist;
12732 rtx target;
12733{
12734 rtx pat;
12735 tree arg0 = TREE_VALUE (arglist);
12736 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12737 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12738 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12739 rtx op2;
12740 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12741 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12742 enum rtx_code comparison = d->comparison;
12743
12744 if (VECTOR_MODE_P (mode0))
12745 op0 = safe_vector_operand (op0, mode0);
12746 if (VECTOR_MODE_P (mode1))
12747 op1 = safe_vector_operand (op1, mode1);
12748
12749 /* Swap operands if we have a comparison that isn't available in
12750 hardware. */
12751 if (d->flag)
12752 {
12753 rtx tmp = op1;
12754 op1 = op0;
12755 op0 = tmp;
bd793c65
BS
12756 }
12757
12758 target = gen_reg_rtx (SImode);
12759 emit_move_insn (target, const0_rtx);
12760 target = gen_rtx_SUBREG (QImode, target, 0);
12761
12762 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12763 op0 = copy_to_mode_reg (mode0, op0);
12764 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12765 op1 = copy_to_mode_reg (mode1, op1);
12766
12767 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12768 pat = GEN_FCN (d->icode) (op0, op1, op2);
12769 if (! pat)
12770 return 0;
12771 emit_insn (pat);
29628f27
BS
12772 emit_insn (gen_rtx_SET (VOIDmode,
12773 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12774 gen_rtx_fmt_ee (comparison, QImode,
12775 gen_rtx_REG (CCmode, FLAGS_REG),
12776 const0_rtx)));
bd793c65 12777
6f1a6c5b 12778 return SUBREG_REG (target);
bd793c65
BS
12779}
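/* Editor's illustration (a sketch, not part of the original sources):
   the comi expanders yield an int computed from the flags register
   (a comiss/comisd followed by a setcc into the QImode subreg), e.g.:

	typedef float v4sf __attribute__ ((mode (V4SF)));

	int
	first_equal (v4sf a, v4sf b)
	{
	  return __builtin_ia32_comieq (a, b);
	}
*/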
12780
12781/* Expand an expression EXP that calls a built-in function,
12782 with result going to TARGET if that's convenient
12783 (and in mode MODE if that's convenient).
12784 SUBTARGET may be used as the target for computing one of EXP's operands.
12785 IGNORE is nonzero if the value is to be ignored. */
12786
12787rtx
12788ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12789 tree exp;
12790 rtx target;
12791 rtx subtarget ATTRIBUTE_UNUSED;
12792 enum machine_mode mode ATTRIBUTE_UNUSED;
12793 int ignore ATTRIBUTE_UNUSED;
12794{
8b60264b 12795 const struct builtin_description *d;
77ebd435 12796 size_t i;
bd793c65
BS
12797 enum insn_code icode;
12798 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12799 tree arglist = TREE_OPERAND (exp, 1);
e37af218 12800 tree arg0, arg1, arg2;
bd793c65
BS
12801 rtx op0, op1, op2, pat;
12802 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 12803 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
bd793c65
BS
12804
12805 switch (fcode)
12806 {
12807 case IX86_BUILTIN_EMMS:
12808 emit_insn (gen_emms ());
12809 return 0;
12810
12811 case IX86_BUILTIN_SFENCE:
12812 emit_insn (gen_sfence ());
12813 return 0;
12814
bd793c65 12815 case IX86_BUILTIN_PEXTRW:
fbe5eb6d
BS
12816 case IX86_BUILTIN_PEXTRW128:
12817 icode = (fcode == IX86_BUILTIN_PEXTRW
12818 ? CODE_FOR_mmx_pextrw
12819 : CODE_FOR_sse2_pextrw);
bd793c65
BS
12820 arg0 = TREE_VALUE (arglist);
12821 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12822 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12823 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12824 tmode = insn_data[icode].operand[0].mode;
12825 mode0 = insn_data[icode].operand[1].mode;
12826 mode1 = insn_data[icode].operand[2].mode;
12827
12828 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12829 op0 = copy_to_mode_reg (mode0, op0);
12830 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12831 {
12832 /* @@@ better error message */
12833 error ("selector must be an immediate");
6f1a6c5b 12834 return gen_reg_rtx (tmode);
bd793c65
BS
12835 }
12836 if (target == 0
12837 || GET_MODE (target) != tmode
12838 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12839 target = gen_reg_rtx (tmode);
12840 pat = GEN_FCN (icode) (target, op0, op1);
12841 if (! pat)
12842 return 0;
12843 emit_insn (pat);
12844 return target;
12845
12846 case IX86_BUILTIN_PINSRW:
fbe5eb6d
BS
12847 case IX86_BUILTIN_PINSRW128:
12848 icode = (fcode == IX86_BUILTIN_PINSRW
12849 ? CODE_FOR_mmx_pinsrw
12850 : CODE_FOR_sse2_pinsrw);
bd793c65
BS
12851 arg0 = TREE_VALUE (arglist);
12852 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12853 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12854 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12855 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12856 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12857 tmode = insn_data[icode].operand[0].mode;
12858 mode0 = insn_data[icode].operand[1].mode;
12859 mode1 = insn_data[icode].operand[2].mode;
12860 mode2 = insn_data[icode].operand[3].mode;
12861
12862 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12863 op0 = copy_to_mode_reg (mode0, op0);
12864 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12865 op1 = copy_to_mode_reg (mode1, op1);
12866 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12867 {
12868 /* @@@ better error message */
12869 error ("selector must be an immediate");
12870 return const0_rtx;
12871 }
12872 if (target == 0
12873 || GET_MODE (target) != tmode
12874 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12875 target = gen_reg_rtx (tmode);
12876 pat = GEN_FCN (icode) (target, op0, op1, op2);
12877 if (! pat)
12878 return 0;
12879 emit_insn (pat);
12880 return target;
12881
12882 case IX86_BUILTIN_MASKMOVQ:
fbe5eb6d
BS
12883 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12884 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12885 : CODE_FOR_sse2_maskmovdqu);
bd793c65
BS
12886 /* Note the arg order is different from the operand order. */
12887 arg1 = TREE_VALUE (arglist);
12888 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12889 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12890 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12891 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12892 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12893 mode0 = insn_data[icode].operand[0].mode;
12894 mode1 = insn_data[icode].operand[1].mode;
12895 mode2 = insn_data[icode].operand[2].mode;
12896
5c464583 12897 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
bd793c65
BS
12898 op0 = copy_to_mode_reg (mode0, op0);
12899 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12900 op1 = copy_to_mode_reg (mode1, op1);
12901 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
12902 op2 = copy_to_mode_reg (mode2, op2);
12903 pat = GEN_FCN (icode) (op0, op1, op2);
12904 if (! pat)
12905 return 0;
12906 emit_insn (pat);
12907 return 0;
12908
12909 case IX86_BUILTIN_SQRTSS:
12910 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
12911 case IX86_BUILTIN_RSQRTSS:
12912 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
12913 case IX86_BUILTIN_RCPSS:
12914 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
12915
e37af218
RH
12916 case IX86_BUILTIN_ANDPS:
12917 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
12918 arglist, target);
12919 case IX86_BUILTIN_ANDNPS:
12920 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
12921 arglist, target);
12922 case IX86_BUILTIN_ORPS:
12923 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
12924 arglist, target);
12925 case IX86_BUILTIN_XORPS:
12926 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
12927 arglist, target);
12928
bd793c65
BS
12929 case IX86_BUILTIN_LOADAPS:
12930 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
12931
12932 case IX86_BUILTIN_LOADUPS:
12933 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
12934
12935 case IX86_BUILTIN_STOREAPS:
e37af218 12936 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
bd793c65 12937 case IX86_BUILTIN_STOREUPS:
e37af218 12938 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
bd793c65
BS
12939
12940 case IX86_BUILTIN_LOADSS:
12941 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
12942
12943 case IX86_BUILTIN_STORESS:
e37af218 12944 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
bd793c65 12945
0f290768 12946 case IX86_BUILTIN_LOADHPS:
bd793c65 12947 case IX86_BUILTIN_LOADLPS:
fbe5eb6d
BS
12948 case IX86_BUILTIN_LOADHPD:
12949 case IX86_BUILTIN_LOADLPD:
12950 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
12951 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
12952 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
12953 : CODE_FOR_sse2_movlpd);
bd793c65
BS
12954 arg0 = TREE_VALUE (arglist);
12955 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12956 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12957 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12958 tmode = insn_data[icode].operand[0].mode;
12959 mode0 = insn_data[icode].operand[1].mode;
12960 mode1 = insn_data[icode].operand[2].mode;
12961
12962 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12963 op0 = copy_to_mode_reg (mode0, op0);
12964 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
12965 if (target == 0
12966 || GET_MODE (target) != tmode
12967 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12968 target = gen_reg_rtx (tmode);
12969 pat = GEN_FCN (icode) (target, op0, op1);
12970 if (! pat)
12971 return 0;
12972 emit_insn (pat);
12973 return target;
0f290768 12974
bd793c65
BS
12975 case IX86_BUILTIN_STOREHPS:
12976 case IX86_BUILTIN_STORELPS:
fbe5eb6d
BS
12977 case IX86_BUILTIN_STOREHPD:
12978 case IX86_BUILTIN_STORELPD:
12979 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
12980 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
12981 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
12982 : CODE_FOR_sse2_movlpd);
bd793c65
BS
12983 arg0 = TREE_VALUE (arglist);
12984 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12985 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12986 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12987 mode0 = insn_data[icode].operand[1].mode;
12988 mode1 = insn_data[icode].operand[2].mode;
12989
12990 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12991 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12992 op1 = copy_to_mode_reg (mode1, op1);
12993
12994 pat = GEN_FCN (icode) (op0, op0, op1);
12995 if (! pat)
12996 return 0;
12997 emit_insn (pat);
12998 return 0;
12999
13000 case IX86_BUILTIN_MOVNTPS:
e37af218 13001 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
bd793c65 13002 case IX86_BUILTIN_MOVNTQ:
e37af218 13003 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
bd793c65
BS
13004
13005 case IX86_BUILTIN_LDMXCSR:
13006 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13007 target = assign_386_stack_local (SImode, 0);
13008 emit_move_insn (target, op0);
13009 emit_insn (gen_ldmxcsr (target));
13010 return 0;
13011
13012 case IX86_BUILTIN_STMXCSR:
13013 target = assign_386_stack_local (SImode, 0);
13014 emit_insn (gen_stmxcsr (target));
13015 return copy_to_mode_reg (SImode, target);
13016
bd793c65 13017 case IX86_BUILTIN_SHUFPS:
fbe5eb6d
BS
13018 case IX86_BUILTIN_SHUFPD:
13019 icode = (fcode == IX86_BUILTIN_SHUFPS
13020 ? CODE_FOR_sse_shufps
13021 : CODE_FOR_sse2_shufpd);
bd793c65
BS
13022 arg0 = TREE_VALUE (arglist);
13023 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13024 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13025 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13026 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13027 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13028 tmode = insn_data[icode].operand[0].mode;
13029 mode0 = insn_data[icode].operand[1].mode;
13030 mode1 = insn_data[icode].operand[2].mode;
13031 mode2 = insn_data[icode].operand[3].mode;
13032
13033 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13034 op0 = copy_to_mode_reg (mode0, op0);
13035 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13036 op1 = copy_to_mode_reg (mode1, op1);
13037 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13038 {
13039 /* @@@ better error message */
13040 error ("mask must be an immediate");
6f1a6c5b 13041 return gen_reg_rtx (tmode);
bd793c65
BS
13042 }
13043 if (target == 0
13044 || GET_MODE (target) != tmode
13045 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13046 target = gen_reg_rtx (tmode);
13047 pat = GEN_FCN (icode) (target, op0, op1, op2);
13048 if (! pat)
13049 return 0;
13050 emit_insn (pat);
13051 return target;
13052
13053 case IX86_BUILTIN_PSHUFW:
fbe5eb6d
BS
13054 case IX86_BUILTIN_PSHUFD:
13055 case IX86_BUILTIN_PSHUFHW:
13056 case IX86_BUILTIN_PSHUFLW:
13057 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13058 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13059 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13060 : CODE_FOR_mmx_pshufw);
bd793c65
BS
13061 arg0 = TREE_VALUE (arglist);
13062 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13063 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13064 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13065 tmode = insn_data[icode].operand[0].mode;
29628f27
BS
13066 mode1 = insn_data[icode].operand[1].mode;
13067 mode2 = insn_data[icode].operand[2].mode;
bd793c65 13068
29628f27
BS
13069 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13070 op0 = copy_to_mode_reg (mode1, op0);
13071 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
bd793c65
BS
13072 {
13073 /* @@@ better error message */
13074 error ("mask must be an immediate");
13075 return const0_rtx;
13076 }
13077 if (target == 0
13078 || GET_MODE (target) != tmode
13079 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13080 target = gen_reg_rtx (tmode);
29628f27 13081 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
13082 if (! pat)
13083 return 0;
13084 emit_insn (pat);
13085 return target;
13086
47f339cf
BS
13087 case IX86_BUILTIN_FEMMS:
13088 emit_insn (gen_femms ());
13089 return NULL_RTX;
13090
13091 case IX86_BUILTIN_PAVGUSB:
13092 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13093
13094 case IX86_BUILTIN_PF2ID:
13095 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13096
13097 case IX86_BUILTIN_PFACC:
13098 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13099
13100 case IX86_BUILTIN_PFADD:
13101 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13102
13103 case IX86_BUILTIN_PFCMPEQ:
13104 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13105
13106 case IX86_BUILTIN_PFCMPGE:
13107 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13108
13109 case IX86_BUILTIN_PFCMPGT:
13110 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13111
13112 case IX86_BUILTIN_PFMAX:
13113 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13114
13115 case IX86_BUILTIN_PFMIN:
13116 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13117
13118 case IX86_BUILTIN_PFMUL:
13119 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13120
13121 case IX86_BUILTIN_PFRCP:
13122 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13123
13124 case IX86_BUILTIN_PFRCPIT1:
13125 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13126
13127 case IX86_BUILTIN_PFRCPIT2:
13128 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13129
13130 case IX86_BUILTIN_PFRSQIT1:
13131 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13132
13133 case IX86_BUILTIN_PFRSQRT:
13134 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13135
13136 case IX86_BUILTIN_PFSUB:
13137 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13138
13139 case IX86_BUILTIN_PFSUBR:
13140 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13141
13142 case IX86_BUILTIN_PI2FD:
13143 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13144
13145 case IX86_BUILTIN_PMULHRW:
13146 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13147
47f339cf
BS
13148 case IX86_BUILTIN_PF2IW:
13149 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13150
13151 case IX86_BUILTIN_PFNACC:
13152 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13153
13154 case IX86_BUILTIN_PFPNACC:
13155 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13156
13157 case IX86_BUILTIN_PI2FW:
13158 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13159
13160 case IX86_BUILTIN_PSWAPDSI:
13161 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13162
13163 case IX86_BUILTIN_PSWAPDSF:
13164 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13165
e37af218
RH
13166 case IX86_BUILTIN_SSE_ZERO:
13167 target = gen_reg_rtx (V4SFmode);
13168 emit_insn (gen_sse_clrv4sf (target));
bd793c65
BS
13169 return target;
13170
bd793c65
BS
13171 case IX86_BUILTIN_MMX_ZERO:
13172 target = gen_reg_rtx (DImode);
13173 emit_insn (gen_mmx_clrdi (target));
13174 return target;
13175
fbe5eb6d
BS
13176 case IX86_BUILTIN_SQRTSD:
13177 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13178 case IX86_BUILTIN_LOADAPD:
13179 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13180 case IX86_BUILTIN_LOADUPD:
13181 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13182
13183 case IX86_BUILTIN_STOREAPD:
13184 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13185 case IX86_BUILTIN_STOREUPD:
13186 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13187
13188 case IX86_BUILTIN_LOADSD:
13189 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13190
13191 case IX86_BUILTIN_STORESD:
13192 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13193
13194 case IX86_BUILTIN_SETPD1:
13195 target = assign_386_stack_local (DFmode, 0);
13196 arg0 = TREE_VALUE (arglist);
13197 emit_move_insn (adjust_address (target, DFmode, 0),
13198 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13199 op0 = gen_reg_rtx (V2DFmode);
13200 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13201 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13202 return op0;
13203
13204 case IX86_BUILTIN_SETPD:
13205 target = assign_386_stack_local (V2DFmode, 0);
13206 arg0 = TREE_VALUE (arglist);
13207 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13208 emit_move_insn (adjust_address (target, DFmode, 0),
13209 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13210 emit_move_insn (adjust_address (target, DFmode, 8),
13211 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13212 op0 = gen_reg_rtx (V2DFmode);
13213 emit_insn (gen_sse2_movapd (op0, target));
13214 return op0;
13215
13216 case IX86_BUILTIN_LOADRPD:
13217 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13218 gen_reg_rtx (V2DFmode), 1);
13219 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13220 return target;
13221
13222 case IX86_BUILTIN_LOADPD1:
13223 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13224 gen_reg_rtx (V2DFmode), 1);
13225 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13226 return target;
13227
13228 case IX86_BUILTIN_STOREPD1:
13229 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13230 case IX86_BUILTIN_STORERPD:
13231 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13232
13233 case IX86_BUILTIN_MFENCE:
13234 emit_insn (gen_sse2_mfence ());
13235 return 0;
13236 case IX86_BUILTIN_LFENCE:
13237 emit_insn (gen_sse2_lfence ());
13238 return 0;
13239
13240 case IX86_BUILTIN_CLFLUSH:
13241 arg0 = TREE_VALUE (arglist);
13242 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13243 icode = CODE_FOR_sse2_clflush;
13244 mode0 = insn_data[icode].operand[0].mode;
13245 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13246 op0 = copy_to_mode_reg (mode0, op0);
13247
13248 emit_insn (gen_sse2_clflush (op0));
13249 return 0;
13250
13251 case IX86_BUILTIN_MOVNTPD:
13252 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13253 case IX86_BUILTIN_MOVNTDQ:
916b60b7 13254 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
fbe5eb6d
BS
13255 case IX86_BUILTIN_MOVNTI:
13256 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13257
bd793c65
BS
13258 default:
13259 break;
13260 }
13261
ca7558fc 13262 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
13263 if (d->code == fcode)
13264 {
13265 /* Compares are treated specially. */
13266 if (d->icode == CODE_FOR_maskcmpv4sf3
13267 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13268 || d->icode == CODE_FOR_maskncmpv4sf3
fbe5eb6d
BS
13269 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13270 || d->icode == CODE_FOR_maskcmpv2df3
13271 || d->icode == CODE_FOR_vmmaskcmpv2df3
13272 || d->icode == CODE_FOR_maskncmpv2df3
13273 || d->icode == CODE_FOR_vmmaskncmpv2df3)
bd793c65
BS
13274 return ix86_expand_sse_compare (d, arglist, target);
13275
13276 return ix86_expand_binop_builtin (d->icode, arglist, target);
13277 }
13278
ca7558fc 13279 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
bd793c65
BS
13280 if (d->code == fcode)
13281 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
0f290768 13282
ca7558fc 13283 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
bd793c65
BS
13284 if (d->code == fcode)
13285 return ix86_expand_sse_comi (d, arglist, target);
0f290768 13286
bd793c65
BS
13287 /* @@@ Should really do something sensible here. */
13288 return 0;
bd793c65 13289}
4211a8fb
JH
13290
13291/* Store OPERAND to the memory after reload is completed. This means
f710504c 13292 that we can't easily use assign_stack_local. */
4211a8fb
JH
13293rtx
13294ix86_force_to_memory (mode, operand)
13295 enum machine_mode mode;
13296 rtx operand;
13297{
898d374d 13298 rtx result;
4211a8fb
JH
13299 if (!reload_completed)
13300 abort ();
898d374d
JH
13301 if (TARGET_64BIT && TARGET_RED_ZONE)
13302 {
13303 result = gen_rtx_MEM (mode,
13304 gen_rtx_PLUS (Pmode,
13305 stack_pointer_rtx,
13306 GEN_INT (-RED_ZONE_SIZE)));
13307 emit_move_insn (result, operand);
13308 }
13309 else if (TARGET_64BIT && !TARGET_RED_ZONE)
4211a8fb 13310 {
898d374d 13311 switch (mode)
4211a8fb 13312 {
898d374d
JH
13313 case HImode:
13314 case SImode:
13315 operand = gen_lowpart (DImode, operand);
13316 /* FALLTHRU */
13317 case DImode:
4211a8fb 13318 emit_insn (
898d374d
JH
13319 gen_rtx_SET (VOIDmode,
13320 gen_rtx_MEM (DImode,
13321 gen_rtx_PRE_DEC (DImode,
13322 stack_pointer_rtx)),
13323 operand));
13324 break;
13325 default:
13326 abort ();
13327 }
13328 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13329 }
13330 else
13331 {
13332 switch (mode)
13333 {
13334 case DImode:
13335 {
13336 rtx operands[2];
13337 split_di (&operand, 1, operands, operands + 1);
13338 emit_insn (
13339 gen_rtx_SET (VOIDmode,
13340 gen_rtx_MEM (SImode,
13341 gen_rtx_PRE_DEC (Pmode,
13342 stack_pointer_rtx)),
13343 operands[1]));
13344 emit_insn (
13345 gen_rtx_SET (VOIDmode,
13346 gen_rtx_MEM (SImode,
13347 gen_rtx_PRE_DEC (Pmode,
13348 stack_pointer_rtx)),
13349 operands[0]));
13350 }
13351 break;
13352 case HImode:
13353 /* It is better to store HImodes as SImodes. */
13354 if (!TARGET_PARTIAL_REG_STALL)
13355 operand = gen_lowpart (SImode, operand);
13356 /* FALLTHRU */
13357 case SImode:
4211a8fb 13358 emit_insn (
898d374d
JH
13359 gen_rtx_SET (VOIDmode,
13360 gen_rtx_MEM (GET_MODE (operand),
13361 gen_rtx_PRE_DEC (SImode,
13362 stack_pointer_rtx)),
13363 operand));
13364 break;
13365 default:
13366 abort ();
4211a8fb 13367 }
898d374d 13368 result = gen_rtx_MEM (mode, stack_pointer_rtx);
4211a8fb 13369 }
898d374d 13370 return result;
4211a8fb
JH
13371}
13372
13373/* Free operand from the memory. */
13374void
13375ix86_free_from_memory (mode)
13376 enum machine_mode mode;
13377{
898d374d
JH
13378 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13379 {
13380 int size;
13381
13382 if (mode == DImode || TARGET_64BIT)
13383 size = 8;
13384 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13385 size = 2;
13386 else
13387 size = 4;
13388 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13389 to pop or add instruction if registers are available. */
13390 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13391 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13392 GEN_INT (size))));
13393 }
4211a8fb 13394}
a946dd00 13395
f84aa48a
JH
13396/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13397 QImode must go into class Q_REGS.
13398 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
892a2d68 13399 movdf to do mem-to-mem moves through integer regs. */
f84aa48a
JH
13400enum reg_class
13401ix86_preferred_reload_class (x, class)
13402 rtx x;
13403 enum reg_class class;
13404{
13405 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13406 {
13407 /* SSE can't load any constant directly yet. */
13408 if (SSE_CLASS_P (class))
13409 return NO_REGS;
13410 /* Floats can load 0 and 1. */
13411 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13412 {
13413 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13414 if (MAYBE_SSE_CLASS_P (class))
13415 return (reg_class_subset_p (class, GENERAL_REGS)
13416 ? GENERAL_REGS : FLOAT_REGS);
13417 else
13418 return class;
13419 }
13420 /* General regs can load everything. */
13421 if (reg_class_subset_p (class, GENERAL_REGS))
13422 return GENERAL_REGS;
13423 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13424 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13425 return NO_REGS;
13426 }
13427 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13428 return NO_REGS;
13429 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13430 return Q_REGS;
13431 return class;
13432}
13433
13434/* If we are copying between general and FP registers, we need a memory
13435 location. The same is true for SSE and MMX registers.
13436
13437 The macro can't work reliably when one of the CLASSES is class containing
13438 registers from multiple units (SSE, MMX, integer). We avoid this by never
13439 combining those units in single alternative in the machine description.
13440 Ensure that this constraint holds to avoid unexpected surprises.
13441
13442 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13443 enforce these sanity checks. */
13444int
13445ix86_secondary_memory_needed (class1, class2, mode, strict)
13446 enum reg_class class1, class2;
13447 enum machine_mode mode;
13448 int strict;
13449{
13450 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13451 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13452 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13453 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13454 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13455 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13456 {
13457 if (strict)
13458 abort ();
13459 else
13460 return 1;
13461 }
13462 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13463 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13464 && (mode) != SImode)
13465 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13466 && (mode) != SImode));
13467}
13468/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 13469 one in class CLASS2.
f84aa48a
JH
13470
13471 It is not required that the cost always equal 2 when FROM is the same as TO;
13472 on some machines it is expensive to move between registers if they are not
13473 general registers. */
13474int
13475ix86_register_move_cost (mode, class1, class2)
13476 enum machine_mode mode;
13477 enum reg_class class1, class2;
13478{
13479 /* In case we require secondary memory, compute cost of the store followed
13480 by load. In case of copying from general_purpose_register we may emit
13481 multiple stores followed by single load causing memory size mismatch
13482 stall. Count this as arbitarily high cost of 20. */
13483 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13484 {
92d0fb09 13485 int add_cost = 0;
62415523 13486 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
92d0fb09 13487 add_cost = 20;
62415523 13488 return (MEMORY_MOVE_COST (mode, class1, 0)
92d0fb09 13489 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
f84aa48a 13490 }
92d0fb09 13491 /* Moves between SSE/MMX and integer unit are expensive. */
62415523
JH
13492 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13493 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
fa79946e
JH
13494 return ix86_cost->mmxsse_to_integer;
13495 if (MAYBE_FLOAT_CLASS_P (class1))
13496 return ix86_cost->fp_move;
13497 if (MAYBE_SSE_CLASS_P (class1))
13498 return ix86_cost->sse_move;
13499 if (MAYBE_MMX_CLASS_P (class1))
13500 return ix86_cost->mmx_move;
f84aa48a
JH
13501 return 2;
13502}
13503
a946dd00
JH
13504/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13505int
13506ix86_hard_regno_mode_ok (regno, mode)
13507 int regno;
13508 enum machine_mode mode;
13509{
13510 /* Flags and only flags can only hold CCmode values. */
13511 if (CC_REGNO_P (regno))
13512 return GET_MODE_CLASS (mode) == MODE_CC;
13513 if (GET_MODE_CLASS (mode) == MODE_CC
13514 || GET_MODE_CLASS (mode) == MODE_RANDOM
13515 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13516 return 0;
13517 if (FP_REGNO_P (regno))
13518 return VALID_FP_MODE_P (mode);
13519 if (SSE_REGNO_P (regno))
13520 return VALID_SSE_REG_MODE (mode);
13521 if (MMX_REGNO_P (regno))
47f339cf 13522 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
a946dd00
JH
13523 /* We handle both integer and floats in the general purpose registers.
13524 In future we should be able to handle vector modes as well. */
13525 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13526 return 0;
13527 /* Take care for QImode values - they can be in non-QI regs, but then
13528 they do cause partial register stalls. */
d2836273 13529 if (regno < 4 || mode != QImode || TARGET_64BIT)
a946dd00
JH
13530 return 1;
13531 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13532}
fa79946e
JH
13533
13534/* Return the cost of moving data of mode M between a
13535 register and memory. A value of 2 is the default; this cost is
13536 relative to those in `REGISTER_MOVE_COST'.
13537
13538 If moving between registers and memory is more expensive than
13539 between two registers, you should define this macro to express the
a4f31c00
AJ
13540 relative cost.
13541
fa79946e
JH
13542 Model also increased moving costs of QImode registers in non
13543 Q_REGS classes.
13544 */
13545int
13546ix86_memory_move_cost (mode, class, in)
13547 enum machine_mode mode;
13548 enum reg_class class;
13549 int in;
13550{
13551 if (FLOAT_CLASS_P (class))
13552 {
13553 int index;
13554 switch (mode)
13555 {
13556 case SFmode:
13557 index = 0;
13558 break;
13559 case DFmode:
13560 index = 1;
13561 break;
13562 case XFmode:
13563 case TFmode:
13564 index = 2;
13565 break;
13566 default:
13567 return 100;
13568 }
13569 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13570 }
13571 if (SSE_CLASS_P (class))
13572 {
13573 int index;
13574 switch (GET_MODE_SIZE (mode))
13575 {
13576 case 4:
13577 index = 0;
13578 break;
13579 case 8:
13580 index = 1;
13581 break;
13582 case 16:
13583 index = 2;
13584 break;
13585 default:
13586 return 100;
13587 }
13588 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13589 }
13590 if (MMX_CLASS_P (class))
13591 {
13592 int index;
13593 switch (GET_MODE_SIZE (mode))
13594 {
13595 case 4:
13596 index = 0;
13597 break;
13598 case 8:
13599 index = 1;
13600 break;
13601 default:
13602 return 100;
13603 }
13604 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13605 }
13606 switch (GET_MODE_SIZE (mode))
13607 {
13608 case 1:
13609 if (in)
13610 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13611 : ix86_cost->movzbl_load);
13612 else
13613 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13614 : ix86_cost->int_store[0] + 4);
13615 break;
13616 case 2:
13617 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13618 default:
13619 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
13620 if (mode == TFmode)
13621 mode = XFmode;
3bb7e126 13622 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
fa79946e
JH
13623 * (int) GET_MODE_SIZE (mode) / 4);
13624 }
13625}
0ecf09f9 13626
21c318ba 13627#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
2cc07db4
RH
13628static void
13629ix86_svr3_asm_out_constructor (symbol, priority)
13630 rtx symbol;
13631 int priority ATTRIBUTE_UNUSED;
13632{
13633 init_section ();
13634 fputs ("\tpushl $", asm_out_file);
13635 assemble_name (asm_out_file, XSTR (symbol, 0));
13636 fputc ('\n', asm_out_file);
13637}
13638#endif
162f023b 13639
b069de3b
SS
13640#if TARGET_MACHO
13641
13642static int current_machopic_label_num;
13643
13644/* Given a symbol name and its associated stub, write out the
13645 definition of the stub. */
13646
13647void
13648machopic_output_stub (file, symb, stub)
13649 FILE *file;
13650 const char *symb, *stub;
13651{
13652 unsigned int length;
13653 char *binder_name, *symbol_name, lazy_ptr_name[32];
13654 int label = ++current_machopic_label_num;
13655
13656 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
13657 symb = (*targetm.strip_name_encoding) (symb);
13658
13659 length = strlen (stub);
13660 binder_name = alloca (length + 32);
13661 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
13662
13663 length = strlen (symb);
13664 symbol_name = alloca (length + 32);
13665 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
13666
13667 sprintf (lazy_ptr_name, "L%d$lz", label);
13668
13669 if (MACHOPIC_PURE)
13670 machopic_picsymbol_stub_section ();
13671 else
13672 machopic_symbol_stub_section ();
13673
13674 fprintf (file, "%s:\n", stub);
13675 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13676
13677 if (MACHOPIC_PURE)
13678 {
13679 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
13680 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
13681 fprintf (file, "\tjmp %%edx\n");
13682 }
13683 else
13684 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
13685
13686 fprintf (file, "%s:\n", binder_name);
13687
13688 if (MACHOPIC_PURE)
13689 {
13690 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
13691 fprintf (file, "\tpushl %%eax\n");
13692 }
13693 else
13694 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
13695
13696 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
13697
13698 machopic_lazy_symbol_ptr_section ();
13699 fprintf (file, "%s:\n", lazy_ptr_name);
13700 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13701 fprintf (file, "\t.long %s\n", binder_name);
13702}
13703#endif /* TARGET_MACHO */
13704
162f023b
JH
13705/* Order the registers for register allocator. */
13706
13707void
13708x86_order_regs_for_local_alloc ()
13709{
13710 int pos = 0;
13711 int i;
13712
13713 /* First allocate the local general purpose registers. */
13714 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13715 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13716 reg_alloc_order [pos++] = i;
13717
13718 /* Global general purpose registers. */
13719 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13720 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13721 reg_alloc_order [pos++] = i;
13722
13723 /* x87 registers come first in case we are doing FP math
13724 using them. */
13725 if (!TARGET_SSE_MATH)
13726 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13727 reg_alloc_order [pos++] = i;
fce5a9f2 13728
162f023b
JH
13729 /* SSE registers. */
13730 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13731 reg_alloc_order [pos++] = i;
13732 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13733 reg_alloc_order [pos++] = i;
13734
13735 /* x87 registerts. */
13736 if (TARGET_SSE_MATH)
13737 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13738 reg_alloc_order [pos++] = i;
13739
13740 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13741 reg_alloc_order [pos++] = i;
13742
13743 /* Initialize the rest of array as we do not allocate some registers
13744 at all. */
13745 while (pos < FIRST_PSEUDO_REGISTER)
13746 reg_alloc_order [pos++] = 0;
13747}
194734e9
JH
13748
13749void
13750x86_output_mi_thunk (file, delta, function)
13751 FILE *file;
13752 int delta;
13753 tree function;
13754{
13755 tree parm;
13756 rtx xops[3];
13757
13758 if (ix86_regparm > 0)
13759 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13760 else
13761 parm = NULL_TREE;
13762 for (; parm; parm = TREE_CHAIN (parm))
13763 if (TREE_VALUE (parm) == void_type_node)
13764 break;
13765
13766 xops[0] = GEN_INT (delta);
13767 if (TARGET_64BIT)
13768 {
13769 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13770 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13771 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
13772 if (flag_pic)
13773 {
13774 fprintf (file, "\tjmp *");
13775 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13776 fprintf (file, "@GOTPCREL(%%rip)\n");
13777 }
13778 else
13779 {
13780 fprintf (file, "\tjmp ");
13781 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13782 fprintf (file, "\n");
13783 }
13784 }
13785 else
13786 {
13787 if (parm)
13788 xops[1] = gen_rtx_REG (SImode, 0);
13789 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13790 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13791 else
13792 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13793 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
13794
13795 if (flag_pic)
13796 {
13797 xops[0] = pic_offset_table_rtx;
13798 xops[1] = gen_label_rtx ();
13799 xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13800
13801 if (ix86_regparm > 2)
13802 abort ();
13803 output_asm_insn ("push{l}\t%0", xops);
13804 output_asm_insn ("call\t%P1", xops);
13805 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13806 output_asm_insn ("pop{l}\t%0", xops);
13807 output_asm_insn
13808 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13809 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13810 output_asm_insn
13811 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13812 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13813 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
13814 }
13815 else
13816 {
13817 fprintf (file, "\tjmp ");
13818 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13819 fprintf (file, "\n");
13820 }
13821 }
13822}
e2500fed 13823
e932b21b
JH
13824int
13825x86_field_alignment (field, computed)
13826 tree field;
13827 int computed;
13828{
13829 enum machine_mode mode;
ad9335eb
JJ
13830 tree type = TREE_TYPE (field);
13831
13832 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
e932b21b 13833 return computed;
ad9335eb
JJ
13834 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
13835 ? get_inner_array_type (type) : type);
39e3a681
JJ
13836 if (mode == DFmode || mode == DCmode
13837 || GET_MODE_CLASS (mode) == MODE_INT
13838 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
e932b21b
JH
13839 return MIN (32, computed);
13840 return computed;
13841}
13842
2a500b9e
JH
13843/* Implement machine specific optimizations.
13844 At the moment we implement single transformation: AMD Athlon works faster
13845 when RET is not destination of conditional jump or directly preceeded
13846 by other jump instruction. We avoid the penalty by inserting NOP just
13847 before the RET instructions in such cases. */
13848void
13849x86_machine_dependent_reorg (first)
13850 rtx first ATTRIBUTE_UNUSED;
13851{
13852 edge e;
13853
13854 if (!TARGET_ATHLON || !optimize || optimize_size)
13855 return;
13856 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
13857 {
13858 basic_block bb = e->src;
13859 rtx ret = bb->end;
13860 rtx prev;
13861 bool insert = false;
13862
13863 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
13864 continue;
13865 prev = prev_nonnote_insn (ret);
13866 if (prev && GET_CODE (prev) == CODE_LABEL)
13867 {
13868 edge e;
13869 for (e = bb->pred; e; e = e->pred_next)
13870 if (EDGE_FREQUENCY (e) && e->src->index > 0
13871 && !(e->flags & EDGE_FALLTHRU))
13872 insert = 1;
13873 }
13874 if (!insert)
13875 {
13876 prev = prev_real_insn (ret);
13877 if (prev && GET_CODE (prev) == JUMP_INSN
13878 && any_condjump_p (prev))
13879 insert = 1;
13880 }
13881 if (insert)
13882 emit_insn_before (gen_nop (), ret);
13883 }
13884}
13885
e2500fed 13886#include "gt-i386.h"
This page took 3.4479 seconds and 5 git commands to generate.